Huge merge correcting upstream master

Alan Saul 2014-11-21 16:49:33 +00:00
commit 34932f8746
319 changed files with 26201 additions and 26660 deletions

.gitignore vendored

@@ -45,4 +45,4 @@ iterate.dat
# git merge files #
###################
*.orig

.travis.yml

@@ -2,14 +2,14 @@ language: python
python:
  - "2.7"
-#Set virtual env with system-site-packages to true
-virtualenv:
-  system_site_packages: true
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
before_install:
-  - sudo apt-get install -qq python-scipy python-pip
-  - sudo apt-get install -qq python-matplotlib
+  #Install a mini version of anaconda such that we can easily install our dependencies
+  - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
+  - chmod +x miniconda.sh
+  - ./miniconda.sh -b
+  - export PATH=/home/travis/miniconda/bin:$PATH
+  - conda update --yes conda
# Workaround for a permissions issue with Travis virtual machine images
# that breaks Python's multiprocessing:
# https://github.com/travis-ci/travis-cookbooks/issues/155
@@ -17,11 +17,10 @@ before_install:
  - sudo ln -s /run/shm /dev/shm
install:
-  - pip install --upgrade numpy==1.7.1
-  - pip install sphinx
-  - pip install nose
-  - pip install . --use-mirrors
+  - conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.7 scipy=0.12 matplotlib nose sphinx pip nose
+  - pip install .
+  #--use-mirrors
+  #
# command to run tests, e.g. python setup.py test
script:
  - nosetests GPy/testing
+  #- yes | nosetests GPy/testing


@@ -1,8 +0,0 @@
Frequently Asked Questions
--------------------------
Unit tests are run through Travis-Ci. They can be run locally through entering the GPy route diretory and writing
nosetests testing/
Documentation is handled by Sphinx. To build the documentation:

GPy/__init__.py

@@ -2,15 +2,10 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
-import os
-def read(fname):
-    with open(os.path.join(os.path.dirname(__file__), fname)) as f:
-        return f.read()
-__version__ = read('version')
import core
+from core.parameterization import transformations, priors
+constraints = transformations
import models
import mappings
import inference
@@ -19,27 +14,36 @@ import examples
import likelihoods
import testing
from numpy.testing import Tester
-from nose.tools import nottest
import kern
-from core import priors
-@nottest
-def tests():
-    Tester(testing).test(verbose=10)
-
-if os.name == 'nt':
-    """
-    Fortran seems to like to intercept keyboard interrupts on windows.
-    This means that when a model is optimizing and the user presses Ctrl-C,
-    the program will crash. Since it's kind of nice to be able to stop
-    the optimization at any time, we define our own handler below.
-    """
-    import win32api
-    import thread
-
-    def handler(sig, hook=thread.interrupt_main):
-        hook()
-        return 1
-
-    win32api.SetConsoleCtrlHandler(handler, 1)
+import plotting
+
+# Direct imports for convenience:
+from core import Model
+from core.parameterization import Param, Parameterized, ObsAr
+
+#@nottest
+try:
+    #Get rid of nose dependency by only ignoring if you have nose installed
+    from nose.tools import nottest
+    @nottest
+    def tests():
+        Tester(testing).test(verbose=10)
+except:
+    def tests():
+        Tester(testing).test(verbose=10)
+
+def load(file_path):
+    """
+    Load a previously pickled model, using `m.pickle('path/to/file.pickle)'
+
+    :param file_name: path/to/file.pickle
+    """
+    import cPickle as pickle
+    try:
+        with open(file_path, 'rb') as f:
+            m = pickle.load(f)
+    except:
+        import pickle as pickle
+        with open(file_path, 'rb') as f:
+            m = pickle.load(f)
+    return m
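For reference, the new top-level load() pairs with the pickling support on models; a minimal usage sketch (the toy model and file name below are illustrative, not part of this commit):

import numpy as np
import GPy

X = np.random.rand(20, 1)
Y = np.sin(6 * X) + 0.05 * np.random.randn(20, 1)
m = GPy.models.GPRegression(X, Y)    # any pickleable GPy model would do

m.pickle('toy_model.pickle')         # save, as referenced in the load() docstring
m2 = GPy.load('toy_model.pickle')    # restore with the new helper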


@@ -1,10 +0,0 @@
In this text document we will describe coding conventions to be used in GPy to keep things consistent.
All arrays containing data are two dimensional. The first dimension is the number of data, the second dimension is number of features. This keeps things consistent with the idea of a design matrix.
Input matrices are either X or t, output matrices are Y.
Input dimensionality is input_dim, output dimensionality is output_dim, number of data is num_data.
Data sets are preprocessed in the datasets.py file. This file also records where the data set was obtained from in the dictionary stored in the file. Long term we should move this dictionary to sqlite or similar.

GPy/core/__init__.py

@@ -1,11 +1,11 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from model import *
-from parameterized import *
-import priors
+from parameterization.parameterized import adjust_name_for_printing, Parameterizable
+from parameterization.param import Param, ParamConcatenation
+from parameterization.observable_array import ObsAr
from gp import GP
from sparse_gp import SparseGP
-from fitc import FITC
-from svigp import SVIGP
from mapping import *
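With the re-exports above, the commonly used classes become importable from the package root; a short sketch (names taken from the two __init__ files in this commit):

from GPy import Model, Param, Parameterized, ObsAr   # re-exported in GPy/__init__.py
from GPy.core import GP, SparseGP                     # re-exported in GPy/core/__init__.py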

GPy/core/domains.py

@@ -1,26 +0,0 @@
'''
Created on 4 Jun 2013
@author: maxz
(Hyper-)Parameter domains defined for :py:mod:`~GPy.core.priors` and :py:mod:`~GPy.kern`.
These domains specify the legitimate realm of the parameters to live in.
:const:`~GPy.core.domains.REAL` :
real domain, all values in the real numbers are allowed
:const:`~GPy.core.domains.POSITIVE`:
positive domain, only positive real values are allowed
:const:`~GPy.core.domains.NEGATIVE`:
same as :const:`~GPy.core.domains.POSITIVE`, but only negative values are allowed
:const:`~GPy.core.domains.BOUNDED`:
only values within the bounded range are allowed,
the bounds are specified withing the object with the bounded range
'''
REAL = 'real'
POSITIVE = "positive"
NEGATIVE = 'negative'
BOUNDED = 'bounded'

GPy/core/fitc.py

@@ -1,248 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, tdot, symmetrify, pdinv, dtrtrs
from ..util.plot import gpplot
from .. import kern
from scipy import stats
from sparse_gp import SparseGP
class FITC(SparseGP):
"""
Sparse FITC approximation
:param X: inputs
:type X: np.ndarray (num_data x Q)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, normalize_X=False):
SparseGP.__init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False)
assert self.output_dim == 1, "FITC model is not defined for handling multiple outputs"
def update_likelihood_approximation(self, **kwargs):
"""
Approximates a non-Gaussian likelihood using Expectation Propagation
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.restart()
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0, **kwargs)
self._set_params(self._get_params())
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z, self.X)
self.psi2 = None
def _computations(self):
#factor Kmm
self.Lm = jitchol(self.Kmm)
self.Lmi,info = dtrtrs(self.Lm,np.eye(self.num_inducing),lower=1)
Lmipsi1 = np.dot(self.Lmi,self.psi1)
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1).copy()
self.Diag0 = self.psi0 - np.diag(self.Qnn)
self.beta_star = self.likelihood.precision/(1. + self.likelihood.precision*self.Diag0[:,None]) #NOTE: beta_star contains Diag0 and the precision
self.V_star = self.beta_star * self.likelihood.Y
# The rather complex computations of self.A
tmp = self.psi1 * (np.sqrt(self.beta_star.flatten().reshape(1, self.num_data)))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self.A
self.LB = jitchol(self.B)
self.LBi = chol_inv(self.LB)
self.psi1V = np.dot(self.psi1, self.V_star)
Lmi_psi1V, info = dtrtrs(self.Lm, np.asfortranarray(self.psi1V), lower=1, trans=0)
self._LBi_Lmi_psi1V, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1V), lower=1, trans=0)
Kmmipsi1 = np.dot(self.Lmi.T,Lmipsi1)
b_psi1_Ki = self.beta_star * Kmmipsi1.T
Ki_pbp_Ki = np.dot(Kmmipsi1,b_psi1_Ki)
Kmmi = np.dot(self.Lmi.T,self.Lmi)
LBiLmi = np.dot(self.LBi,self.Lmi)
LBL_inv = np.dot(LBiLmi.T,LBiLmi)
VVT = np.outer(self.V_star,self.V_star)
VV_p_Ki = np.dot(VVT,Kmmipsi1.T)
Ki_pVVp_Ki = np.dot(Kmmipsi1,VV_p_Ki)
psi1beta = self.psi1*self.beta_star.T
H = self.Kmm + mdot(self.psi1,psi1beta.T)
LH = jitchol(H)
LHi = chol_inv(LH)
Hi = np.dot(LHi.T,LHi)
betapsi1TLmiLBi = np.dot(psi1beta.T,LBiLmi.T)
alpha = np.array([np.dot(a.T,a) for a in betapsi1TLmiLBi])[:,None]
gamma_1 = mdot(VVT,self.psi1.T,Hi)
pHip = mdot(self.psi1.T,Hi,self.psi1)
gamma_2 = mdot(self.beta_star*pHip,self.V_star)
gamma_3 = self.V_star * gamma_2
self._dL_dpsi0 = -0.5 * self.beta_star#dA_dpsi0: logdet(self.beta_star)
self._dL_dpsi0 += .5 * self.V_star**2 #dA_psi0: yT*beta_star*y
self._dL_dpsi0 += .5 *alpha #dC_dpsi0
self._dL_dpsi0 += 0.5*mdot(self.beta_star*pHip,self.V_star)**2 - self.V_star * mdot(self.V_star.T,pHip*self.beta_star).T #dD_dpsi0
self._dL_dpsi1 = b_psi1_Ki.copy() #dA_dpsi1: logdet(self.beta_star)
self._dL_dpsi1 += -np.dot(psi1beta.T,LBL_inv) #dC_dpsi1
self._dL_dpsi1 += gamma_1 - mdot(psi1beta.T,Hi,self.psi1,gamma_1) #dD_dpsi1
self._dL_dKmm = -0.5 * np.dot(Kmmipsi1,b_psi1_Ki) #dA_dKmm: logdet(self.beta_star)
self._dL_dKmm += .5*(LBL_inv - Kmmi) + mdot(LBL_inv,psi1beta,Kmmipsi1.T) #dC_dKmm
self._dL_dKmm += -.5 * mdot(Hi,self.psi1,gamma_1) #dD_dKmm
self._dpsi1_dtheta = 0
self._dpsi1_dX = 0
self._dKmm_dtheta = 0
self._dKmm_dX = 0
self._dpsi1_dX_jkj = 0
self._dpsi1_dtheta_jkj = 0
for i,V_n,alpha_n,gamma_n,gamma_k in zip(range(self.num_data),self.V_star,alpha,gamma_2,gamma_3):
K_pp_K = np.dot(Kmmipsi1[:,i:(i+1)],Kmmipsi1[:,i:(i+1)].T)
_dpsi1 = (-V_n**2 - alpha_n + 2.*gamma_k - gamma_n**2) * Kmmipsi1.T[i:(i+1),:]
_dKmm = .5*(V_n**2 + alpha_n + gamma_n**2 - 2.*gamma_k) * K_pp_K #Diag_dD_dKmm
self._dpsi1_dtheta += self.kern.dK_dtheta(_dpsi1,self.X[i:i+1,:],self.Z)
self._dKmm_dtheta += self.kern.dK_dtheta(_dKmm,self.Z)
self._dKmm_dX += self.kern.dK_dX(_dKmm ,self.Z)
self._dpsi1_dX += self.kern.dK_dX(_dpsi1.T,self.Z,self.X[i:i+1,:])
# the partial derivative vector for the likelihood
if self.likelihood.num_params == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
raise NotImplementedError, "heteroscedatic derivates not implemented."
else:
# likelihood is not heterscedatic
dbstar_dnoise = self.likelihood.precision * (self.beta_star**2 * self.Diag0[:,None] - self.beta_star)
Lmi_psi1 = mdot(self.Lmi,self.psi1)
LBiLmipsi1 = np.dot(self.LBi,Lmi_psi1)
aux_0 = np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_1 = self.likelihood.Y.T * np.dot(self._LBi_Lmi_psi1V.T,LBiLmipsi1)
aux_2 = np.dot(LBiLmipsi1.T,self._LBi_Lmi_psi1V)
dA_dnoise = 0.5 * self.input_dim * (dbstar_dnoise/self.beta_star).sum() - 0.5 * self.input_dim * np.sum(self.likelihood.Y**2 * dbstar_dnoise)
dC_dnoise = -0.5 * np.sum(mdot(self.LBi.T,self.LBi,Lmi_psi1) * Lmi_psi1 * dbstar_dnoise.T)
dD_dnoise_1 = mdot(self.V_star*LBiLmipsi1.T,LBiLmipsi1*dbstar_dnoise.T*self.likelihood.Y.T)
alpha = mdot(LBiLmipsi1,self.V_star)
alpha_ = mdot(LBiLmipsi1.T,alpha)
dD_dnoise_2 = -0.5 * self.input_dim * np.sum(alpha_**2 * dbstar_dnoise )
dD_dnoise_1 = mdot(self.V_star.T,self.psi1.T,self.Lmi.T,self.LBi.T,self.LBi,self.Lmi,self.psi1,dbstar_dnoise*self.likelihood.Y)
dD_dnoise_2 = 0.5*mdot(self.V_star.T,self.psi1.T,Hi,self.psi1,dbstar_dnoise*self.psi1.T,Hi,self.psi1,self.V_star)
dD_dnoise = dD_dnoise_1 + dD_dnoise_2
self.partial_for_likelihood = dA_dnoise + dC_dnoise + dD_dnoise
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.beta_star)) - 0.5 * np.sum(self.V_star * self.likelihood.Y)
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB))))
D = 0.5 * np.sum(np.square(self._LBi_Lmi_psi1V))
return A + C + D + self.likelihood.Z
def _log_likelihood_gradients(self):
pass
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
dL_dtheta = self.kern.dKdiag_dtheta(self._dL_dpsi0,self.X)
dL_dtheta += self.kern.dK_dtheta(self._dL_dpsi1,self.X,self.Z)
dL_dtheta += self.kern.dK_dtheta(self._dL_dKmm,X=self.Z)
dL_dtheta += self._dKmm_dtheta
dL_dtheta += self._dpsi1_dtheta
return dL_dtheta
def dL_dZ(self):
dL_dZ = self.kern.dK_dX(self._dL_dpsi1.T,self.Z,self.X)
dL_dZ += self.kern.dK_dX(self._dL_dKmm,X=self.Z)
dL_dZ += self._dpsi1_dX
dL_dZ += self._dKmm_dX
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
assert X_variance_new is None, "FITC model is not defined for handling uncertain inputs."
if self.likelihood.is_heteroscedastic:
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.likelihood.precision.flatten())
self.Diag = self.Diag0 * Iplus_Dprod_i
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
self.RPT0 = np.dot(self.Lmi,self.psi1)
self.L = np.linalg.cholesky(np.eye(self.num_inducing) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
self.R,info = dtrtrs(self.L,self.Lmi,lower=1)
self.RPT = np.dot(self.R,self.P.T)
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
self.w = self.Diag * self.likelihood.v_tilde
self.Gamma = np.dot(self.R.T, np.dot(self.RPT,self.likelihood.v_tilde))
self.mu = self.w + np.dot(self.P,self.Gamma)
"""
Make a prediction for the generalized FITC model
Arguments
---------
X : Input prediction data - Nx1 numpy array (floats)
"""
# q(u|f) = N(u| R0i*mu_u*f, R0i*C*R0i.T)
# Ci = I + (RPT0)Di(RPT0).T
# C = I - [RPT0] * (input_dim+[RPT0].T*[RPT0])^-1*[RPT0].T
# = I - [RPT0] * (input_dim + self.Qnn)^-1 * [RPT0].T
# = I - [RPT0] * (U*U.T)^-1 * [RPT0].T
# = I - V.T * V
U = np.linalg.cholesky(np.diag(self.Diag0) + self.Qnn)
V,info = dtrtrs(U,self.RPT0.T,lower=1)
C = np.eye(self.num_inducing) - np.dot(V.T,V)
mu_u = np.dot(C,self.RPT0)*(1./self.Diag0[None,:])
#self.C = C
#self.RPT0 = np.dot(self.R0,self.Knm.T) P0.T
#self.mu_u = mu_u
#self.U = U
# q(u|y) = N(u| R0i*mu_H,R0i*Sigma_H*R0i.T)
mu_H = np.dot(mu_u,self.mu)
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.num_inducing),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:
raise NotImplementedError, "Heteroscedastic case not implemented."
"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]
"""

GPy/core/gp.py

@@ -1,204 +1,459 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
-import pylab as pb
+import sys
from .. import kern
-from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs
-from ..likelihoods import EP, Laplace
-from gp_base import GPBase
+from model import Model
+from parameterization import ObsAr
+from .. import likelihoods
+from ..inference.latent_function_inference import exact_gaussian_inference, expectation_propagation
+from parameterization.variational import VariationalPosterior
+import logging
+from GPy.util.normalizer import MeanNorm
+logger = logging.getLogger("GP")

-class GP(GPBase):
+class GP(Model):
    """
-    Gaussian Process model for regression and EP
+    General purpose Gaussian process model

    :param X: input observations
+    :param Y: output observations
    :param kernel: a GPy kernel, defaults to rbf+white
    :param likelihood: a GPy likelihood
-    :param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
-    :type normalize_X: False|True
+    :param inference_method: The :class:`~GPy.inference.latent_function_inference.LatentFunctionInference` inference method to use for this GP
    :rtype: model object
+    :param Norm normalizer:
+        normalize the outputs Y.
+        Prediction will be un-normalized using this normalizer.
+        If normalizer is None, we will normalize using MeanNorm.
+        If normalizer is False, no normalization will be done.

    .. Note:: Multiple independent outputs are allowed using columns of Y
    """
-    def __init__(self, X, likelihood, kernel, normalize_X=False):
-        GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
-        self.update_likelihood_approximation()
-
-    def _set_params(self, p):
-        new_kern_params = p[:self.kern.num_params_transformed()]
-        new_likelihood_params = p[self.kern.num_params_transformed():]
-        old_likelihood_params = self.likelihood._get_params()
-        self.kern._set_params_transformed(new_kern_params)
-        self.likelihood._set_params_transformed(new_likelihood_params)
-        self.K = self.kern.K(self.X)
-
-        #Re fit likelihood approximation (if it is an approx), as parameters have changed
-        if isinstance(self.likelihood, Laplace):
-            self.likelihood.fit_full(self.K)
-        self.K += self.likelihood.covariance_matrix
-        self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
-
-        # the gradient of the likelihood wrt the covariance matrix
-        if self.likelihood.YYT is None:
-            # alpha = np.dot(self.Ki, self.likelihood.Y)
-            alpha, _ = dpotrs(self.L, self.likelihood.Y, lower=1)
-            self.dL_dK = 0.5 * (tdot(alpha) - self.output_dim * self.Ki)
-        else:
-            # tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
-            tmp, _ = dpotrs(self.L, np.asfortranarray(self.likelihood.YYT), lower=1)
-            tmp, _ = dpotrs(self.L, np.asfortranarray(tmp.T), lower=1)
-            self.dL_dK = 0.5 * (tmp - self.output_dim * self.Ki)
-        #Adding dZ_dK (0 for a non-approximate likelihood, compensates for
-        #additional gradients of K when log-likelihood has non-zero Z term)
-        self.dL_dK += self.likelihood.dZ_dK
-
-    def _get_params(self):
-        return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
-
-    def _get_param_names(self):
-        return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
-
-    def update_likelihood_approximation(self, **kwargs):
-        """
-        Approximates a non-gaussian likelihood using Expectation Propagation
-
-        For a Gaussian likelihood, no iteration is required:
-        this function does nothing
-        """
-        self.likelihood.restart()
-        self.likelihood.fit_full(self.kern.K(self.X), **kwargs)
-        self._set_params(self._get_params()) # update the GP
-
-    def _model_fit_term(self):
-        """
-        Computes the model fit using YYT if it's available
-        """
-        if self.likelihood.YYT is None:
-            tmp, _ = dtrtrs(self.L, np.asfortranarray(self.likelihood.Y), lower=1)
-            return -0.5 * np.sum(np.square(tmp))
-            # return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
-        else:
-            return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
+    def __init__(self, X, Y, kernel, likelihood, inference_method=None, name='gp', Y_metadata=None, normalizer=False):
+        super(GP, self).__init__(name)
+
+        assert X.ndim == 2
+        if isinstance(X, (ObsAr, VariationalPosterior)):
+            self.X = X.copy()
+        else: self.X = ObsAr(X)
+        self.num_data, self.input_dim = self.X.shape
+
+        assert Y.ndim == 2
+        logger.info("initializing Y")
+        if normalizer is True:
+            self.normalizer = MeanNorm()
+        elif normalizer is False:
+            self.normalizer = None
+        else:
+            self.normalizer = normalizer
+
+        if self.normalizer is not None:
+            self.normalizer.scale_by(Y)
+            self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
+            self.Y = Y
+        else:
+            self.Y = ObsAr(Y)
+            self.Y_normalized = self.Y
+        assert Y.shape[0] == self.num_data
+        _, self.output_dim = self.Y.shape
+
+        #TODO: check the type of this is okay?
+        self.Y_metadata = Y_metadata
+
+        assert isinstance(kernel, kern.Kern)
+        #assert self.input_dim == kernel.input_dim
+        self.kern = kernel
+
+        assert isinstance(likelihood, likelihoods.Likelihood)
+        self.likelihood = likelihood
+
+        #find a sensible inference method
+        logger.info("initializing inference method")
+        if inference_method is None:
+            if isinstance(likelihood, likelihoods.Gaussian) or isinstance(likelihood, likelihoods.MixedNoise):
+                inference_method = exact_gaussian_inference.ExactGaussianInference()
+            else:
+                inference_method = expectation_propagation.EP()
+            print "defaulting to ", inference_method, "for latent function inference"
+        self.inference_method = inference_method
+
+        logger.info("adding kernel and likelihood as parameters")
+        self.link_parameter(self.kern)
+        self.link_parameter(self.likelihood)
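A brief sketch of constructing a GP with the new signature (toy data; the RBF kernel and Gaussian likelihood constructors are assumed from the GPy API of this period and are not shown in this hunk):

import numpy as np
import GPy

X = np.random.rand(30, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(30, 1)

kernel = GPy.kern.RBF(1)                   # assumed kernel class
likelihood = GPy.likelihoods.Gaussian()    # Gaussian likelihood -> ExactGaussianInference by default
m = GPy.core.GP(X, Y, kernel, likelihood)  # new argument order: X, Y, kernel, likelihood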
def set_XY(self, X=None, Y=None):
"""
Set the input / output data of the model
This is useful if we wish to change our existing data but maintain the same model
:param X: input observations
:type X: np.ndarray
:param Y: output observations
:type Y: np.ndarray
"""
self.update_model(False)
if Y is not None:
if self.normalizer is not None:
self.normalizer.scale_by(Y)
self.Y_normalized = ObsAr(self.normalizer.normalize(Y))
self.Y = Y
else:
self.Y = ObsAr(Y)
self.Y_normalized = self.Y
if X is not None:
if self.X in self.parameters:
# LVM models
if isinstance(self.X, VariationalPosterior):
assert isinstance(X, type(self.X)), "The given X must have the same type as the X in the model!"
self.unlink_parameter(self.X)
self.X = X
self.link_parameters(self.X)
else:
self.unlink_parameter(self.X)
from ..core import Param
self.X = Param('latent mean',X)
self.link_parameters(self.X)
else:
self.X = ObsAr(X)
self.update_model(True)
def set_X(self,X):
"""
Set the input data of the model
:param X: input observations
:type X: np.ndarray
"""
self.set_XY(X=X)
def set_Y(self,Y):
"""
Set the output data of the model
:param X: output observations
:type X: np.ndarray
"""
self.set_XY(Y=Y)
def parameters_changed(self):
"""
Method that is called upon any changes to :class:`~GPy.core.parameterization.param.Param` variables within the model.
In particular in the GP class this method reperforms inference, recalculating the posterior and log marginal likelihood and gradients of the model
.. warning::
This method is not designed to be called manually, the framework is set up to automatically call this method upon changes to parameters, if you call
this method yourself, there may be unexpected consequences.
"""
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.likelihood, self.Y_normalized, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
self.kern.update_gradients_full(self.grad_dict['dL_dK'], self.X)
    def log_likelihood(self):
        """
-        The log marginal likelihood of the GP.
-
-        For an EP model, can be written as the log likelihood of a regression
-        model for a new variable Y* = v_tilde/tau_tilde, with a covariance
-        matrix K* = K + diag(1./tau_tilde) plus a normalization term.
+        The log marginal likelihood of the model, :math:`p(\mathbf{y})`, this is the objective function of the model being optimised
        """
-        return (-0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) -
-                0.5 * self.output_dim * self.K_logdet + self._model_fit_term() + self.likelihood.Z)
-
-    def _log_likelihood_gradients(self):
-        """
-        The gradient of all parameters.
-
-        Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
-        """
-        return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
+        return self._log_marginal_likelihood

-    def _raw_predict(self, _Xnew, which_parts='all', full_cov=False, stop=False):
+    def _raw_predict(self, _Xnew, full_cov=False, kern=None):
        """
-        Internal helper function for making predictions, does not account
-        for normalization or likelihood
-        """
-        Kx = self.kern.K(_Xnew, self.X, which_parts=which_parts).T
-        # KiKx = np.dot(self.Ki, Kx)
-        KiKx, _ = dpotrs(self.L, np.asfortranarray(Kx), lower=1)
-        mu = np.dot(KiKx.T, self.likelihood.Y)
+        For making predictions, does not account for normalization or likelihood
+
+        full_cov is a boolean which defines whether the full covariance matrix
+        of the prediction is computed. If full_cov is False (default), only the
+        diagonal of the covariance is returned.
+
+        .. math::
+            p(f*|X*, X, Y) = \int^{\inf}_{\inf} p(f*|f,X*)p(f|X,Y) df
+                           = N(f*| K_{x*x}(K_{xx} + \Sigma)^{-1}Y, K_{x*x*} - K_{xx*}(K_{xx} + \Sigma)^{-1}K_{xx*}
+            \Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
+        """
+        if kern is None:
+            kern = self.kern
+        Kx = kern.K(_Xnew, self.X).T
+        WiKx = np.dot(self.posterior.woodbury_inv, Kx)
+        mu = np.dot(Kx.T, self.posterior.woodbury_vector)
        if full_cov:
-            Kxx = self.kern.K(_Xnew, which_parts=which_parts)
-            var = Kxx - np.dot(KiKx.T, Kx)
+            Kxx = kern.K(_Xnew)
+            var = Kxx - np.dot(Kx.T, WiKx)
        else:
-            Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
-            var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
-            var = var[:, None]
-        if stop:
-            debug_this # @UndefinedVariable
+            Kxx = kern.Kdiag(_Xnew)
+            var = Kxx - np.sum(WiKx*Kx, 0)
+            var = var.reshape(-1, 1)
+
+        #force mu to be a column vector
+        if len(mu.shape)==1: mu = mu[:,None]
        return mu, var

-    def predict(self, Xnew, which_parts='all', full_cov=False, **likelihood_args):
+    def predict(self, Xnew, full_cov=False, Y_metadata=None, kern=None):
        """
        Predict the function(s) at the new point(s) Xnew.

        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
+        :type Xnew: np.ndarray (Nnew x self.input_dim)
+        :param full_cov: whether to return the full covariance matrix, or just
+                         the diagonal
        :type full_cov: bool
-        :returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
-        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
+        :param Y_metadata: metadata about the predicting point to pass to the likelihood
+        :param kern: The kernel to use for prediction (defaults to the model
+                     kern). this is useful for examining e.g. subprocesses.
+        :returns: (mean, var, lower_upper):
+            mean: posterior mean, a Numpy array, Nnew x self.input_dim
+            var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
+            lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim

        If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
        This is to allow for different normalizations of the output dimensions.
        """
-        # normalize X values
-        Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
-        mu, var = self._raw_predict(Xnew, full_cov=full_cov, which_parts=which_parts)
+        #predict the latent function values
+        mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
+        if self.normalizer is not None:
+            mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)

        # now push through likelihood
-        mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
-        return mean, var, _025pm, _975pm
+        mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
+        return mean, var

-    def _raw_predict_single_output(self, _Xnew, output, which_parts='all', full_cov=False,stop=False):
-        """
-        For a specific output, calls _raw_predict() at the new point(s) _Xnew.
-        This functions calls _add_output_index(), so _Xnew should not have an index column specifying the output.
-        ---------
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param output: output to predict
-        :type output: integer in {0,..., output_dim-1}
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        .. Note:: For multiple non-independent outputs models only.
-        """
-        _Xnew = self._add_output_index(_Xnew, output)
-        return self._raw_predict(_Xnew, which_parts=which_parts,full_cov=full_cov, stop=stop)
+    def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
+        """
+        Get the predictive quantiles around the prediction at X
+
+        :param X: The points at which to make a prediction
+        :type X: np.ndarray (Xnew x self.input_dim)
+        :param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
+        :type quantiles: tuple
+        :returns: list of quantiles for each X and predictive quantiles for interval combination
+        :rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
+        """
+        m, v = self._raw_predict(X, full_cov=False)
+        if self.normalizer is not None:
+            m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
+        return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)

-    def predict_single_output(self, Xnew,output=0, which_parts='all', full_cov=False, likelihood_args=dict()):
-        """
-        For a specific output, calls predict() at the new point(s) Xnew.
-        This functions calls _add_output_index(), so Xnew should not have an index column specifying the output.
-
-        :param Xnew: The points at which to make a prediction
-        :type Xnew: np.ndarray, Nnew x self.input_dim
-        :param which_parts: specifies which outputs kernel(s) to use in prediction
-        :type which_parts: ('all', list of bools)
-        :param full_cov: whether to return the full covariance matrix, or just the diagonal
-        :type full_cov: bool
-        :returns: mean: posterior mean, a Numpy array, Nnew x self.input_dim
-        :returns: var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
-        :returns: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
-        .. Note:: For multiple non-independent outputs models only.
-        """
-        Xnew = self._add_output_index(Xnew, output)
-        return self.predict(Xnew, which_parts=which_parts, full_cov=full_cov, likelihood_args=likelihood_args)
+    def predictive_gradients(self, Xnew):
+        """
+        Compute the derivatives of the latent function with respect to X*
+
+        Given a set of points at which to predict X* (size [N*,Q]), compute the
+        derivatives of the mean and variance. Resulting arrays are sized:
+            dmu_dX* -- [N*, Q ,D], where D is the number of output in this GP (usually one).
+
+            dv_dX* -- [N*, Q], (since all outputs have the same variance)
+
+        :param X: The points at which to get the predictive gradients
+        :type X: np.ndarray (Xnew x self.input_dim)
+        :returns: dmu_dX, dv_dX
+        :rtype: [np.ndarray (N*, Q ,D), np.ndarray (N*,Q) ]
+        """
+        dmu_dX = np.empty((Xnew.shape[0],Xnew.shape[1],self.output_dim))
+        for i in range(self.output_dim):
+            dmu_dX[:,:,i] = self.kern.gradients_X(self.posterior.woodbury_vector[:,i:i+1].T, Xnew, self.X)
+
+        # gradients wrt the diagonal part k_{xx}
+        dv_dX = self.kern.gradients_X(np.eye(Xnew.shape[0]), Xnew)
+        #grads wrt 'Schur' part K_{xf}K_{ff}^{-1}K_{fx}
+        alpha = -2.*np.dot(self.kern.K(Xnew, self.X),self.posterior.woodbury_inv)
+        dv_dX += self.kern.gradients_X(alpha, Xnew, self.X)
+        return dmu_dX, dv_dX

-    def getstate(self):
-        return GPBase.getstate(self)
-
-    def setstate(self, state):
-        GPBase.setstate(self, state)
-        self._set_params(self._get_params())
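Continuing that sketch, the reworked prediction API returns a (mean, variance) pair, with quantiles and input gradients available from the new methods:

Xtest = np.linspace(0, 1, 5)[:, None]
mean, var = m.predict(Xtest)                    # pushed through the likelihood
lower, upper = m.predict_quantiles(Xtest)       # default quantiles (2.5, 97.5)
dmu_dX, dv_dX = m.predictive_gradients(Xtest)   # shapes (5, 1, 1) and (5, 1)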
def posterior_samples_f(self,X,size=10, full_cov=True):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim)
:param size: the number of a posteriori samples.
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations
:rtype: np.ndarray (N x samples)
"""
m, v = self._raw_predict(X, full_cov=full_cov)
if self.normalizer is not None:
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
if not full_cov:
Ysim = np.random.multivariate_normal(m.flatten(), np.diag(v.flatten()), size).T
else:
Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
return Ysim
def posterior_samples(self, X, size=10, full_cov=False, Y_metadata=None):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray (Nnew x self.input_dim.)
:param size: the number of a posteriori samples.
:type size: int.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
Ysim = self.posterior_samples_f(X, size, full_cov=full_cov)
Ysim = self.likelihood.samples(Ysim, Y_metadata)
return Ysim
def plot_f(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=True,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
"""
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
This is a call to plot with plot_raw=True.
Data will not be plotted in this, as the GP's view of the world
may live in another space, or units then the data.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
if linecol is not None:
kw['linecol'] = linecol
if fillcol is not None:
kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols.
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice model.X, model.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param linecol: color of line to plot [Tango.colorsHex['darkBlue']]
:type linecol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param fillcol: color of fill [Tango.colorsHex['lightBlue']]
:type fillcol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) as is standard in matplotlib
:param Y_metadata: additional data associated with Y which may be needed
:type Y_metadata: dict
:param data_symbol: symbol as used matplotlib, by default this is a black cross ('kx')
:type data_symbol: color either as Tango.colorsHex object or character ('r' is red, 'g' is green) alongside marker type, as is standard in matplotlib.
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import models_plots
kw = {}
if linecol is not None:
kw['linecol'] = linecol
if fillcol is not None:
kw['fillcol'] = fillcol
return models_plots.plot_fit(self, plot_limits, which_data_rows,
which_data_ycols, fixed_inputs,
levels, samples, fignum, ax, resolution,
plot_raw=plot_raw, Y_metadata=Y_metadata,
data_symbol=data_symbol, **kw)
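Both plotting entry points above delegate to models_plots.plot_fit; a usage sketch for the toy model from the earlier example (requires matplotlib):

m.plot()     # posterior prediction with the data overlaid
m.plot_f()   # the GP's latent view, before the likelihood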
def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this model
"""
return self.kern.input_sensitivity(summarize=summarize)
def optimize(self, optimizer=None, start=None, **kwargs):
"""
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations
:type max_f_eval: int
:messages: whether to display during optimisation
:type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer), a range of optimisers can be found in :module:`~GPy.inference.optimization`, they include 'scg', 'lbfgs', 'tnc'.
:type optimizer: string
"""
self.inference_method.on_optimization_start()
try:
super(GP, self).optimize(optimizer, start, **kwargs)
except KeyboardInterrupt:
print "KeyboardInterrupt caught, calling on_optimization_end() to round things up"
self.inference_method.on_optimization_end()
raise
def infer_newX(self, Y_new, optimize=True, ):
"""
Infer the distribution of X for the new observed data *Y_new*.
:param Y_new: the new observed data for inference
:type Y_new: numpy.ndarray
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the posterior estimation of X and the model that optimize X
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
"""
from ..inference.latent_function_inference.inferenceX import infer_newX
return infer_newX(self, Y_new, optimize=optimize)
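The sampling and optimisation methods defined above can be exercised as follows (sample sizes are illustrative; the optimizer defaults to the model's preferred_optimizer, 'bfgs' in the Model class further down):

m.optimize()
F_samples = m.posterior_samples_f(Xtest, size=3)   # draws of the latent function
Y_samples = m.posterior_samples(Xtest, size=3)     # draws pushed through the likelihood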

GPy/core/gp_base.py

@@ -1,276 +0,0 @@
import numpy as np
from .. import kern
from ..util.plot import gpplot, Tango, x_frame1D, x_frame2D
import pylab as pb
from GPy.core.model import Model
import warnings
from ..likelihoods import Gaussian, Gaussian_Mixed_Noise
class GPBase(Model):
"""
Gaussian process base model for holding shared behaviour between
sparse_GP and GP models, and potentially other models in the future.
Here we define some functions that are use
"""
def __init__(self, X, likelihood, kernel, normalize_X=False):
if len(X.shape)==1:
X = X.reshape(-1,1)
warnings.warn("One dimension output (N,) being reshaped to (N,1)")
self.X = X
assert len(self.X.shape) == 2, "too many dimensions for X input"
self.num_data, self.input_dim = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
self.likelihood = likelihood
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.num_data, self.output_dim = self.likelihood.data.shape
if normalize_X:
self._Xoffset = X.mean(0)[None, :]
self._Xscale = X.std(0)[None, :]
self._Xscale[np.where(self._Xscale==0)] = 1
self.X = (X.copy() - self._Xoffset) / self._Xscale
else:
self._Xoffset = np.zeros((1, self.input_dim))
self._Xscale = np.ones((1, self.input_dim))
super(GPBase, self).__init__()
# Model.__init__(self)
# All leaf nodes should call self._set_params(self._get_params()) at
# the end
def posterior_samples_f(self,X,size=10,which_parts='all'):
"""
Samples the posterior GP at the points X.
:param X: The points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples to plot.
:type size: int.
:param which_parts: which of the kernel functions to plot (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
m, v = self._raw_predict(X, which_parts=which_parts, full_cov=True)
v = v.reshape(m.size,-1) if len(v.shape)==3 else v
Ysim = np.random.multivariate_normal(m.flatten(), v, size).T
return Ysim
def posterior_samples(self,X,size=10,which_parts='all',noise_model=None):
"""
Samples the posterior GP at the points X.
:param X: the points at which to take the samples.
:type X: np.ndarray, Nnew x self.input_dim.
:param size: the number of a posteriori samples to plot.
:type size: int.
:param which_parts: which of the kernel functions to plot (additively).
:type which_parts: 'all', or list of bools.
:param full_cov: whether to return the full covariance matrix, or just the diagonal.
:type full_cov: bool.
:param noise_model: for mixed noise likelihood, the noise model to use in the samples.
:type noise_model: integer.
:returns: Ysim: set of simulations, a Numpy array (N x samples).
"""
Ysim = self.posterior_samples_f(X, size, which_parts=which_parts)
if isinstance(self.likelihood,Gaussian):
noise_std = np.sqrt(self.likelihood._get_params())
Ysim += np.random.normal(0,noise_std,Ysim.shape)
elif isinstance(self.likelihood,Gaussian_Mixed_Noise):
assert noise_model is not None, "A noise model must be specified."
noise_std = np.sqrt(self.likelihood._get_params()[noise_model])
Ysim += np.random.normal(0,noise_std,Ysim.shape)
else:
Ysim = self.likelihood.noise_model.samples(Ysim)
return Ysim
def plot_f(self, *args, **kwargs):
"""
Plot the GP's view of the world, where the data is normalized and before applying a likelihood.
This is a convenience function: we simply call self.plot with the
argument use_raw_predict set True. All args and kwargs are passed on to
plot.
see also: gp_base.plot
"""
kwargs['plot_raw'] = True
self.plot(*args, **kwargs)
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
levels=20, samples=0, fignum=None, ax=None, resolution=None,
plot_raw=False,
linecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue']):
"""
Plot the posterior of the GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimsensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rowsm which_data_ycols and which_parts
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independant outputs), only plot these
:type which_data_rows: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
"""
#deal with optional arguments
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(self.output_dim)
if len(which_data_ycols)==0:
raise ValueError('No data selected for plotting')
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
#one dimensional plotting
if len(free_dims) == 1:
#define the frame on which to plot
resolution = resolution or 200
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu[:,free_dims], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
#make a prediction on the frame and plot it
if plot_raw:
m, v = self._raw_predict(Xgrid, which_parts=which_parts)
lower = m - 2*np.sqrt(v)
upper = m + 2*np.sqrt(v)
Y = self.likelihood.Y
else:
m, v, lower, upper = self.predict(Xgrid, which_parts=which_parts, sampling=False) #Compute the exact mean
m_, v_, lower, upper = self.predict(Xgrid, which_parts=which_parts, sampling=True, num_samples=15000) #Apporximate the percentiles
Y = self.likelihood.data
for d in which_data_ycols:
gpplot(Xnew, m[:, d], lower[:, d], upper[:, d], axes=ax, edgecol=linecol, fillcol=fillcol)
ax.plot(Xu[which_data_rows,free_dims], Y[which_data_rows, d], 'kx', mew=1.5)
#optionally plot some samples
if samples: #NOTE not tested with fixed_inputs
Ysim = self.posterior_samples(Xgrid, samples, which_parts=which_parts)
for yi in Ysim.T:
ax.plot(Xnew, yi[:,None], Tango.colorsHex['darkBlue'], linewidth=0.25)
#ax.plot(Xnew, yi[:,None], marker='x', linestyle='--',color=Tango.colorsHex['darkBlue']) #TODO apply this line for discrete outputs.
#set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
ax.set_xlim(xmin, xmax)
ax.set_ylim(ymin, ymax)
#2D plotting
elif len(free_dims) == 2:
#define the frame for plotting on
resolution = resolution or 50
Xu = self.X * self._Xscale + self._Xoffset #NOTE self.X are the normalized values now
Xnew, _, _, xmin, xmax = x_frame2D(Xu[:,free_dims], plot_limits, resolution)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,free_dims] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
#predict on the frame and plot
if plot_raw:
m, _ = self._raw_predict(Xgrid, which_parts=which_parts)
Y = self.likelihood.Y
else:
m, _, _, _ = self.predict(Xgrid, which_parts=which_parts,sampling=False)
Y = self.likelihood.data
for d in which_data_ycols:
m_d = m[:,d].reshape(resolution, resolution).T
contour = ax.contour(x, y, m_d, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
scatter = ax.scatter(self.X[which_data_rows, free_dims[0]], self.X[which_data_rows, free_dims[1]], 40, Y[which_data_rows, d], cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
#set the limits of the plot to some sensible values
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
if samples:
warnings.warn("Samples are rather difficult to plot for 2D inputs...")
return contour, scatter
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def getstate(self):
"""
Get the curent state of the class. This is only used to efficiently
pickle the model. See also self.setstate
"""
return Model.getstate(self) + [self.X,
self.num_data,
self.input_dim,
self.kern,
self.likelihood,
self.output_dim,
self._Xoffset,
self._Xscale]
def setstate(self, state):
"""
Set the state of the model. Used for efficient pickling
"""
self._Xscale = state.pop()
self._Xoffset = state.pop()
self.output_dim = state.pop()
self.likelihood = state.pop()
self.kern = state.pop()
self.input_dim = state.pop()
self.num_data = state.pop()
self.X = state.pop()
Model.setstate(self, state)
def log_predictive_density(self, x_test, y_test):
"""
Calculation of the log predictive density
.. math:
p(y_{*}|D) = p(y_{*}|f_{*})p(f_{*}|\mu_{*}\\sigma^{2}_{*})
:param x_test: test observations (x_{*})
:type x_test: (Nx1) array
:param y_test: test observations (y_{*})
:type y_test: (Nx1) array
"""
mu_star, var_star = self._raw_predict(x_test)
return self.likelihood.log_predictive_density(y_test, mu_star, var_star)

GPy/core/mapping.py

@@ -1,24 +1,19 @@
-# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2013,2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
-from ..util.plot import Tango, x_frame1D, x_frame2D
-from parameterized import Parameterized
+import sys
+from parameterization import Parameterized
import numpy as np
-import pylab as pb

class Mapping(Parameterized):
    """
    Base model for shared behavior between models that can act like a mapping.
    """
-    def __init__(self, input_dim, output_dim):
+    def __init__(self, input_dim, output_dim, name='mapping'):
        self.input_dim = input_dim
        self.output_dim = output_dim
-        super(Mapping, self).__init__()
-        # Model.__init__(self)
-        # All leaf nodes should call self._set_params(self._get_params()) at
-        # the end
+        super(Mapping, self).__init__(name=name)

    def f(self, X):
        raise NotImplementedError
@@ -35,7 +30,8 @@ class Mapping(Parameterized):
        raise NotImplementedError

    def df_dtheta(self, dL_df, X):
-        """The gradient of the outputs of the multi-layer perceptron with respect to each of the parameters.
+        """The gradient of the outputs of the mapping with respect to each of the parameters.

        :param dL_df: gradient of the objective with respect to the function.
        :type dL_df: ndarray (num_data x output_dim)
        :param X: input locations where the function is evaluated.
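A minimal, hypothetical subclass illustrating the f / df_dtheta interface of the Mapping base class above (the class and import path are assumptions for illustration only):

import numpy as np
from GPy.core.mapping import Mapping   # module path assumed

class IdentityMapping(Mapping):
    """Maps inputs to themselves; it has no parameters."""
    def __init__(self, dim, name='identity_mapping'):
        super(IdentityMapping, self).__init__(input_dim=dim, output_dim=dim, name=name)

    def f(self, X):
        return X

    def df_dtheta(self, dL_df, X):
        return np.zeros(0)   # no parameters, so an empty gradient vector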
@@ -43,85 +39,42 @@ class Mapping(Parameterized):
        :returns: Matrix containing gradients with respect to parameters of each output for each input data.
        :rtype: ndarray (num_params length)
        """
        raise NotImplementedError

-    def plot(self, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20, samples=0, fignum=None, ax=None, fixed_inputs=[], linecol=Tango.colorsHex['darkBlue']):
+    def plot(self, *args):
        """
+        Plot the mapping.
+
        Plots the mapping associated with the model.
          - In one dimension, the function is plotted.
-          - In two dimsensions, a contour-plot shows the function
+          - In two dimensions, a contour-plot shows the function
          - In higher dimensions, we've not implemented this yet !TODO!

        Can plot only part of the data and part of the posterior functions
        using which_data and which_functions

-        :param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
-        :type plot_limits: np.array
+        This is a convenience function: arguments are passed to
+        GPy.plotting.matplot_dep.models_plots.plot_mapping
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param linecol: color of line to plot.
:type linecol:
:param levels: for 2D plotting, the number of contour levels to use is ax is None, create a new figure
""" """
# TODO include samples
if which_data == 'all':
which_data = slice(None)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
plotdims = self.input_dim - len(fixed_inputs)
if plotdims == 1:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
fixed_dims = np.array([i for i,v in fixed_inputs])
freedim = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
Xnew, xmin, xmax = x_frame1D(Xu[:,freedim], plot_limits=plot_limits)
Xgrid = np.empty((Xnew.shape[0],self.input_dim))
Xgrid[:,freedim] = Xnew
for i,v in fixed_inputs:
Xgrid[:,i] = v
f = self.predict(Xgrid, which_parts=which_parts)
for d in range(y.shape[1]):
ax.plot(Xnew, f[:, d], edgecol=linecol)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, _, _, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
f = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
ax.contour(x, y, f, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet) # @UndefinedVariable
ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1])
if "matplotlib" in sys.modules:
from ..plotting.matplot_dep import models_plots
models_plots.plot_mapping(self, *args)
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NameError, "matplotlib package has not been imported."
from GPy.core.model import Model class Bijective_mapping(Mapping):
"""
This is a mapping that is bijective, i.e. you can go from X to f and
also back from f to X. The inverse mapping is called g().
"""
def __init__(self, input_dim, output_dim, name='bijective_mapping'):
super(Bijective_mapping, self).__init__(name=name)
def g(self, f):
"""Inverse mapping from output domain of the function to the inputs."""
raise NotImplementedError
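# --- Illustrative sketch (not part of GPy): a concrete bijective mapping.
# It only shows what the interface above is meant to express, namely that
# g() inverts f(). The ExpMapping name and the constructor arguments are
# hypothetical and assume Mapping accepts (input_dim, output_dim, name).
import numpy as np

class ExpMapping(Bijective_mapping):
    def __init__(self, input_dim=1, output_dim=1):
        super(ExpMapping, self).__init__(input_dim, output_dim, name='exp_mapping')

    def f(self, X):
        # forward map: inputs X -> outputs f
        return np.exp(X)

    def g(self, f):
        # inverse map: outputs f -> inputs X, so g(f(X)) recovers X
        return np.log(f)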
from model import Model
class Mapping_check_model(Model): class Mapping_check_model(Model):
""" """
@ -1,182 +1,35 @@
# Copyright (c) 2012, 2013, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from .. import likelihoods from .. import likelihoods
from ..inference import optimization from ..inference import optimization
from ..util.linalg import jitchol from ..util.misc import opt_wrapper
from GPy.util.misc import opt_wrapper from parameterization import Parameterized
from parameterized import Parameterized
import multiprocessing as mp import multiprocessing as mp
import numpy as np import numpy as np
from GPy.core.domains import POSITIVE, REAL
from numpy.linalg.linalg import LinAlgError from numpy.linalg.linalg import LinAlgError
import itertools
# import numdifftools as ndt # import numdifftools as ndt
class Model(Parameterized): class Model(Parameterized):
_fail_count = 0 # Count of failed optimization steps (see objective) _fail_count = 0 # Count of failed optimization steps (see objective)
_allowed_failures = 10 # number of allowed failures _allowed_failures = 10 # number of allowed failures
def __init__(self):
Parameterized.__init__(self) def __init__(self, name):
self.priors = None super(Model, self).__init__(name) # Parameterized.__init__(self)
self.optimization_runs = [] self.optimization_runs = []
self.sampling_runs = [] self.sampling_runs = []
self.preferred_optimizer = 'scg' self.preferred_optimizer = 'bfgs'
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes from .parameterization.ties_and_remappings import Tie
self.tie = Tie()
self.link_parameter(self.tie, -1)
self.add_observer(self.tie, self.tie._parameters_changed_notification, priority=-500)
def log_likelihood(self): def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class" return self.gradient
def getstate(self):
"""
Get the current state of the class.
Inherited from Parameterized, so add those parameters to the state
:return: list of states from the model.
"""
return Parameterized.getstate(self) + \
[self.priors, self.optimization_runs,
self.sampling_runs, self.preferred_optimizer]
def setstate(self, state):
"""
set state from previous call to getstate
call Parameterized with the rest of the state
:param state: the state of the model.
:type state: list as returned from getstate.
"""
self.preferred_optimizer = state.pop()
self.sampling_runs = state.pop()
self.optimization_runs = state.pop()
self.priors = state.pop()
Parameterized.setstate(self, state)
def set_prior(self, regexp, what):
"""
Sets priors on the model parameters.
**Notes**
Asserts that the prior is suitable for the constraint. If the
wrong constraint is in place, an error is raised. If no
constraint is in place, one is added (warning printed).
For tied parameters, the prior will only be "counted" once, thus
a prior object is only inserted on the first tied index
:param regexp: regular expression of parameters on which priors need to be set.
:type param: string, regexp, or integer array
:param what: prior to set on parameter.
:type what: GPy.core.Prior type
"""
if self.priors is None:
self.priors = [None for i in range(self._get_params().size)]
which = self.grep_param_names(regexp)
# check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
if len(tie_partial_matches):
raise ValueError, "cannot place prior across partial ties"
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
if len(tie_matches) > 1:
raise ValueError, "cannot place prior across multiple ties"
elif len(tie_matches) == 1:
which = which[:1] # just place a prior object on the first parameter
# check constraints are okay
if what.domain is POSITIVE:
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is POSITIVE]
if len(constrained_positive_indices):
constrained_positive_indices = np.hstack(constrained_positive_indices)
else:
constrained_positive_indices = np.zeros(shape=(0,))
bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
unconst = np.setdiff1d(which, constrained_positive_indices)
if len(unconst):
print "Warning: constraining parameters to be positive:"
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n'
self.constrain_positive(unconst)
elif what.domain is REAL:
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else:
raise ValueError, "prior not recognised"
# store the prior in a local list
for w in which:
self.priors[w] = what
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
:param name: the name of parameters required (as a regular expression).
:type name: regular expression
:param return_names: whether or not to return the names matched (default False)
:type return_names: bool
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def log_prior(self):
"""evaluate the prior"""
if self.priors is not None:
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
else:
return 0.
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
if self.priors is None:
return 0.
x = self._get_params()
ret = np.zeros(x.size)
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
return ret
def _transform_gradients(self, g):
x = self._get_params()
for index, constraint in zip(self.constrained_indices, self.constraints):
g[index] = g[index] * constraint.gradfactor(x[index])
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.fixed_indices):
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
def randomize(self):
"""
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
# first take care of all parameters (from N(0,1))
x = self._get_params_transformed()
x = np.random.randn(x.size)
self._set_params_transformed(x)
# now draw from prior where possible
x = self._get_params()
if self.priors is not None:
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs): def optimize_restarts(self, num_restarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
""" """
@ -207,10 +60,12 @@ class Model(Parameterized):
:param messages: whether to display during optimisation :param messages: whether to display during optimisation
:type messages: bool :type messages: bool
.. note:: If num_processes is None, the number of workers in the
multiprocessing pool is automatically set to the number of processors
on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self.optimizer_array.copy()
if parallel: if parallel:
try: try:
@ -221,8 +76,8 @@ class Model(Parameterized):
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs) job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job) jobs.append(job)
pool.close() # signal that no more data coming in pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete pool.join() # wait for all the tasks to complete
except KeyboardInterrupt: except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool." print "Ctrl+c received, terminating and joining pool."
pool.terminate() pool.terminate()
@ -246,11 +101,11 @@ class Model(Parameterized):
if len(self.optimization_runs): if len(self.optimization_runs):
i = np.argmin([o.f_opt for o in self.optimization_runs]) i = np.argmin([o.f_opt for o in self.optimization_runs])
self._set_params_transformed(self.optimization_runs[i].x_opt) self.optimizer_array = self.optimization_runs[i].x_opt
else: else:
self._set_params_transformed(initial_parameters) self.optimizer_array = initial_parameters
def ensure_default_constraints(self): def ensure_default_constraints(self, warning=True):
""" """
Ensure that any variables which should clearly be positive Ensure that any variables which should clearly be positive
have been constrained somehow. The method performs a regular have been constrained somehow. The method performs a regular
@ -258,183 +113,164 @@ class Model(Parameterized):
'variance', 'lengthscale', 'precision' and 'kappa'. If any of 'variance', 'lengthscale', 'precision' and 'kappa'. If any of
these terms are present in the name the parameter is these terms are present in the name the parameter is
constrained positive. constrained positive.
"""
positive_strings = ['variance', 'lengthscale', 'precision', 'decay', 'kappa']
# param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings:
for i in self.grep_param_names(".*" + s):
if not (i in currently_constrained):
to_make_positive.append(i)
if len(to_make_positive):
self.constrain_positive(np.asarray(to_make_positive))
def objective_function(self, x): DEPRECATED.
"""
raise DeprecationWarning, 'parameters now have default constraints'
def objective_function(self):
"""
The objective function for the given algorithm.
This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need
to return the objective function here.
For probabilistic models this is the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your objective to minimize here!
"""
return -float(self.log_likelihood()) - self.log_prior()
def objective_function_gradients(self):
"""
The gradients for the objective function for the given algorithm.
The gradients are w.r.t. the *negative* objective function, as
this framework works with *negative* log-likelihoods as a default.
You can find the gradient for the parameters in self.gradient at all times.
This is the place, where gradients get stored for parameters.
This function is the true objective, which wants to be minimized.
Note that all parameters are already set and in place, so you just need
to return the gradient here.
For probabilistic models this is the gradient of the negative log_likelihood
(including the MAP prior), so we return it here. If your model is not
probabilistic, just return your *negative* gradient here!
"""
return -(self._log_likelihood_gradients() + self._log_prior_gradients())
def _grads(self, x):
"""
Gets the gradients from the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the gradients, and will raise the original exception if
the objective cannot be computed.
:param x: the parameters of the model.
:type x: np.array
"""
try:
# self._set_params_transformed(x)
self.optimizer_array = x
obj_grads = self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
return obj_grads
def _objective(self, x):
""" """
The objective function passed to the optimizer. It combines The objective function passed to the optimizer. It combines
the likelihood and the priors. the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to Failures are handled robustly. The algorithm will try several times to
return the objective, and will raise the original exception if it return the objective, and will raise the original exception if
the objective cannot be computed. the objective cannot be computed.
:param x: the parameters of the model. :param x: the parameters of the model.
:parameter type: np.array :parameter type: np.array
""" """
try: try:
self._set_params_transformed(x) self.optimizer_array = x
obj = self.objective_function()
self._fail_count = 0 self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e: except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures: if self._fail_count >= self._allowed_failures:
raise e raise
self._fail_count += 1 self._fail_count += 1
return np.inf return np.inf
return -self.log_likelihood() - self.log_prior() return obj
def objective_function_gradients(self, x): def _objective_grads(self, x):
"""
Gets the gradients from the likelihood and the priors.
Failures are handled robustly. The algorithm will try several times to
return the gradients, and will raise the original exception if it
the objective cannot be computed.
:param x: the parameters of the model.
:parameter type: np.array
"""
try: try:
self._set_params_transformed(x) self.optimizer_array = x
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()) obj_f, obj_grads = self.objective_function(), self._transform_gradients(self.objective_function_gradients())
self._fail_count = 0 self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError) as e: except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures: if self._fail_count >= self._allowed_failures:
raise e raise
self._fail_count += 1
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100)
return obj_grads
def objective_and_gradients(self, x):
"""
Compute the objective function of the model and the gradient of the model at the point given by x.
:param x: the point at which gradients are to be computed.
:type np.array:
"""
try:
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
self._fail_count = 0
obj_grads = -self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
except (LinAlgError, ZeroDivisionError, ValueError) as e:
if self._fail_count >= self._allowed_failures:
raise e
self._fail_count += 1 self._fail_count += 1
obj_f = np.inf obj_f = np.inf
obj_grads = np.clip(-self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients()), -1e100, 1e100) obj_grads = np.clip(self._transform_gradients(self.objective_function_gradients()), -1e100, 1e100)
return obj_f, obj_grads return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs): def optimize(self, optimizer=None, start=None, **kwargs):
""" """
Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors. Optimize the model using self.log_likelihood and self.log_likelihood_gradient, as well as self.priors.
kwargs are passed to the optimizer. They can be: kwargs are passed to the optimizer. They can be:
:param max_f_eval: maximum number of function evaluations :param max_f_eval: maximum number of function evaluations
:type max_f_eval: int :type max_f_eval: int
:messages: whether to display during optimisation :messages: whether to display during optimisation
:type messages: bool :type messages: bool
:param optimzer: which optimizer to use (defaults to self.preferred optimizer) :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimzer: string TODO: valid strings? :type optimizer: string
Valid optimizers are:
- 'scg': scaled conjugate gradient method, recommended for stability.
See also GPy.inference.optimization.scg
- 'fmin_tnc': truncated Newton method (see scipy.optimize.fmin_tnc)
- 'simplex': the Nelder-Mead simplex method (see scipy.optimize.fmin),
- 'lbfgsb': the l-bfgs-b method (see scipy.optimize.fmin_l_bfgs_b),
- 'sgd': stochastic gradient decsent (see scipy.optimize.sgd). For experts only!
""" """
if self.is_fixed:
print 'nothing to optimize'
if self.size == 0:
print 'nothing to optimize'
if not self.update_model():
print "setting updates on again"
self.update_model(True)
if start == None:
start = self.optimizer_array
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer
if start == None: if isinstance(optimizer, optimization.Optimizer):
start = self._get_params_transformed() opt = optimizer
opt.model = self
else:
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, **kwargs)
optimizer = optimization.get_optimizer(optimizer) opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)
opt = optimizer(start, model=self, **kwargs)
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt) self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt) self.optimizer_array = opt.x_opt
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs): def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1" # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs) # @UndefinedVariable
sgd.run() sgd.run()
self.optimization_runs.append(sgd) self.optimization_runs.append(sgd)
def Laplace_covariance(self): def _checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3, df_tolerance=1e-12):
"""return the covariance matrix of a Laplace approximation at the current (stationary) point."""
# TODO add in the prior contributions for MAP estimation
# TODO fix the hessian for tied, constrained and fixed components
if hasattr(self, 'log_likelihood_hessian'):
A = -self.log_likelihood_hessian()
else:
print "numerically calculating Hessian. please be patient!"
x = self._get_params()
def f(x):
self._set_params(x)
return self.log_likelihood()
h = ndt.Hessian(f) # @UndefinedVariable
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
A[aa, aa] = 0.
return A
def Laplace_evidence(self):
"""Returns an estiamte of the model evidence based on the Laplace approximation.
Uses a numerical estimate of the Hessian if none is available analytically."""
A = self.Laplace_covariance()
try:
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
except:
return np.nan
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self):
s = Parameterized.__str__(self).split('\n')
#def __str__(self, names=None):
# if names is None:
# names = self._get_print_names()
#s = Parameterized.__str__(self, names=names).split('\n')
# add priors to the string
if self.priors is not None:
strs = [str(p) if p is not None else '' for p in self.priors]
else:
strs = [''] * len(self._get_params())
# strs = [''] * len(self._get_param_names())
# name_indices = self.grep_param_names("|".join(names))
# strs = np.array(strs)[name_indices]
width = np.array(max([len(p) for p in strs] + [5])) + 4
log_like = self.log_likelihood()
log_prior = self.log_prior()
obj_funct = '\nLog-likelihood: {0:.3e}'.format(log_like)
if len(''.join(strs)) != 0:
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
obj_funct += '\n\n'
s[0] = obj_funct + s[0]
s[0] += "|{h:^{col}}".format(h='prior', col=width)
s[1] += '-' * (width + 1)
for p in range(2, len(strs) + 2):
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
return '\n'.join(s)
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
""" """
Check the gradient of the model by comparing to a numerical
estimate. If the verbose flag is passed, individual
components are tested (and printed)
:param verbose: If True, print a "full" checking of each parameter :param verbose: If True, print a "full" checking of each parameter
@ -447,37 +283,54 @@ class Model(Parameterized):
Note:- Note:-
The gradient is considered correct if the ratio of the analytical The gradient is considered correct if the ratio of the analytical
and numerical gradients is within <tolerance> of unity. and numerical gradients is within <tolerance> of unity.
"""
x = self._get_params_transformed().copy() The *dF_ratio* indicates the limit of numerical accuracy of numerical gradients.
If it is too small, e.g., smaller than 1e-12, the numerical gradients are usually
not accurate enough for the tests (shown with blue).
"""
x = self.optimizer_array.copy()
if not verbose: if not verbose:
# just check the global ratio # make sure only to test the selected parameters
if target_param is None:
#choose a random direction to find the linear approximation in transformed_index = range(len(x))
if x.size==2:
dx = step * np.ones(2) # random direction for 2 parameters can fail dure to symmetry
else: else:
dx = step * np.sign(np.random.uniform(-1, 1, x.size)) transformed_index = self._raveled_index_for(target_param)
if self._has_fixes():
indices = np.r_[:self.size]
which = (transformed_index[:, None] == indices[self._fixes_][None, :]).nonzero()
transformed_index = (indices - (~self._fixes_).cumsum())[transformed_index[which[0]]]
if transformed_index.size == 0:
print "No free parameters to check"
return
# just check the global ratio
dx = np.zeros(x.shape)
dx[transformed_index] = step * (np.sign(np.random.uniform(-1, 1, transformed_index.size)) if transformed_index.size != 2 else 1.)
# evaluate around the point x
f1, g1 = self.objective_and_gradients(x + dx) f1 = self._objective(x + dx)
f2, g2 = self.objective_and_gradients(x - dx) f2 = self._objective(x - dx)
gradient = self.objective_function_gradients(x) gradient = self._grads(x)
numerical_gradient = (f1 - f2) / (2 * dx) dx = dx[transformed_index]
global_ratio = (f1 - f2) / (2 * np.dot(dx, np.where(gradient==0, 1e-32, gradient))) gradient = gradient[transformed_index]
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() < tolerance) denominator = (2 * np.dot(dx, gradient))
global_ratio = (f1 - f2) / np.where(denominator == 0., 1e-32, denominator)
global_diff = np.abs(f1 - f2) < tolerance and np.allclose(gradient, 0, atol=tolerance)
if global_ratio is np.nan:
global_ratio = 0
return np.abs(1. - global_ratio) < tolerance or global_diff
else: else:
# check the gradient of each parameter individually, and do some pretty printing # check the gradient of each parameter individually, and do some pretty printing
try: try:
names = self._get_param_names_transformed() names = self._get_param_names()
except NotImplementedError: except NotImplementedError:
names = ['Variable %i' % i for i in range(len(x))] names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing # Prepare for pretty-printing
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical'] header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical', 'dF_ratio']
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])]) max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
float_len = 10 float_len = 10
cols = [max_names] cols = [max_names]
@ -487,115 +340,77 @@ class Model(Parameterized):
header_string = map(lambda x: '|'.join(x), [header_string]) header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0]) separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator]) print '\n'.join([header_string[0], separator])
if target_param is None: if target_param is None:
param_list = range(len(x)) param_index = range(len(x))
transformed_index = param_index
else: else:
param_list = self.grep_param_names(target_param, transformed=True, search=True) param_index = self._raveled_index_for(target_param)
if not np.any(param_list): if self._has_fixes():
indices = np.r_[:self.size]
which = (param_index[:, None] == indices[self._fixes_][None, :]).nonzero()
param_index = param_index[which[0]]
transformed_index = (indices - (~self._fixes_).cumsum())[param_index]
# print param_index, transformed_index
else:
transformed_index = param_index
if param_index.size == 0:
print "No free parameters to check" print "No free parameters to check"
return return
gradient = self._grads(x).copy()
for i in param_list: np.where(gradient == 0, 1e-312, gradient)
ret = True
for nind, xind in itertools.izip(param_index, transformed_index):
xx = x.copy() xx = x.copy()
xx[i] += step xx[xind] += step
f1, g1 = self.objective_and_gradients(xx) f1 = self._objective(xx)
xx[i] -= 2.*step xx[xind] -= 2.*step
f2, g2 = self.objective_and_gradients(xx) f2 = self._objective(xx)
gradient = self.objective_function_gradients(x)[i] df_ratio = np.abs((f1-f2)/min(f1,f2))
df_unstable = df_ratio<df_tolerance
numerical_gradient = (f1 - f2) / (2 * step) numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * np.where(gradient==0, 1e-312, gradient)) if np.all(gradient[xind] == 0): ratio = (f1 - f2) == gradient[xind]
difference = np.abs((f1 - f2) / 2 / step - gradient) else: ratio = (f1 - f2) / (2 * step * gradient[xind])
difference = np.abs(numerical_gradient - gradient[xind])
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance: if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
formatted_name = "\033[92m {0} \033[0m".format(names[i]) formatted_name = "\033[92m {0} \033[0m".format(names[nind])
ret &= True
else: else:
formatted_name = "\033[91m {0} \033[0m".format(names[i]) formatted_name = "\033[91m {0} \033[0m".format(names[nind])
ret &= False
if df_unstable:
formatted_name = "\033[94m {0} \033[0m".format(names[nind])
r = '%.6f' % float(ratio) r = '%.6f' % float(ratio)
d = '%.6f' % float(difference) d = '%.6f' % float(difference)
g = '%.6f' % gradient g = '%.6f' % gradient[xind]
ng = '%.6f' % float(numerical_gradient) ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4]) df = '%1.e' % float(df_ratio)
grad_string = "{0:<{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}|{5:^{c5}}".format(formatted_name, r, d, g, ng, df, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4], c5=cols[5])
print grad_string print grad_string
def input_sensitivity(self): self.optimizer_array = x
""" return ret
return an array describing the sesitivity of the model to each input
NB. Right now, we're basing this on the lengthscales (or def _repr_html_(self):
variances) of the kernel. TODO: proper sensitivity analysis """Representation of the model in html for notebook display."""
where we integrate across the model inputs and evaluate the model_details = [['<b>Model</b>', self.name + '<br>'],
effect on the variance of the model output. """ ['<b>Log-likelihood</b>', '{}<br>'.format(float(self.log_likelihood()))],
["<b>Number of Parameters</b>", '{}<br>'.format(self.size)]]
from operator import itemgetter
to_print = [""] + ["{}: {}".format(name, detail) for name, detail in model_details] + ["<br><b>Parameters</b>:"]
to_print.append(super(Model, self)._repr_html_())
return "\n".join(to_print)
if not hasattr(self, 'kern'): def __str__(self):
raise ValueError, "this model has no kernel" model_details = [['Name', self.name],
['Log-likelihood', '{}'.format(float(self.log_likelihood()))],
["Number of Parameters", '{}'.format(self.size)]]
from operator import itemgetter
max_len = reduce(lambda a, b: max(len(b[0]), a), model_details, 0)
to_print = [""] + ["{0:{l}} : {1}".format(name, detail, l=max_len) for name, detail in model_details] + ["Parameters:"]
to_print.append(super(Model, self).__str__())
return "\n".join(to_print)
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear', 'rbf_inv']]
if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
if k.name == 'rbf':
return 1. / k.lengthscale
elif k.name == 'rbf_inv':
return k.inv_lengthscale
elif k.name == 'linear':
return k.variances
def pseudo_EM(self, stop_crit=.1, **kwargs):
"""
EM - like algorithm for Expectation Propagation and Laplace approximation
:param stop_crit: convergence criterion
:type stop_crit: float
.. Note: kwargs are passed to update_likelihood and optimize functions.
"""
assert isinstance(self.likelihood, (likelihoods.EP, likelihoods.EP_Mixed_Noise, likelihoods.Laplace)), "pseudo_EM is only available for approximate likelihoods"
ll_change = stop_crit + 1.
iteration = 0
last_ll = -np.inf
convergence = False
alpha = 0
stop = False
#Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon','power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop:
last_approximation = self.likelihood.copy()
last_params = self._get_params()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'],power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation()
new_ll = self.log_likelihood()
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
last_ll = self.log_likelihood()
if ll_change < stop_crit:
stop = True
iteration += 1
if stop:
print "%s iterations." % iteration
self.update_likelihood_approximation()
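# --- Illustrative sketch (not taken from the GPy docs): a minimal Model
# subclass showing how the interface above fits together. The Quadratic
# class is hypothetical; it assumes Param, link_parameter and the
# parameters_changed hook behave as in the parameterization framework
# introduced by this merge, and that checkgrad() is exposed publicly as a
# wrapper around _checkgrad().
import numpy as np
from GPy.core.model import Model
from GPy.core.parameterization import Param

class Quadratic(Model):
    """Toy model whose 'log-likelihood' is -(theta - 3)^2."""
    def __init__(self):
        super(Quadratic, self).__init__(name='quadratic')
        self.theta = Param('theta', np.array([0.]))
        self.link_parameter(self.theta)

    def log_likelihood(self):
        return -float(((self.theta - 3.) ** 2).sum())

    def parameters_changed(self):
        # gradient of the log-likelihood w.r.t. theta
        self.theta.gradient = -2. * (self.theta - 3.)

m = Quadratic()
m.optimize()              # objective_function() is -log_likelihood(), so theta -> 3
print(m.checkgrad())      # compares analytic against numerical gradients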
@ -0,0 +1,5 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from param import Param, ObsAr
from parameterized import Parameterized
@ -0,0 +1,25 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
(Hyper-)Parameter domains defined for :py:mod:`~GPy.core.priors` and :py:mod:`~GPy.kern`.
These domains specify the legitimate realm of the parameters to live in.
:const:`~GPy.core.domains._REAL` :
real domain, all values in the real numbers are allowed
:const:`~GPy.core.domains._POSITIVE`:
positive domain, only positive real values are allowed
:const:`~GPy.core.domains._NEGATIVE`:
same as :const:`~GPy.core.domains._POSITIVE`, but only negative values are allowed
:const:`~GPy.core.domains._BOUNDED`:
only values within the bounded range are allowed,
the bounds are specified withing the object with the bounded range
"""
_REAL = 'real'
_POSITIVE = "positive"
_NEGATIVE = 'negative'
_BOUNDED = 'bounded'
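# --- Illustrative sketch (the helper below is hypothetical; only the
# constants above come from this module): how calling code might branch on
# a declared domain when validating a parameter value.
def _value_in_domain(value, domain, bounds=None):
    if domain == _POSITIVE:
        return value > 0
    elif domain == _NEGATIVE:
        return value < 0
    elif domain == _BOUNDED:
        lower, upper = bounds
        return lower <= value <= upper
    return domain == _REAL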
@ -0,0 +1,302 @@
# Copyright (c) 2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy
from numpy.lib.function_base import vectorize
from lists_and_dicts import IntArrayDict
def extract_properties_to_index(index, props):
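# invert the per-index property lists into a dict mapping each property
# to the integer array of indices it applies to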
prop_index = dict()
for i, cl in enumerate(props):
for c in cl:
ind = prop_index.get(c, list())
ind.append(index[i])
prop_index[c] = ind
for c, i in prop_index.items():
prop_index[c] = numpy.array(i, dtype=int)
return prop_index
class ParameterIndexOperations(object):
"""
This object wraps a dictionary, whose keys are _operations_ that we'd like
to apply to a parameter array, and whose values are np integer arrays which
index the parameter array appropriately.
A model instance will contain one instance of this class for each thing
that needs indexing (i.e. constraints, ties and priors). Parameters within
the model constain instances of the ParameterIndexOperationsView class,
which can map from a 'local' index (starting 0) to this global index.
Here's an illustration:
#=======================================================================
model : 0 1 2 3 4 5 6 7 8 9
key1: 4 5
key2: 7 8
param1: 0 1 2 3 4 5
key1: 2 3
key2: 5
param2: 0 1 2 3 4
key1: 0
key2: 2 3
#=======================================================================
The views of this global index have a subset of the keys in this global
(model) index.
Adding a new key (e.g. a constraint) to a view will cause the view to pass
the new key to the global index, along with the local index and an offset.
This global index then stores the key and the appropriate global index
(which can be seen by the view).
See also:
ParameterIndexOperationsView
"""
_offset = 0
def __init__(self, constraints=None):
self._properties = IntArrayDict()
if constraints is not None:
for t, i in constraints.iteritems():
self.add(t, i)
def iteritems(self):
return self._properties.iteritems()
def items(self):
return self._properties.items()
def properties(self):
return self._properties.keys()
def iterproperties(self):
return self._properties.iterkeys()
def shift_right(self, start, size):
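# 'size' new parameters were inserted at position 'start': move every
# stored index at or after 'start' to the right by 'size'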
for ind in self.iterindices():
toshift = ind>=start
ind[toshift] += size
def shift_left(self, start, size):
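# 'size' parameters were removed starting at 'start': drop their indices
# and move every remaining index at or after 'start' to the left by 'size'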
for v, ind in self.items():
todelete = (ind>=start) * (ind<start+size)
if todelete.size != 0:
ind = ind[~todelete]
toshift = ind>=start
if toshift.size != 0:
ind[toshift] -= size
if ind.size != 0: self._properties[v] = ind
else: del self._properties[v]
def clear(self):
self._properties.clear()
@property
def size(self):
return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
def iterindices(self):
return self._properties.itervalues()
def indices(self):
return self._properties.values()
def properties_for(self, index):
"""
Returns a list of properties, such that each entry in the list corresponds
to the element of the index given.
Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]
>>> properties_for([2,3,5])
[['one'], ['one', 'two'], ['two']]
"""
return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
def properties_to_index_dict(self, index):
"""
Return a dictionary containing properties as keys and index arrays as values.
Thus, the indices for each property present in the given index will be
collected into one dictionary.
Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]
>>> properties_to_index_dict([2,3,5])
{'one':[2,3], 'two':[3,5]}
"""
props = self.properties_for(index)
prop_index = extract_properties_to_index(index, props)
return prop_index
def add(self, prop, indices):
self._properties[prop] = combine_indices(self._properties[prop], indices)
def remove(self, prop, indices):
if prop in self._properties:
diff = remove_indices(self[prop], indices)
removed = numpy.intersect1d(self[prop], indices, True)
if not index_empty(diff):
self._properties[prop] = diff
else:
del self._properties[prop]
return removed.astype(int)
return numpy.array([]).astype(int)
def update(self, parameter_index_view, offset=0):
for i, v in parameter_index_view.iteritems():
self.add(i, v+offset)
def copy(self):
return self.__deepcopy__(None)
def __deepcopy__(self, memo):
return ParameterIndexOperations(dict(self.iteritems()))
def __getitem__(self, prop):
return self._properties[prop]
def __delitem__(self, prop):
del self._properties[prop]
def __str__(self, *args, **kwargs):
import pprint
return pprint.pformat(dict(self._properties))
def combine_indices(arr1, arr2):
return numpy.union1d(arr1, arr2)
def remove_indices(arr, to_remove):
return numpy.setdiff1d(arr, to_remove, True)
def index_empty(index):
return numpy.size(index) == 0
class ParameterIndexOperationsView(object):
def __init__(self, param_index_operations, offset, size):
self._param_index_ops = param_index_operations
self._offset = offset
self._size = size
def __getstate__(self):
return [self._param_index_ops, self._offset, self._size]
def __setstate__(self, state):
self._param_index_ops = state[0]
self._offset = state[1]
self._size = state[2]
def _filter_index(self, ind):
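# keep only the global indices that fall inside this view's window
# [offset, offset + size) and translate them to local (0-based) indices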
return ind[(ind >= self._offset) * (ind < (self._offset + self._size))] - self._offset
def iteritems(self):
for i, ind in self._param_index_ops.iteritems():
ind2 = self._filter_index(ind)
if ind2.size > 0:
yield i, ind2
def items(self):
return [[i,v] for i,v in self.iteritems()]
def properties(self):
return [i for i in self.iterproperties()]
def iterproperties(self):
for i, _ in self.iteritems():
yield i
def shift_right(self, start, size):
self._param_index_ops.shift_right(start+self._offset, size)
def shift_left(self, start, size):
self._param_index_ops.shift_left(start+self._offset, size)
def clear(self):
for i, ind in self.items():
self._param_index_ops.remove(i, ind+self._offset)
@property
def size(self):
return reduce(lambda a,b: a+b.size, self.iterindices(), 0)
def iterindices(self):
for _, ind in self.iteritems():
yield ind
def indices(self):
return [ind for ind in self.iterindices()]
def properties_for(self, index):
"""
Returns a list of properties, such that each entry in the list corresponds
to the element of the index given.
Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]
>>> properties_for([2,3,5])
[['one'], ['one', 'two'], ['two']]
"""
return vectorize(lambda i: [prop for prop in self.iterproperties() if i in self[prop]], otypes=[list])(index)
def properties_to_index_dict(self, index):
"""
Return a dictionary containing properties as keys and index arrays as values.
Thus, the indices for each property present in the given index will be
collected into one dictionary.
Example:
let properties: 'one':[1,2,3,4], 'two':[3,5,6]
>>> properties_to_index_dict([2,3,5])
{'one':[2,3], 'two':[3,5]}
"""
return extract_properties_to_index(index, self.properties_for(index))
def add(self, prop, indices):
self._param_index_ops.add(prop, indices+self._offset)
def remove(self, prop, indices):
removed = self._param_index_ops.remove(prop, numpy.array(indices)+self._offset)
if removed.size > 0:
return removed-self._offset
return removed
def __getitem__(self, prop):
ind = self._filter_index(self._param_index_ops[prop])
return ind
def __delitem__(self, prop):
self.remove(prop, self[prop])
def __str__(self, *args, **kwargs):
import pprint
return pprint.pformat(dict(self.iteritems()))
def update(self, parameter_index_view, offset=0):
for i, v in parameter_index_view.iteritems():
self.add(i, v+offset)
def copy(self):
return self.__deepcopy__(None)
def __deepcopy__(self, memo):
return ParameterIndexOperations(dict(self.iteritems()))
pass
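# --- Illustrative usage sketch (follows the docstring examples above; the
# string keys stand in for arbitrary property objects such as constraints):
if __name__ == '__main__':
    pio = ParameterIndexOperations()
    pio.add('one', numpy.array([1, 2, 3, 4]))
    pio.add('two', numpy.array([3, 5, 6]))
    print(pio.properties_for(numpy.array([2, 3, 5])))
    # -> [['one'], ['one', 'two'], ['two']]  (as in the docstring example)

    # a view exposes a window of the global index, starting at an offset
    view = ParameterIndexOperationsView(pio, offset=2, size=4)
    view.add('three', numpy.array([0, 1]))
    print(pio['three'])   # -> [2 3] : local indices shifted by the offset
    print(view['three'])  # -> [0 1] : the same entries seen through the view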
@ -0,0 +1,139 @@
# Copyright (c) 2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from collections import defaultdict
import weakref
def intarray_default_factory():
import numpy as np
return np.int_([])
class IntArrayDict(defaultdict):
def __init__(self, default_factory=None):
"""
Default will be self._default, if not set otherwise
"""
defaultdict.__init__(self, intarray_default_factory)
class ArrayList(list):
"""
List to store ndarray-likes in.
It checks identity ('is') instead of calling __eq__ on each element.
"""
def __contains__(self, other):
for el in self:
if el is other:
return True
return False
def index(self, item):
index = 0
for el in self:
if el is item:
return index
index += 1
raise ValueError, "{} is not in list".format(item)
pass
class ObserverList(object):
"""
A list which contains the observers.
It only holds weak references to observers, such that unbound
observers don't dangle in memory.
"""
def __init__(self):
self._poc = []
def __getitem__(self, ind):
p,o,c = self._poc[ind]
return p, o(), c
def remove(self, priority, observer, callble):
"""
Remove one observer, which had priority and callble.
"""
self.flush()
for i in range(len(self) - 1, -1, -1):
p,o,c = self[i]
if priority==p and observer==o and callble==c:
del self._poc[i]
def __repr__(self):
return self._poc.__repr__()
def add(self, priority, observer, callble):
"""
Add an observer with priority and callble
"""
if observer is not None:
ins = 0
for pr, _, _ in self:
if priority > pr:
break
ins += 1
self._poc.insert(ins, (priority, weakref.ref(observer), callble))
def __str__(self):
from . import ObsAr, Param
from parameter_core import Parameterizable
ret = []
curr_p = None
def frmt(o):
if isinstance(o, ObsAr):
return 'ObsArr <{}>'.format(hex(id(o)))
elif isinstance(o, (Param,Parameterizable)):
return '{}'.format(o.hierarchy_name())
else:
return repr(o)
for p, o, c in self:
curr = ''
if curr_p != p:
pre = "{!s}: ".format(p)
curr_pre = pre
else: curr_pre = " "*len(pre)
curr_p = p
curr += curr_pre
ret.append(curr + ", ".join([frmt(o), str(c)]))
return '\n'.join(ret)
def flush(self):
"""
Make sure all weak references which point to nothing are flushed (deleted).
"""
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
def __iter__(self):
self.flush()
for p, o, c in self._poc:
yield p, o(), c
def __len__(self):
self.flush()
return self._poc.__len__()
def __deepcopy__(self, memo):
s = ObserverList()
for p,o,c in self:
import copy
s.add(p, copy.deepcopy(o, memo), copy.deepcopy(c, memo))
s.flush()
return s
def __getstate__(self):
self.flush()
from ...util.caching import Cacher
obs = []
for p, o, c in self:
if (getattr(o, c.__name__, None) is not None
and not isinstance(o, Cacher)):
obs.append((p,o,c.__name__))
return obs
def __setstate__(self, state):
self._poc = []
for p, o, c in state:
self.add(p,o,getattr(o, c))
pass
@ -0,0 +1,66 @@
# Copyright (c) 2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
class Observable(object):
"""
Observable pattern for parameterization.
This Object allows for observers to register with self and a (bound!) function
as an observer. Every time the observable changes, it sends a notification with
self as only argument to all its observers.
"""
def __init__(self, *args, **kwargs):
super(Observable, self).__init__()
from lists_and_dicts import ObserverList
self.observers = ObserverList()
def add_observer(self, observer, callble, priority=0):
"""
Add an observer `observer` with the callback `callble`
and priority `priority` to this observers list.
"""
self.observers.add(priority, observer, callble)
def remove_observer(self, observer, callble=None):
"""
Either (if callble is None) remove all callables,
which were added alongside observer,
or remove callable `callble` which was added alongside
the observer `observer`.
"""
to_remove = []
for poc in self.observers:
_, obs, clble = poc
if callble is not None:
if (obs is observer) and (callble == clble):
to_remove.append(poc)
else:
if obs is observer:
to_remove.append(poc)
for r in to_remove:
self.observers.remove(*r)
def notify_observers(self, which=None, min_priority=None):
"""
Notifies all observers. `which` is the element that kicked off this
notification loop. The first argument will be self, the second `which`.
NOTE: notifies only observers with priority p > min_priority!
^^^^^^^^^^^^^^^^
:param min_priority: only notify observers with priority > min_priority
if min_priority is None, notify all observers in order
"""
if which is None:
which = self
if min_priority is None:
[callble(self, which=which) for _, _, callble in self.observers]
else:
for p, _, callble in self.observers:
if p <= min_priority:
break
callble(self, which=which)
def change_priority(self, observer, callble, priority):
self.remove_observer(observer, callble)
self.add_observer(observer, callble, priority)
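# --- Illustrative sketch (the Thermometer/Display classes are hypothetical;
# only the Observable API above is assumed): registering an observer and
# triggering a notification.
if __name__ == '__main__':
    class Thermometer(Observable):
        def set_temperature(self, value):
            self.temperature = value
            self.notify_observers()          # tell every observer we changed

    class Display(object):
        def on_change(self, observable, which):
            print('temperature changed')

    t = Thermometer()
    d = Display()
    t.add_observer(d, d.on_change)           # d.on_change(t, which=t) on notify
    t.set_temperature(21.0)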
@ -0,0 +1,147 @@
# Copyright (c) 2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from parameter_core import Pickleable
from observable import Observable
class ObsAr(np.ndarray, Pickleable, Observable):
"""
An ndarray which reports changes to its observers.
The observers can add themselves with a callable, which
will be called every time this array changes. The callable
takes exactly one argument, which is this array itself.
"""
__array_priority__ = -1 # Never give back ObsAr
def __new__(cls, input_array, *a, **kw):
# always make a copy of the input parameters, as we need them to be in C order:
if not isinstance(input_array, ObsAr):
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
else: obj = input_array
super(ObsAr, obj).__init__(*a, **kw)
return obj
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
self.observers = getattr(obj, 'observers', None)
def __array_wrap__(self, out_arr, context=None):
return out_arr.view(np.ndarray)
def _setup_observers(self):
# do not setup anything, as observable arrays do not have default observers
pass
@property
def values(self):
return self.view(np.ndarray)
def copy(self):
from lists_and_dicts import ObserverList
memo = {}
memo[id(self)] = self
memo[id(self.observers)] = ObserverList()
return self.__deepcopy__(memo)
def __deepcopy__(self, memo):
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
memo[id(self)] = s
import copy
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
def __reduce__(self):
func, args, state = super(ObsAr, self).__reduce__()
return func, args, (state, Pickleable.__getstate__(self))
def __setstate__(self, state):
np.ndarray.__setstate__(self, state[0])
Pickleable.__setstate__(self, state[1])
def __setitem__(self, s, val):
super(ObsAr, self).__setitem__(s, val)
self.notify_observers()
def __getslice__(self, start, stop):
return self.__getitem__(slice(start, stop))
def __setslice__(self, start, stop, val):
return self.__setitem__(slice(start, stop), val)
def __ilshift__(self, *args, **kwargs):
r = np.ndarray.__ilshift__(self, *args, **kwargs)
self.notify_observers()
return r
def __irshift__(self, *args, **kwargs):
r = np.ndarray.__irshift__(self, *args, **kwargs)
self.notify_observers()
return r
def __ixor__(self, *args, **kwargs):
r = np.ndarray.__ixor__(self, *args, **kwargs)
self.notify_observers()
return r
def __ipow__(self, *args, **kwargs):
r = np.ndarray.__ipow__(self, *args, **kwargs)
self.notify_observers()
return r
def __ifloordiv__(self, *args, **kwargs):
r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
self.notify_observers()
return r
def __isub__(self, *args, **kwargs):
r = np.ndarray.__isub__(self, *args, **kwargs)
self.notify_observers()
return r
def __ior__(self, *args, **kwargs):
r = np.ndarray.__ior__(self, *args, **kwargs)
self.notify_observers()
return r
def __itruediv__(self, *args, **kwargs):
r = np.ndarray.__itruediv__(self, *args, **kwargs)
self.notify_observers()
return r
def __idiv__(self, *args, **kwargs):
r = np.ndarray.__idiv__(self, *args, **kwargs)
self.notify_observers()
return r
def __iand__(self, *args, **kwargs):
r = np.ndarray.__iand__(self, *args, **kwargs)
self.notify_observers()
return r
def __imod__(self, *args, **kwargs):
r = np.ndarray.__imod__(self, *args, **kwargs)
self.notify_observers()
return r
def __iadd__(self, *args, **kwargs):
r = np.ndarray.__iadd__(self, *args, **kwargs)
self.notify_observers()
return r
def __imul__(self, *args, **kwargs):
r = np.ndarray.__imul__(self, *args, **kwargs)
self.notify_observers()
return r
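# --- Illustrative sketch (the Listener class is hypothetical; only the
# ObsAr/Observable API above is assumed): observers are notified on every
# in-place modification of the array.
if __name__ == '__main__':
    class Listener(object):
        def changed(self, arr, which=None):
            print('array changed')

    a = ObsAr(np.zeros(3))
    obs = Listener()
    a.add_observer(obs, obs.changed)
    a[0] = 1.0    # __setitem__ notifies the observer
    a += 2.0      # __iadd__ notifies the observer as well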
@ -0,0 +1,476 @@
# Copyright (c) 2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import itertools
import numpy
np = numpy
from parameter_core import Parameterizable, adjust_name_for_printing, Pickleable
from observable_array import ObsAr
###### printing
__constraints_name__ = "Constraint"
__index_name__ = "Index"
__tie_name__ = "Tied to"
__priors_name__ = "Prior"
__precision__ = numpy.get_printoptions()['precision'] # numpy printing precision used, subclassing numpy ndarray after all
__print_threshold__ = 5
######
class Param(Parameterizable, ObsAr):
"""
Parameter object for GPy models.
:param str name: name of the parameter to be printed
:param input_array: array which this parameter handles
:type input_array: numpy.ndarray
:param default_constraint: The default constraint for this parameter
:type default_constraint:
You can add/remove constraints by calling constrain on the parameter itself, e.g:
- self[:,1].constrain_positive()
- self[0].tie_to(other)
- self.untie()
- self[:3,:].unconstrain()
- self[1].fix()
Fixing parameters will fix them to the value they are right now. If you change
the fixed value, it will be fixed to the new value!
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
"""
__array_priority__ = -1 # Never give back Param
_fixes_ = None
parameters = []
def __new__(cls, name, input_array, default_constraint=None):
obj = numpy.atleast_1d(super(Param, cls).__new__(cls, input_array=input_array))
obj._current_slice_ = (slice(obj.shape[0]),)
obj._realshape_ = obj.shape
obj._realsize_ = obj.size
obj._realndim_ = obj.ndim
obj._original_ = obj
return obj
def __init__(self, name, input_array, default_constraint=None, *a, **kw):
self._in_init_ = True
super(Param, self).__init__(name=name, default_constraint=default_constraint, *a, **kw)
self._in_init_ = False
def build_pydot(self,G):
import pydot
node = pydot.Node(id(self), shape='trapezium', label=self.name)#, fontcolor='white', color='white')
G.add_node(node)
for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
edge = pydot.Edge(str(id(self)), str(id(o)), color='darkorange2', arrowhead='vee')
G.add_edge(edge)
return node
def __array_finalize__(self, obj):
# see InfoArray.__array_finalize__ for comments
if obj is None: return
super(Param, self).__array_finalize__(obj)
self._parent_ = getattr(obj, '_parent_', None)
self._parent_index_ = getattr(obj, '_parent_index_', None)
self._default_constraint_ = getattr(obj, '_default_constraint_', None)
self._current_slice_ = getattr(obj, '_current_slice_', None)
self._realshape_ = getattr(obj, '_realshape_', None)
self._realsize_ = getattr(obj, '_realsize_', None)
self._realndim_ = getattr(obj, '_realndim_', None)
self._original_ = getattr(obj, '_original_', None)
self._name = getattr(obj, '_name', None)
self._gradient_array_ = getattr(obj, '_gradient_array_', None)
self.constraints = getattr(obj, 'constraints', None)
self.priors = getattr(obj, 'priors', None)
@property
def param_array(self):
"""
As we are a leaf, this just returns self
"""
return self
@property
def values(self):
"""
Return self as numpy array view
"""
return self.view(np.ndarray)
@property
def gradient(self):
"""
Return a view on the gradient, which is in the same shape as this parameter is.
Note: this is not the real gradient array, it is just a view on it.
To work on the real gradient array use: self.full_gradient
"""
if getattr(self, '_gradient_array_', None) is None:
self._gradient_array_ = numpy.empty(self._realshape_, dtype=numpy.float64)
return self._gradient_array_#[self._current_slice_]
@gradient.setter
def gradient(self, val):
self._gradient_array_[:] = val
#===========================================================================
# Array operations -> done
#===========================================================================
def __getitem__(self, s, *args, **kwargs):
if not isinstance(s, tuple):
s = (s,)
#if not reduce(lambda a, b: a or numpy.any(b is Ellipsis), s, False) and len(s) <= self.ndim:
# s += (Ellipsis,)
new_arr = super(Param, self).__getitem__(s, *args, **kwargs)
try:
new_arr._current_slice_ = s
new_arr._gradient_array_ = self.gradient[s]
new_arr._original_ = self._original_
except AttributeError: pass # returning 0d array or float, double etc
return new_arr
def _raveled_index(self, slice_index=None):
# return an index array on the raveled array, which is formed by the current_slice
# of this object
extended_realshape = numpy.cumprod((1,) + self._realshape_[:0:-1])[::-1]
ind = self._indices(slice_index)
if ind.ndim < 2: ind = ind[:, None]
return numpy.asarray(numpy.apply_along_axis(lambda x: numpy.sum(extended_realshape * x), 1, ind), dtype=int)
def _raveled_index_for(self, obj):
return self._raveled_index()
#===========================================================================
# Constrainable
#===========================================================================
def _ensure_fixes(self):
if not self._has_fixes(): self._fixes_ = numpy.ones(self._realsize_, dtype=bool)
#===========================================================================
# Convenience
#===========================================================================
@property
def is_fixed(self):
from transformations import __fixed__
return self.constraints[__fixed__].size == self.size
def _get_original(self, param):
return self._original_
#===========================================================================
# Pickling and copying
#===========================================================================
def copy(self):
return Parameterizable.copy(self, which=self)
def __deepcopy__(self, memo):
s = self.__new__(self.__class__, name=self.name, input_array=self.view(numpy.ndarray).copy())
memo[id(self)] = s
import copy
Pickleable.__setstate__(s, copy.deepcopy(self.__getstate__(), memo))
return s
def _setup_observers(self):
"""
Setup the default observers
1: pass through to parent, if present
"""
if self.has_parent():
self.add_observer(self._parent_, self._parent_._pass_through_notify_observers, -np.inf)
#===========================================================================
# Printing -> done
#===========================================================================
@property
def _description_str(self):
if self.size <= 1:
return [str(self.view(numpy.ndarray)[0])]
else: return [str(self.shape)]
def parameter_names(self, add_self=False, adjust_for_printing=False, recursive=True):
# this is just overwriting the parameterized calls to parameter names, in order to maintain OOP
if adjust_for_printing:
return [adjust_name_for_printing(self.name)]
return [self.name]
@property
def flattened_parameters(self):
return [self]
@property
def parameter_shapes(self):
return [self.shape]
@property
def num_params(self):
return 0
@property
def _constraints_str(self):
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.constraints.iteritems()))]
@property
def _priors_str(self):
return [' '.join(map(lambda c: str(c[0]) if c[1].size == self._realsize_ else "{" + str(c[0]) + "}", self.priors.iteritems()))]
@property
def _ties_str(self):
return ['']
def _ties_for(self, ravi):
return [['N/A']]*ravi.size
def __repr__(self, *args, **kwargs):
name = "\033[1m{x:s}\033[0;0m:\n".format(
x=self.hierarchy_name())
return name + super(Param, self).__repr__(*args, **kwargs)
def _indices(self, slice_index=None):
# get a int-array containing all indices in the first axis.
if slice_index is None:
slice_index = self._current_slice_
try:
indices = np.indices(self._realshape_, dtype=int)
indices = indices[(slice(None),)+slice_index]
indices = np.rollaxis(indices, 0, indices.ndim).reshape(-1,self._realndim_)
#print indices_
#if not np.all(indices==indices__):
# import ipdb; ipdb.set_trace()
except:
indices = np.indices(self._realshape_, dtype=int)
indices = indices[(slice(None),)+slice_index]
indices = np.rollaxis(indices, 0, indices.ndim)
return indices
def _max_len_names(self, gen, header):
gen = map(lambda x: " ".join(map(str, x)), gen)
return reduce(lambda a, b:max(a, len(b)), gen, len(header))
def _max_len_values(self):
return reduce(lambda a, b:max(a, len("{x:=.{0}g}".format(__precision__, x=b))), self.flat, len(self.hierarchy_name()))
def _max_len_index(self, ind):
return reduce(lambda a, b:max(a, len(str(b))), ind, len(__index_name__))
def _short(self):
# short string to print
name = self.hierarchy_name()
if self._realsize_ < 2:
return name
ind = self._indices()
if ind.size > 4: indstr = ','.join(map(str, ind[:2])) + "..." + ','.join(map(str, ind[-2:]))
else: indstr = ','.join(map(str, ind))
return name + '[' + indstr + ']'
def _repr_html_(self, constr_matrix=None, indices=None, prirs=None, ties=None):
"""Representation of the parameter in html for notebook display."""
filter_ = self._current_slice_
vals = self.flat
if indices is None: indices = self._indices(filter_)
ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(map(lambda x: x, t)) for t in ties]
header_format = """
<tr>
<td><b>{i}</b></td>
<td><b>{x}</b></td>
<td><b>{c}</b></td>
<td><b>{p}</b></td>
<td><b>{t}</b></td>
</tr>"""
header = header_format.format(x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join(['<table>'] + [header] + ["<tr><td>{i}</td><td align=\"right\">{x}</td><td>{c}</td><td>{p}</td><td>{t}</td></tr>".format(x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)] + ["</table>"])
def __str__(self, constr_matrix=None, indices=None, prirs=None, ties=None, lc=None, lx=None, li=None, lp=None, lt=None, only_name=False):
filter_ = self._current_slice_
vals = self.flat
if indices is None: indices = self._indices(filter_)
ravi = self._raveled_index(filter_)
if constr_matrix is None: constr_matrix = self.constraints.properties_for(ravi)
if prirs is None: prirs = self.priors.properties_for(ravi)
if ties is None: ties = self._ties_for(ravi)
ties = [' '.join(map(lambda x: x, t)) for t in ties]
if lc is None: lc = self._max_len_names(constr_matrix, __constraints_name__)
if lx is None: lx = self._max_len_values()
if li is None: li = self._max_len_index(indices)
if lt is None: lt = self._max_len_names(ties, __tie_name__)
        if lp is None: lp = self._max_len_names(prirs, __priors_name__)
sep = '-'
header_format = " {i:{5}^{2}s} | \033[1m{x:{5}^{1}s}\033[0;0m | {c:{5}^{0}s} | {p:{5}^{4}s} | {t:{5}^{3}s}"
if only_name: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=sep*lc, i=sep*li, t=sep*lt, p=sep*lp) # nice header for printing
else: header = header_format.format(lc, lx, li, lt, lp, ' ', x=self.hierarchy_name(), c=__constraints_name__, i=__index_name__, t=__tie_name__, p=__priors_name__) # nice header for printing
if not ties: ties = itertools.cycle([''])
return "\n".join([header] + [" {i!s:^{3}s} | {x: >{1}.{2}g} | {c:^{0}s} | {p:^{5}s} | {t:^{4}s} ".format(lc, lx, __precision__, li, lt, lp, x=x, c=" ".join(map(str, c)), p=" ".join(map(str, p)), t=(t or ''), i=i) for i, x, c, t, p in itertools.izip(indices, vals, constr_matrix, ties, prirs)]) # return all the constraints with right indices
# except: return super(Param, self).__str__()
class ParamConcatenation(object):
def __init__(self, params):
"""
        Parameter concatenation for conveniently printing regular-expression-matched arrays.
        You can index this concatenation as if it were the flattened concatenation
        of all the parameters it contains; the same holds for setting parameters
        (broadcasting enabled).
        See :py:class:`GPy.core.parameter.Param` for more details on constraining.
"""
# self.params = params
from lists_and_dicts import ArrayList
self.params = ArrayList([])
for p in params:
for p in p.flattened_parameters:
if p not in self.params:
self.params.append(p)
self._param_sizes = [p.size for p in self.params]
startstops = numpy.cumsum([0] + self._param_sizes)
self._param_slices_ = [slice(start, stop) for start,stop in zip(startstops, startstops[1:])]
parents = dict()
for p in self.params:
if p.has_parent():
parent = p._parent_
level = 0
while parent is not None:
if parent in parents:
parents[parent] = max(level, parents[parent])
else:
parents[parent] = level
level += 1
parent = parent._parent_
import operator
self.parents = map(lambda x: x[0], sorted(parents.iteritems(), key=operator.itemgetter(1)))
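    # Editor's sketch (hypothetical model `m`): m['.*variance'] returns a
    # ParamConcatenation of every matching parameter when more than one matches;
    # assignment then broadcasts across all of them:
    #   m['.*variance'] = 1.
    #   m['.*variance'].constrain_positive()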
#===========================================================================
# Get/set items, enable broadcasting
#===========================================================================
def __getitem__(self, s):
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
params = [p.param_array.flat[ind[ps]] for p,ps in zip(self.params, self._param_slices_) if numpy.any(p.param_array.flat[ind[ps]])]
if len(params)==1: return params[0]
return ParamConcatenation(params)
def __setitem__(self, s, val, update=True):
if isinstance(val, ParamConcatenation):
val = val.values()
ind = numpy.zeros(sum(self._param_sizes), dtype=bool); ind[s] = True;
vals = self.values(); vals[s] = val
for p, ps in zip(self.params, self._param_slices_):
p.flat[ind[ps]] = vals[ps]
if update:
self.update_all_params()
def values(self):
return numpy.hstack([p.param_array.flat for p in self.params])
#===========================================================================
# parameter operations:
#===========================================================================
def update_all_params(self):
for par in self.parents:
par.notify_observers()
def constrain(self, constraint, warning=True):
[param.constrain(constraint, trigger_parent=False) for param in self.params]
self.update_all_params()
constrain.__doc__ = Param.constrain.__doc__
def constrain_positive(self, warning=True):
[param.constrain_positive(warning, trigger_parent=False) for param in self.params]
self.update_all_params()
constrain_positive.__doc__ = Param.constrain_positive.__doc__
def constrain_fixed(self, value=None, warning=True, trigger_parent=True):
[param.constrain_fixed(value, warning, trigger_parent) for param in self.params]
constrain_fixed.__doc__ = Param.constrain_fixed.__doc__
fix = constrain_fixed
def constrain_negative(self, warning=True):
[param.constrain_negative(warning, trigger_parent=False) for param in self.params]
self.update_all_params()
constrain_negative.__doc__ = Param.constrain_negative.__doc__
def constrain_bounded(self, lower, upper, warning=True):
[param.constrain_bounded(lower, upper, warning, trigger_parent=False) for param in self.params]
self.update_all_params()
constrain_bounded.__doc__ = Param.constrain_bounded.__doc__
def unconstrain(self, *constraints):
[param.unconstrain(*constraints) for param in self.params]
unconstrain.__doc__ = Param.unconstrain.__doc__
def unconstrain_negative(self):
[param.unconstrain_negative() for param in self.params]
unconstrain_negative.__doc__ = Param.unconstrain_negative.__doc__
def unconstrain_positive(self):
[param.unconstrain_positive() for param in self.params]
unconstrain_positive.__doc__ = Param.unconstrain_positive.__doc__
def unconstrain_fixed(self):
[param.unconstrain_fixed() for param in self.params]
unconstrain_fixed.__doc__ = Param.unconstrain_fixed.__doc__
unfix = unconstrain_fixed
def unconstrain_bounded(self, lower, upper):
[param.unconstrain_bounded(lower, upper) for param in self.params]
unconstrain_bounded.__doc__ = Param.unconstrain_bounded.__doc__
def untie(self, *ties):
[param.untie(*ties) for param in self.params]
def checkgrad(self, verbose=0, step=1e-6, tolerance=1e-3):
return self.params[0]._highest_parent_._checkgrad(self, verbose, step, tolerance)
#checkgrad.__doc__ = Gradcheckable.checkgrad.__doc__
__lt__ = lambda self, val: self.values() < val
__le__ = lambda self, val: self.values() <= val
__eq__ = lambda self, val: self.values() == val
__ne__ = lambda self, val: self.values() != val
__gt__ = lambda self, val: self.values() > val
__ge__ = lambda self, val: self.values() >= val
def __str__(self, *args, **kwargs):
def f(p):
ind = p._raveled_index()
return p.constraints.properties_for(ind), p._ties_for(ind), p.priors.properties_for(ind)
params = self.params
constr_matrices, ties_matrices, prior_matrices = zip(*map(f, params))
indices = [p._indices() for p in params]
lc = max([p._max_len_names(cm, __constraints_name__) for p, cm in itertools.izip(params, constr_matrices)])
lx = max([p._max_len_values() for p in params])
li = max([p._max_len_index(i) for p, i in itertools.izip(params, indices)])
lt = max([p._max_len_names(tm, __tie_name__) for p, tm in itertools.izip(params, ties_matrices)])
        lp = max([p._max_len_names(pm, __priors_name__) for p, pm in itertools.izip(params, prior_matrices)])
strings = []
start = True
for p, cm, i, tm, pm in itertools.izip(params,constr_matrices,indices,ties_matrices,prior_matrices):
strings.append(p.__str__(constr_matrix=cm, indices=i, prirs=pm, ties=tm, lc=lc, lx=lx, li=li, lp=lp, lt=lt, only_name=(1-start)))
start = False
return "\n".join(strings)
def __repr__(self):
return "\n".join(map(repr,self.params))
def __ilshift__(self, *args, **kwargs):
self[:] = np.ndarray.__ilshift__(self.values(), *args, **kwargs)
def __irshift__(self, *args, **kwargs):
self[:] = np.ndarray.__irshift__(self.values(), *args, **kwargs)
def __ixor__(self, *args, **kwargs):
self[:] = np.ndarray.__ixor__(self.values(), *args, **kwargs)
def __ipow__(self, *args, **kwargs):
self[:] = np.ndarray.__ipow__(self.values(), *args, **kwargs)
def __ifloordiv__(self, *args, **kwargs):
self[:] = np.ndarray.__ifloordiv__(self.values(), *args, **kwargs)
def __isub__(self, *args, **kwargs):
self[:] = np.ndarray.__isub__(self.values(), *args, **kwargs)
def __ior__(self, *args, **kwargs):
self[:] = np.ndarray.__ior__(self.values(), *args, **kwargs)
def __itruediv__(self, *args, **kwargs):
self[:] = np.ndarray.__itruediv__(self.values(), *args, **kwargs)
def __idiv__(self, *args, **kwargs):
self[:] = np.ndarray.__idiv__(self.values(), *args, **kwargs)
def __iand__(self, *args, **kwargs):
self[:] = np.ndarray.__iand__(self.values(), *args, **kwargs)
def __imod__(self, *args, **kwargs):
self[:] = np.ndarray.__imod__(self.values(), *args, **kwargs)
def __iadd__(self, *args, **kwargs):
self[:] = np.ndarray.__iadd__(self.values(), *args, **kwargs)
def __imul__(self, *args, **kwargs):
self[:] = np.ndarray.__imul__(self.values(), *args, **kwargs)

File diff suppressed because it is too large


@@ -0,0 +1,418 @@
# Copyright (c) 2014, Max Zwiessele, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy; np = numpy
import itertools
from re import compile, _pattern_type
from param import ParamConcatenation
from parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
import logging
from GPy.core.parameterization.index_operations import ParameterIndexOperationsView
logger = logging.getLogger("parameters changed meta")
class ParametersChangedMeta(type):
def __call__(self, *args, **kw):
self._in_init_ = True
#import ipdb;ipdb.set_trace()
self = super(ParametersChangedMeta, self).__call__(*args, **kw)
logger.debug("finished init")
self._in_init_ = False
logger.debug("connecting parameters")
self._highest_parent_._connect_parameters()
#self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
logger.debug("calling parameters changed")
self.parameters_changed()
return self
class Parameterized(Parameterizable):
"""
Parameterized class
Say m is a handle to a parameterized class.
Printing parameters:
- print m: prints a nice summary over all parameters
- print m.name: prints details for param with name 'name'
- print m[regexp]: prints details for all the parameters
which match (!) regexp
- print m['']: prints details for all parameters
Fields:
Name: The name of the param, can be renamed!
Value: Shape or value, if one-valued
Constrain: constraint of the param, curly "{c}" brackets indicate
some parameters are constrained by c. See detailed print
to get exact constraints.
        Tied_to: which parameter it is tied to.
Getting and setting parameters:
Set all values in param to one:
m.name.to.param = 1
    Handling of constraining, fixing and tying parameters:
You can constrain parameters by calling the constrain on the param itself, e.g:
- m.name[:,1].constrain_positive()
- m.name[0].tie_to(m.name[1])
    Fixing parameters will fix them to the value they are right now. If you change
    the parameter's value, the param will be fixed to the new value!
    If you want to operate on all parameters use m[''] to wildcard select all parameters
and concatenate them. Printing m[''] will result in printing of all parameters in detail.
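
    Example (editor's sketch; `m` is a hypothetical GPy model with an 'rbf' kernel)::

        print m                          # summary of all parameters
        print m['.*lengthscale']         # details of every matching parameter
        m.rbf.lengthscale = 2.           # broadcast-assign a value
        m.rbf.variance.constrain_positive()
        m.rbf.variance.fix()             # fix at the current value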
"""
#===========================================================================
# Metaclass for parameters changed after init.
    # This makes sure that parameters_changed() will always be called after __init__
# **Never** call parameters_changed() yourself
__metaclass__ = ParametersChangedMeta
#===========================================================================
def __init__(self, name=None, parameters=[], *a, **kw):
super(Parameterized, self).__init__(name=name, *a, **kw)
self.size = sum(p.size for p in self.parameters)
self.add_observer(self, self._parameters_changed_notification, -100)
if not self._has_fixes():
self._fixes_ = None
self._param_slices_ = []
#self._connect_parameters()
self.link_parameters(*parameters)
def build_pydot(self, G=None):
import pydot # @UnresolvedImport
iamroot = False
if G is None:
G = pydot.Dot(graph_type='digraph', bgcolor=None)
iamroot=True
node = pydot.Node(id(self), shape='box', label=self.name)#, color='white')
G.add_node(node)
for child in self.parameters:
child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node))#, color='white'))
for _, o, _ in self.observers:
label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node)
edge = pydot.Edge(str(id(self)), str(id(o)), color='darkorange2', arrowhead='vee')
G.add_edge(edge)
if iamroot:
return G
return node
#===========================================================================
# Add remove parameters:
#===========================================================================
def link_parameter(self, param, index=None, _ignore_added_names=False):
"""
        :param param: the parameter to add
        :type param: :py:class:`GPy.core.param.Param`
        :param [index]: index of where to put the parameter
        :param bool _ignore_added_names: whether the name of the parameter overrides a possibly existing field

        Add the parameter to this parameterized object; you can insert it
        at any given index using the :func:`list.insert` syntax.
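
        Example (editor's sketch, a hypothetical kernel linking its own parameters)::

            self.variance = Param('variance', 1.)
            self.link_parameter(self.variance)             # appended at the end
            self.link_parameter(self.lengthscale, index=0) # or inserted at the front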
"""
if param in self.parameters and index is not None:
self.unlink_parameter(param)
self.link_parameter(param, index)
# elif param.has_parent():
# raise HierarchyError, "parameter {} already in another model ({}), create new object (or copy) for adding".format(param._short(), param._highest_parent_._short())
elif param not in self.parameters:
if param.has_parent():
def visit(parent, self):
if parent is self:
raise HierarchyError, "You cannot add a parameter twice into the hierarchy"
param.traverse_parents(visit, self)
param._parent_.unlink_parameter(param)
# make sure the size is set
if index is None:
start = sum(p.size for p in self.parameters)
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, self.size)
self.priors.update(param.priors, self.size)
param._parent_ = self
param._parent_index_ = len(self.parameters)
self.parameters.append(param)
else:
start = sum(p.size for p in self.parameters[:index])
self.constraints.shift_right(start, param.size)
self.priors.shift_right(start, param.size)
self.constraints.update(param.constraints, start)
self.priors.update(param.priors, start)
param._parent_ = self
param._parent_index_ = index if index>=0 else len(self.parameters[:index])
for p in self.parameters[index:]:
p._parent_index_ += 1
self.parameters.insert(index, param)
param.add_observer(self, self._pass_through_notify_observers, -np.inf)
parent = self
while parent is not None:
parent.size += param.size
parent = parent._parent_
self._notify_parent_change()
if not self._in_init_:
#self._connect_parameters()
#self._notify_parent_change()
self._highest_parent_._connect_parameters(ignore_added_names=_ignore_added_names)
self._highest_parent_._notify_parent_change()
self._highest_parent_._connect_fixes()
else:
raise HierarchyError, """Parameter exists already, try making a copy"""
def link_parameters(self, *parameters):
"""
convenience method for adding several
parameters without gradient specification
"""
[self.link_parameter(p) for p in parameters]
def unlink_parameter(self, param):
"""
:param param: param object to remove from being a parameter of this parameterized object.
"""
if not param in self.parameters:
try:
raise RuntimeError, "{} does not belong to this object {}, remove parameters directly from their respective parents".format(param._short(), self.name)
except AttributeError:
raise RuntimeError, "{} does not seem to be a parameter, remove parameters directly from their respective parents".format(str(param))
start = sum([p.size for p in self.parameters[:param._parent_index_]])
self._remove_parameter_name(param)
self.size -= param.size
del self.parameters[param._parent_index_]
param._disconnect_parent()
param.remove_observer(self, self._pass_through_notify_observers)
self.constraints.shift_left(start, param.size)
self._connect_parameters()
self._notify_parent_change()
parent = self._parent_
while parent is not None:
parent.size -= param.size
parent = parent._parent_
self._highest_parent_._connect_parameters()
self._highest_parent_._connect_fixes()
self._highest_parent_._notify_parent_change()
def add_parameter(self, *args, **kwargs):
raise DeprecationWarning, "add_parameter was renamed to link_parameter to avoid confusion of setting variables"
def remove_parameter(self, *args, **kwargs):
        raise DeprecationWarning, "remove_parameter was renamed to unlink_parameter to avoid confusion of setting variables"
def _connect_parameters(self, ignore_added_names=False):
# connect parameterlist to this parameterized object
# This just sets up the right connection for the params objects
# to be used as parameters
# it also sets the constraints for each parameter to the constraints
# of their respective parents
if not hasattr(self, "parameters") or len(self.parameters) < 1:
# no parameters for this class
return
if self.param_array.size != self.size:
self._param_array_ = np.empty(self.size, dtype=np.float64)
if self.gradient.size != self.size:
self._gradient_array_ = np.empty(self.size, dtype=np.float64)
old_size = 0
self._param_slices_ = []
for i, p in enumerate(self.parameters):
if not p.param_array.flags['C_CONTIGUOUS']:
raise ValueError, "This should not happen! Please write an email to the developers with the code, which reproduces this error. All parameter arrays must be C_CONTIGUOUS"
p._parent_ = self
p._parent_index_ = i
pslice = slice(old_size, old_size + p.size)
# first connect all children
p._propagate_param_grad(self.param_array[pslice], self.gradient_full[pslice])
# then connect children to self
self.param_array[pslice] = p.param_array.flat # , requirements=['C', 'W']).ravel(order='C')
self.gradient_full[pslice] = p.gradient_full.flat # , requirements=['C', 'W']).ravel(order='C')
p.param_array.data = self.param_array[pslice].data
p.gradient_full.data = self.gradient_full[pslice].data
self._param_slices_.append(pslice)
self._add_parameter_name(p, ignore_added_names=ignore_added_names)
old_size += p.size
#===========================================================================
# Get/set parameters:
#===========================================================================
def grep_param_names(self, regexp):
"""
create a list of parameters, matching regular expression regexp
"""
if not isinstance(regexp, _pattern_type): regexp = compile(regexp)
found_params = []
for n, p in itertools.izip(self.parameter_names(False, False, True), self.flattened_parameters):
if regexp.match(n) is not None:
found_params.append(p)
return found_params
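    # Editor's note: e.g. self.grep_param_names('.*lengthscale') returns every
    # parameter whose (recursive) name matches the regular expression; this is
    # what powers the m['...'] regexp indexing implemented below.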
def __getitem__(self, name, paramlist=None):
if isinstance(name, (int, slice, tuple, np.ndarray)):
return self.param_array[name]
else:
if paramlist is None:
paramlist = self.grep_param_names(name)
if len(paramlist) < 1: raise AttributeError, name
if len(paramlist) == 1:
if isinstance(paramlist[-1], Parameterized):
paramlist = paramlist[-1].flattened_parameters
if len(paramlist) != 1:
return ParamConcatenation(paramlist)
return paramlist[-1]
return ParamConcatenation(paramlist)
def __setitem__(self, name, value, paramlist=None):
if value is None:
return # nothing to do here
if isinstance(name, (slice, tuple, np.ndarray)):
try:
self.param_array[name] = value
except:
raise ValueError, "Setting by slice or index only allowed with array-like"
self._trigger_params_changed()
else:
try: param = self.__getitem__(name, paramlist)
except: raise
param[:] = value
def __setattr__(self, name, val):
        # override the default behaviour: if setting a param, broadcasting can be used
if hasattr(self, "parameters"):
try:
pnames = self.parameter_names(False, adjust_for_printing=True, recursive=False)
if name in pnames:
param = self.parameters[pnames.index(name)]
param[:] = val; return
except AttributeError:
pass
object.__setattr__(self, name, val);
#===========================================================================
# Pickling
#===========================================================================
def __setstate__(self, state):
super(Parameterized, self).__setstate__(state)
try:
self._connect_parameters()
self._connect_fixes()
self._notify_parent_change()
self.parameters_changed()
except Exception as e:
print "WARNING: caught exception {!s}, trying to continue".format(e)
def copy(self, memo=None):
if memo is None:
memo = {}
memo[id(self.optimizer_array)] = None # and param_array
memo[id(self.param_array)] = None # and param_array
copy = super(Parameterized, self).copy(memo)
copy._connect_parameters()
copy._connect_fixes()
copy._notify_parent_change()
return copy
#===========================================================================
# Printing:
#===========================================================================
def _short(self):
return self.hierarchy_name()
@property
def flattened_parameters(self):
return [xi for x in self.parameters for xi in x.flattened_parameters]
@property
def _parameter_sizes_(self):
return [x.size for x in self.parameters]
@property
def parameter_shapes(self):
return [xi for x in self.parameters for xi in x.parameter_shapes]
@property
def _constraints_str(self):
return [cs for p in self.parameters for cs in p._constraints_str]
@property
def _priors_str(self):
return [cs for p in self.parameters for cs in p._priors_str]
@property
def _description_str(self):
return [xi for x in self.parameters for xi in x._description_str]
@property
def _ties_str(self):
return [','.join(x._ties_str) for x in self.flattened_parameters]
def _repr_html_(self, header=True):
"""Representation of the parameters in html for notebook display."""
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;
ts = self._ties_str
prirs = self._priors_str
desc = self._description_str; names = self.parameter_names()
nl = max([len(str(x)) for x in names + [name]])
sl = max([len(str(x)) for x in desc + ["Value"]])
cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = "<tr><td>{{name:<{0}s}}</td><td align=\"right\">{{desc:>{1}s}}</td><td>{{const:^{2}s}}</td><td>{{pri:^{3}s}}</td><td>{{t:^{4}s}}</td></tr>".format(nl, sl, cl, pl, tl)
to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
        sep = '-' * (nl + sl + cl + pl + tl + 8 * 2 + 3)
if header:
header = """
<tr>
<td><b>{name}</b>
<td><b>Value</b></td>
<td><b>Constraint</b></td>
<td><b>Prior</b></td>
<td><b>Tied to</b></td>""".format(name=name)
to_print.insert(0, header)
return '<table>' + '\n'.format(sep).join(to_print) + '\n</table>'
def __str__(self, header=True):
name = adjust_name_for_printing(self.name) + "."
constrs = self._constraints_str;
ts = self._ties_str
prirs = self._priors_str
desc = self._description_str; names = self.parameter_names()
nl = max([len(str(x)) for x in names + [name]])
sl = max([len(str(x)) for x in desc + ["Value"]])
cl = max([len(str(x)) if x else 0 for x in constrs + ["Constraint"]])
tl = max([len(str(x)) if x else 0 for x in ts + ["Tied to"]])
pl = max([len(str(x)) if x else 0 for x in prirs + ["Prior"]])
format_spec = " \033[1m{{name:<{0}s}}\033[0;0m | {{desc:>{1}s}} | {{const:^{2}s}} | {{pri:^{3}s}} | {{t:^{4}s}}".format(nl, sl, cl, pl, tl)
to_print = []
for n, d, c, t, p in itertools.izip(names, desc, constrs, ts, prirs):
to_print.append(format_spec.format(name=n, desc=d, const=c, t=t, pri=p))
        sep = '-' * (nl + sl + cl + pl + tl + 8 * 2 + 3)
if header:
header = " {{0:<{0}s}} | {{1:^{1}s}} | {{2:^{2}s}} | {{3:^{3}s}} | {{4:^{4}s}}".format(nl, sl, cl, pl, tl).format(name, "Value", "Constraint", "Prior", "Tied to")
to_print.insert(0, header)
return '\n'.format(sep).join(to_print)
pass


@@ -0,0 +1,771 @@
# Copyright (c) 2012 - 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy.special import gammaln, digamma
from ...util.linalg import pdinv
from domains import _REAL, _POSITIVE
import warnings
import weakref
class Prior(object):
domain = None
_instance = None
def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Prior, cls).__new__(cls, *args, **kwargs)
return cls._instance
def pdf(self, x):
return np.exp(self.lnpdf(x))
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import priors_plots
priors_plots.univariate_plot(self)
def __repr__(self, *args, **kwargs):
return self.__str__()
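# Editor's note: the concrete priors below cache instances per parameter setting
# through weakrefs in __new__, so e.g. two calls to Gaussian(0, 1) return the
# same object.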
class Gaussian(Prior):
"""
Implementation of the univariate Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
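
    Example (editor's sketch; attaching this prior to a hypothetical model parameter)::

        m.rbf.variance.set_prior(Gaussian(mu=0., sigma=2.))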
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, sigma=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma:
return instance()
o = super(Prior, cls).__new__(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
def lnpdf_grad(self, x):
return -(x - self.mu) / self.sigma2
def rvs(self, n):
return np.random.randn(n) * self.sigma + self.mu
# def __getstate__(self):
# return self.mu, self.sigma
#
# def __setstate__(self, state):
# self.mu = state[0]
# self.sigma = state[1]
# self.sigma2 = np.square(self.sigma)
# self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
class Uniform(Prior):
domain = _REAL
_instances = []
def __new__(cls, lower=0, upper=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().lower == lower and instance().upper == upper:
return instance()
o = super(Prior, cls).__new__(cls, lower, upper)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower, upper):
self.lower = float(lower)
self.upper = float(upper)
def __str__(self):
return "[{:.2g}, {:.2g}]".format(self.lower, self.upper)
def lnpdf(self, x):
region = (x >= self.lower) * (x <= self.upper)
return region
def lnpdf_grad(self, x):
return np.zeros(x.shape)
def rvs(self, n):
return np.random.uniform(self.lower, self.upper, size=n)
# def __getstate__(self):
# return self.lower, self.upper
#
# def __setstate__(self, state):
# self.lower = state[0]
# self.upper = state[1]
class LogGaussian(Gaussian):
"""
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __new__(cls, mu=0, sigma=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().mu == mu and instance().sigma == sigma:
return instance()
o = super(Prior, cls).__new__(cls, mu, sigma)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "lnN({:.2g}, {:.2g})".format(self.mu, self.sigma)
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
def lnpdf_grad(self, x):
return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x
def rvs(self, n):
return np.exp(np.random.randn(n) * self.sigma + self.mu)
class MultivariateGaussian(Prior):
"""
Implementation of the multivariate Gaussian probability function, coupled with random variables.
:param mu: mean (N-dimensional array)
:param var: covariance matrix (NxN)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _REAL
_instances = []
def __new__(cls, mu=0, var=1): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu == mu) and np.all(instance().var == var):
return instance()
o = super(Prior, cls).__new__(cls, mu, var)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu, var):
self.mu = np.array(mu).flatten()
self.var = np.array(var)
assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def summary(self):
raise NotImplementedError
def pdf(self, x):
return np.exp(self.lnpdf(x))
def lnpdf(self, x):
d = x - self.mu
return self.constant - 0.5 * np.sum(d * np.dot(d, self.inv), 1)
def lnpdf_grad(self, x):
d = x - self.mu
return -np.dot(self.inv, d)
def rvs(self, n):
return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import priors_plots
priors_plots.multivariate_plot(self)
def __getstate__(self):
return self.mu, self.var
def __setstate__(self, state):
self.mu = state[0]
self.var = state[1]
assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def gamma_from_EV(E, V):
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
return Gamma.from_EV(E, V)
class Gamma(Prior):
"""
Implementation of the Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __new__(cls, a=1, b=.5): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().a == a and instance().b == b:
return instance()
o = super(Prior, cls).__new__(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga({:.2g}, {:.2g})".format(self.a, self.b)
def summary(self):
ret = {"E[x]": self.a / self.b, \
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
if self.a > 1:
ret['Mode'] = (self.a - 1.) / self.b
else:
            ret['Mode'] = np.nan
return ret
def lnpdf(self, x):
return self.constant + (self.a - 1) * np.log(x) - self.b * x
def lnpdf_grad(self, x):
return (self.a - 1.) / x - self.b
def rvs(self, n):
return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
@staticmethod
def from_EV(E, V):
"""
Creates an instance of a Gamma Prior by specifying the Expected value(s)
and Variance(s) of the distribution.
:param E: expected value
:param V: variance
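
        Worked example (editor's note): Gamma(a, b) has E[x] = a/b and
        Var[x] = a/b**2, so moment matching gives a = E**2/V and b = E/V;
        e.g. from_EV(1., 0.5) returns Ga(2, 2).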
"""
a = np.square(E) / V
b = E / V
return Gamma(a, b)
def __getstate__(self):
return self.a, self.b
def __setstate__(self, state):
self.a = state[0]
self.b = state[1]
self.constant = -gammaln(self.a) + self.a * np.log(self.b)
class InverseGamma(Gamma):
"""
Implementation of the inverse-Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = _POSITIVE
_instances = []
def __new__(cls, a=1, b=.5): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().a == a and instance().b == b:
return instance()
o = super(Prior, cls).__new__(cls, a, b)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "iGa({:.2g}, {:.2g})".format(self.a, self.b)
def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x
def lnpdf_grad(self, x):
return -(self.a + 1.) / x + self.b / x ** 2
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
class DGPLVM_KFDA(Prior):
"""
    Implementation of the Discriminative Gaussian Process Latent Variable prior using
    Kernel Fisher Discriminant Analysis (Seung-Jean Kim), as used in the face
    verification paper by Chaochao Lu.
:param lambdaa: constant
:param sigma2: constant
    .. Note:: DGPLVM implementation from the 'Surpassing Human-Level Face Verification' paper
"""
domain = _REAL
# _instances = []
# def __new__(cls, lambdaa, sigma2): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, lambdaa, sigma2, lbl, kern, x_shape):
"""A description for init"""
self.datanum = lbl.shape[0]
self.classnum = lbl.shape[1]
self.lambdaa = lambdaa
self.sigma2 = sigma2
self.lbl = lbl
self.kern = kern
lst_ni = self.compute_lst_ni()
self.a = self.compute_a(lst_ni)
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
if len(cls) > 2:
for i in range(2, self.classnum):
del cls[i]
return cls
def x_reduced(self, cls):
x1 = cls[0]
x2 = cls[1]
x = np.concatenate((x1, x2), axis=0)
return x
def compute_lst_ni(self):
lst_ni = []
lst_ni1 = []
lst_ni2 = []
f1 = (np.where(self.lbl[:, 0] == 1)[0])
f2 = (np.where(self.lbl[:, 1] == 1)[0])
for idx in f1:
lst_ni1.append(idx)
for idx in f2:
lst_ni2.append(idx)
lst_ni.append(len(lst_ni1))
lst_ni.append(len(lst_ni2))
return lst_ni
def compute_a(self, lst_ni):
a = np.ones((self.datanum, 1))
count = 0
for N_i in lst_ni:
if N_i == lst_ni[0]:
a[count:count + N_i] = (float(1) / N_i) * a[count]
count += N_i
else:
if N_i == lst_ni[1]:
a[count: count + N_i] = -(float(1) / N_i) * a[count]
count += N_i
return a
def compute_A(self, lst_ni):
A = np.zeros((self.datanum, self.datanum))
idx = 0
for N_i in lst_ni:
B = float(1) / np.sqrt(N_i) * (np.eye(N_i) - ((float(1) / N_i) * np.ones((N_i, N_i))))
A[idx:idx + N_i, idx:idx + N_i] = B
idx += N_i
return A
# Here log function
def lnpdf(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
a_trans = np.transpose(self.a)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
J = a_trans.dot(K).dot(self.a) - a_trans.dot(K).dot(self.A).dot(inv_part).dot(self.A).dot(K).dot(self.a)
J_star = (1. / self.lambdaa) * J
return (-1. / self.sigma2) * J_star
# Here gradient function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
K = self.kern.K(x)
paran = self.lambdaa * np.eye(x.shape[0]) + self.A.dot(K).dot(self.A)
inv_part = pdinv(paran)[0]
b = self.A.dot(inv_part).dot(self.A).dot(K).dot(self.a)
a_Minus_b = self.a - b
a_b_trans = np.transpose(a_Minus_b)
DJ_star_DK = (1. / self.lambdaa) * (a_Minus_b.dot(a_b_trans))
DJ_star_DX = self.kern.gradients_X(DJ_star_DK, x)
return (-1. / self.sigma2) * DJ_star_DX
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
def __getstate___(self):
return self.lbl, self.lambdaa, self.sigma2, self.kern, self.x_shape
def __setstate__(self, state):
lbl, lambdaa, sigma2, kern, a, A, x_shape = state
self.datanum = lbl.shape[0]
self.classnum = lbl.shape[1]
self.lambdaa = lambdaa
self.sigma2 = sigma2
self.lbl = lbl
self.kern = kern
lst_ni = self.compute_lst_ni()
self.a = self.compute_a(lst_ni)
self.A = self.compute_A(lst_ni)
self.x_shape = x_shape
class DGPLVM(Prior):
"""
    Implementation of the Discriminative Gaussian Process Latent Variable (DGPLVM) model prior, following the paper by Raquel Urtasun.
:param sigma2: constant
.. Note:: DGPLVM for Classification paper implementation
"""
domain = _REAL
# _instances = []
# def __new__(cls, mu, sigma): # Singleton:
# if cls._instances:
# cls._instances[:] = [instance for instance in cls._instances if instance()]
# for instance in cls._instances:
# if instance().mu == mu and instance().sigma == sigma:
# return instance()
# o = super(Prior, cls).__new__(cls, mu, sigma)
# cls._instances.append(weakref.ref(o))
# return cls._instances[-1]()
def __init__(self, sigma2, lbl, x_shape):
self.sigma2 = sigma2
# self.x = x
self.lbl = lbl
self.classnum = lbl.shape[1]
self.datanum = lbl.shape[0]
self.x_shape = x_shape
self.dim = x_shape[1]
def get_class_label(self, y):
for idx, v in enumerate(y):
if v == 1:
return idx
return -1
# This function assigns each data point to its own class
# and returns the dictionary which contains the class name and parameters.
def compute_cls(self, x):
cls = {}
# Appending each data point to its proper class
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in cls:
cls[class_label] = []
cls[class_label].append(x[j])
return cls
# This function computes mean of each class. The mean is calculated through each dimension
def compute_Mi(self, cls):
M_i = np.zeros((self.classnum, self.dim))
for i in cls:
# Mean of each class
M_i[i] = np.mean(cls[i], axis=0)
return M_i
# Adding data points as tuple to the dictionary so that we can access indices
def compute_indices(self, x):
data_idx = {}
for j in xrange(self.datanum):
class_label = self.get_class_label(self.lbl[j])
if class_label not in data_idx:
data_idx[class_label] = []
t = (j, x[j])
data_idx[class_label].append(t)
return data_idx
# Adding indices to the list so we can access whole the indices
def compute_listIndices(self, data_idx):
lst_idx = []
lst_idx_all = []
for i in data_idx:
if len(lst_idx) == 0:
pass
                # Do nothing: this is the first time through, so the list is still empty
else:
lst_idx = []
# Here we put indices of each class in to the list called lst_idx_all
for m in xrange(len(data_idx[i])):
lst_idx.append(data_idx[i][m][0])
lst_idx_all.append(lst_idx)
return lst_idx_all
# This function calculates between classes variances
def compute_Sb(self, cls, M_i, M_0):
Sb = np.zeros((self.dim, self.dim))
for i in cls:
B = (M_i[i] - M_0).reshape(self.dim, 1)
B_trans = B.transpose()
Sb += (float(len(cls[i])) / self.datanum) * B.dot(B_trans)
return Sb
# This function calculates within classes variances
def compute_Sw(self, cls, M_i):
Sw = np.zeros((self.dim, self.dim))
for i in cls:
N_i = float(len(cls[i]))
W_WT = np.zeros((self.dim, self.dim))
for xk in cls[i]:
W = (xk - M_i[i])
W_WT += np.outer(W, W)
Sw += (N_i / self.datanum) * ((1. / N_i) * W_WT)
return Sw
# Calculating beta and Bi for Sb
def compute_sig_beta_Bi(self, data_idx, M_i, M_0, lst_idx_all):
# import pdb
# pdb.set_trace()
B_i = np.zeros((self.classnum, self.dim))
Sig_beta_B_i_all = np.zeros((self.datanum, self.dim))
for i in data_idx:
# pdb.set_trace()
# Calculating Bi
B_i[i] = (M_i[i] - M_0).reshape(1, self.dim)
for k in xrange(self.datanum):
for i in data_idx:
N_i = float(len(data_idx[i]))
if k in lst_idx_all[i]:
beta = (float(1) / N_i) - (float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
else:
beta = -(float(1) / self.datanum)
Sig_beta_B_i_all[k] += float(N_i) / self.datanum * (beta * B_i[i])
Sig_beta_B_i_all = Sig_beta_B_i_all.transpose()
return Sig_beta_B_i_all
# Calculating W_j s separately so we can access all the W_j s anytime
def compute_wj(self, data_idx, M_i):
W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
xj = tpl[1]
j = tpl[0]
W_i[j] = (xj - M_i[i])
return W_i
# Calculating alpha and Wj for Sw
def compute_sig_alpha_W(self, data_idx, lst_idx_all, W_i):
Sig_alpha_W_i = np.zeros((self.datanum, self.dim))
for i in data_idx:
N_i = float(len(data_idx[i]))
for tpl in data_idx[i]:
k = tpl[0]
for j in lst_idx_all[i]:
if k == j:
alpha = 1 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
else:
alpha = 0 - (float(1) / N_i)
Sig_alpha_W_i[k] += (alpha * W_i[j])
Sig_alpha_W_i = (1. / self.datanum) * np.transpose(Sig_alpha_W_i)
return Sig_alpha_W_i
# This function calculates log of our prior
def lnpdf(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
#Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
return (-1 / self.sigma2) * np.trace(Sb_inv_N.dot(Sw))
# This function calculates derivative of the log of prior function
def lnpdf_grad(self, x):
x = x.reshape(self.x_shape)
cls = self.compute_cls(x)
M_0 = np.mean(x, axis=0)
M_i = self.compute_Mi(cls)
Sb = self.compute_Sb(cls, M_i, M_0)
Sw = self.compute_Sw(cls, M_i)
data_idx = self.compute_indices(x)
lst_idx_all = self.compute_listIndices(data_idx)
Sig_beta_B_i_all = self.compute_sig_beta_Bi(data_idx, M_i, M_0, lst_idx_all)
W_i = self.compute_wj(data_idx, M_i)
Sig_alpha_W_i = self.compute_sig_alpha_W(data_idx, lst_idx_all, W_i)
# Calculating inverse of Sb and its transpose and minus
# Sb_inv_N = np.linalg.inv(Sb + np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))
# Sb_inv_N = np.linalg.inv(Sb+np.eye(Sb.shape[0])*0.1)
Sb_inv_N = pdinv(Sb+ np.eye(Sb.shape[0]) * (np.diag(Sb).min() * 0.1))[0]
Sb_inv_N_trans = np.transpose(Sb_inv_N)
Sb_inv_N_trans_minus = -1 * Sb_inv_N_trans
Sw_trans = np.transpose(Sw)
# Calculating DJ/DXk
DJ_Dxk = 2 * (
Sb_inv_N_trans_minus.dot(Sw_trans).dot(Sb_inv_N_trans).dot(Sig_beta_B_i_all) + Sb_inv_N_trans.dot(
Sig_alpha_W_i))
# Calculating derivative of the log of the prior
DPx_Dx = ((-1 / self.sigma2) * DJ_Dxk)
return DPx_Dx.T
# def frb(self, x):
# from functools import partial
# from GPy.models import GradientChecker
# f = partial(self.lnpdf)
# df = partial(self.lnpdf_grad)
# grad = GradientChecker(f, df, x, 'X')
# grad.checkgrad(verbose=1)
def rvs(self, n):
return np.random.rand(n) # A WRONG implementation
def __str__(self):
return 'DGPLVM_prior'
class HalfT(Prior):
"""
Implementation of the half student t probability function, coupled with random variables.
:param A: scale parameter
:param nu: degrees of freedom
"""
domain = _POSITIVE
_instances = []
def __new__(cls, A, nu): # Singleton:
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().A == A and instance().nu == nu:
return instance()
o = super(Prior, cls).__new__(cls, A, nu)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, A, nu):
self.A = float(A)
self.nu = float(nu)
self.constant = gammaln(.5*(self.nu+1.)) - gammaln(.5*self.nu) - .5*np.log(np.pi*self.A*self.nu)
def __str__(self):
return "hT({:.2g}, {:.2g})".format(self.A, self.nu)
def lnpdf(self,theta):
return (theta>0) * ( self.constant -.5*(self.nu+1) * np.log( 1.+ (1./self.nu) * (theta/self.A)**2 ) )
#theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
#lnpdfs = np.zeros_like(theta)
#theta = np.array([theta])
#above_zero = theta.flatten()>1e-6
#v = self.nu
#sigma2=self.A
#stop
#lnpdfs[above_zero] = (+ gammaln((v + 1) * 0.5)
# - gammaln(v * 0.5)
# - 0.5*np.log(sigma2 * v * np.pi)
# - 0.5*(v + 1)*np.log(1 + (1/np.float(v))*((theta[above_zero][0]**2)/sigma2))
#)
#return lnpdfs
def lnpdf_grad(self,theta):
theta = theta if isinstance(theta,np.ndarray) else np.array([theta])
grad = np.zeros_like(theta)
above_zero = theta>1e-6
v = self.nu
sigma2=self.A
grad[above_zero] = -0.5*(v+1)*(2*theta[above_zero])/(v*sigma2 + theta[above_zero][0]**2)
return grad
def rvs(self, n):
#return np.random.randn(n) * self.sigma + self.mu
from scipy.stats import t
#[np.abs(x) for x in t.rvs(df=4,loc=0,scale=50, size=10000)])
ret = t.rvs(self.nu,loc=0,scale=self.A, size=n)
ret[ret<0] = 0
return ret


@@ -0,0 +1,225 @@
# Copyright (c) 2014, James Hensman, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from parameterized import Parameterized
from param import Param
class Remapping(Parameterized):
def mapping(self):
"""
The return value of this function gives the values which the re-mapped
parameters should take. Implement in sub-classes.
"""
raise NotImplementedError
def callback(self):
raise NotImplementedError
def __str__(self):
return self.name
def parameters_changed(self):
#ensure all out parameters have the correct value, as specified by our mapping
index = self._highest_parent_.constraints[self]
self._highest_parent_.param_array[index] = self.mapping()
[p.notify_observers(which=self) for p in self.tied_parameters]
class Fix(Remapping):
pass
class Tie(Parameterized):
"""
The new parameter tie framework. (under development)
All the parameters tied together get a new parameter inside the *Tie* object.
Its value should always be equal to all the tied parameters, and its gradient
is the sum of all the tied parameters.
=====Implementation Details=====
The *Tie* object should only exist on the top of param tree (the highest parent).
self.label_buf:
It uses a label buffer that has the same length as all the parameters (self._highest_parent_.param_array).
        The buffer keeps track of all the tied parameters. All the tied parameters have a label (an integer) higher
than 0, and the parameters that have the same label are tied together.
self.buf_index:
An auxiliary index list for the global index of the tie parameter inside the *Tie* object.
================================
TODO:
* EVERYTHING
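
    For instance (editor's note): entries of label_buf that share the same
    positive label are tied together, e.g. label_buf = [1, 0, 1] ties the
    first and third entries while the second (label 0) stays untied.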
"""
def __init__(self, name='tie'):
super(Tie, self).__init__(name)
self.tied_param = None
# The buffer keeps track of tie status
self.label_buf = None
# The global indices of the 'tied' param
self.buf_idx = None
# A boolean array indicating non-tied parameters
self._tie_ = None
def getTieFlag(self, p=None):
if self.tied_param is None:
if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
if p is not None:
return self._tie_[p._highest_parent_._raveled_index_for(p)]
return self._tie_
def _init_labelBuf(self):
if self.label_buf is None:
self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
if self._tie_ is None or self._tie_.size != self._highest_parent_.param_array.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
def _updateTieFlag(self):
if self._tie_.size != self.label_buf.size:
self._tie_ = np.ones((self._highest_parent_.param_array.size,),dtype=np.bool)
self._tie_[self.label_buf>0] = False
self._tie_[self.buf_idx] = True
def add_tied_parameter(self, p, p2=None):
"""
Tie the list of parameters p together (p2==None) or
Tie the list of parameters p with the list of parameters p2 (p2!=None)
"""
self._init_labelBuf()
if p2 is None:
idx = self._highest_parent_._raveled_index_for(p)
val = self._sync_val_group(idx)
if np.all(self.label_buf[idx]==0):
# None of p has been tied before.
tie_idx = self._expandTieParam(1)
print tie_idx
tie_id = self.label_buf.max()+1
self.label_buf[tie_idx] = tie_id
else:
b = self.label_buf[idx]
ids = np.unique(b[b>0])
tie_id, tie_idx = self._merge_tie_param(ids)
self._highest_parent_.param_array[tie_idx] = val
idx = self._highest_parent_._raveled_index_for(p)
self.label_buf[idx] = tie_id
else:
pass
self._updateTieFlag()
def _merge_tie_param(self, ids):
"""Merge the tie parameters with ids in the list."""
if len(ids)==1:
id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==ids[0]][0]
return ids[0],id_final_idx
id_final = ids[0]
ids_rm = ids[1:]
label_buf_param = self.label_buf[self.buf_idx]
idx_param = [np.where(label_buf_param==i)[0][0] for i in ids_rm]
self._removeTieParam(idx_param)
[np.put(self.label_buf, np.where(self.label_buf==i), id_final) for i in ids_rm]
id_final_idx = self.buf_idx[self.label_buf[self.buf_idx]==id_final][0]
return id_final, id_final_idx
def _sync_val_group(self, idx):
self._highest_parent_.param_array[idx] = self._highest_parent_.param_array[idx].mean()
return self._highest_parent_.param_array[idx][0]
def _expandTieParam(self, num):
"""Expand the tie param with the number of *num* parameters"""
if self.tied_param is None:
new_buf = np.empty((num,))
else:
new_buf = np.empty((self.tied_param.size+num,))
new_buf[:self.tied_param.size] = self.tied_param.param_array.copy()
            self.unlink_parameter(self.tied_param)
self.tied_param = Param('tied',new_buf)
        self.link_parameter(self.tied_param)
buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
self._expand_label_buf(self.buf_idx, buf_idx_new)
self.buf_idx = buf_idx_new
return self.buf_idx[-num:]
def _removeTieParam(self, idx):
"""idx within tied_param"""
new_buf = np.empty((self.tied_param.size-len(idx),))
bool_list = np.ones((self.tied_param.size,),dtype=np.bool)
bool_list[idx] = False
new_buf[:] = self.tied_param.param_array[bool_list]
        self.unlink_parameter(self.tied_param)
self.tied_param = Param('tied',new_buf)
        self.link_parameter(self.tied_param)
buf_idx_new = self._highest_parent_._raveled_index_for(self.tied_param)
self._shrink_label_buf(self.buf_idx, buf_idx_new, bool_list)
self.buf_idx = buf_idx_new
def _expand_label_buf(self, idx_old, idx_new):
"""Expand label buffer accordingly"""
if idx_old is None:
self.label_buf = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
else:
bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
bool_old[idx_old] = True
bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
bool_new[idx_new] = True
label_buf_new = np.zeros(self._highest_parent_.param_array.shape, dtype=np.int)
label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
label_buf_new[idx_new[:len(idx_old)]] = self.label_buf[idx_old]
self.label_buf = label_buf_new
def _shrink_label_buf(self, idx_old, idx_new, bool_list):
bool_old = np.zeros((self.label_buf.size,),dtype=np.bool)
bool_old[idx_old] = True
bool_new = np.zeros((self._highest_parent_.param_array.size,),dtype=np.bool)
bool_new[idx_new] = True
label_buf_new = np.empty(self._highest_parent_.param_array.shape, dtype=np.int)
label_buf_new[np.logical_not(bool_new)] = self.label_buf[np.logical_not(bool_old)]
label_buf_new[idx_new] = self.label_buf[idx_old[bool_list]]
self.label_buf = label_buf_new
def _check_change(self):
changed = False
if self.tied_param is not None:
for i in xrange(self.tied_param.size):
b0 = self.label_buf==self.label_buf[self.buf_idx[i]]
b = self._highest_parent_.param_array[b0]!=self.tied_param[i]
if b.sum()==0:
print 'XXX'
continue
elif b.sum()==1:
print '!!!'
val = self._highest_parent_.param_array[b0][b][0]
self._highest_parent_.param_array[b0] = val
else:
print '@@@'
self._highest_parent_.param_array[b0] = self.tied_param[i]
changed = True
return changed
def parameters_changed(self):
#ensure all out parameters have the correct value, as specified by our mapping
changed = self._check_change()
if changed:
self._highest_parent_._trigger_params_changed()
self.collate_gradient()
def collate_gradient(self):
if self.tied_param is not None:
self.tied_param.gradient = 0.
[np.put(self.tied_param.gradient, i, self._highest_parent_.gradient[self.label_buf==self.label_buf[self.buf_idx[i]]].sum())
for i in xrange(self.tied_param.size)]
def propagate_val(self):
if self.tied_param is not None:
for i in xrange(self.tied_param.size):
self._highest_parent_.param_array[self.label_buf==self.label_buf[self.buf_idx[i]]] = self.tied_param[i]


@@ -0,0 +1,433 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import weakref
import sys
_exp_lim_val = np.finfo(np.float64).max
_lim_val = 36.0
epsilon = np.finfo(np.float64).resolution
#===============================================================================
# Fixing constants
__fixed__ = "fixed"
FIXED = False
UNFIXED = True
#===============================================================================
class Transformation(object):
domain = None
_instance = None
def __new__(cls, *args, **kwargs):
if not cls._instance or cls._instance.__class__ is not cls:
cls._instance = super(Transformation, cls).__new__(cls, *args, **kwargs)
return cls._instance
def f(self, opt_param):
raise NotImplementedError
def finv(self, model_param):
raise NotImplementedError
def gradfactor(self, model_param, dL_dmodel_param):
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
i.e.:
define
.. math::
\frac{\partial L}{\partial f}\left.\frac{\partial f(x)}{\partial x}\right|_{x=f^{-1}(f)}
"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x), axes=axes, *args, **kw)
axes = plt.gca()
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
def __str__(self):
raise NotImplementedError
def __repr__(self):
return self.__class__.__name__
class Logexp(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x>_lim_val, x, np.log1p(np.exp(np.clip(x, -_lim_val, _lim_val)))) + epsilon
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, f, np.log(np.exp(f+1e-20) - 1.))
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, np.where(f>_lim_val, 1., 1. - np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
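A quick sanity check of the softplus transform above, written as a hedged plain-numpy sketch rather than through the Logexp class (the clipping and epsilon terms are ignored): f and finv should invert each other, and the factor 1 - exp(-f) used in gradfactor should equal the sigmoid of the untransformed value.
import numpy as np

x = np.array([-3.0, 0.0, 4.0])             # unconstrained optimiser values
f = np.log1p(np.exp(x))                    # softplus, as in Logexp.f
x_back = np.log(np.expm1(f))               # inverse, as in Logexp.finv
grad_factor = 1. - np.exp(-f)              # d softplus / dx, written in terms of f
print(np.allclose(x, x_back))                            # True
print(np.allclose(grad_factor, 1. / (1. + np.exp(-x))))  # True: equals sigmoid(x)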
class NormalTheta(Transformation):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def f(self, theta):
# In here abs is only a trick to make sure the numerics are ok.
# The variance will never go below zero, but at initialization we need to make sure
# that the values are ok
# Before:
theta[self.var_indices] = np.abs(-.5/theta[self.var_indices])
theta[self.mu_indices] *= theta[self.var_indices]
return theta # which is now {mu, var}
def finv(self, muvar):
# before:
varp = muvar[self.var_indices]
muvar[self.mu_indices] /= varp
muvar[self.var_indices] = -.5/varp
return muvar # which is now {theta1, theta2}
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*(var)**2
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
#=======================================================================
return dmuvar # which is now the gradient multiplicator for {theta1, theta2}
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "theta"
def __getstate__(self):
return [self.mu_indices, self.var_indices]
def __setstate__(self, state):
self.mu_indices = state[0]
self.var_indices = state[1]
class NormalNaturalAntti(NormalTheta):
_instances = []
_logexp = Logexp()
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*var**2#np.einsum('i,i,i,i->i', dmuvar[self.var_indices], [2], var, var)
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "natantti"
class NormalEta(Transformation):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def f(self, theta):
theta[self.var_indices] = np.abs(theta[self.var_indices] - theta[self.mu_indices]**2)
return theta # which is now {mu, var}
def finv(self, muvar):
muvar[self.var_indices] += muvar[self.mu_indices]**2
return muvar # which is now {eta1, eta2}
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
#=======================================================================
# Lets try natural gradients instead: Not working with bfgs... try stochastic!
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def initialize(self, f):
if np.any(f[self.var_indices] < 0.):
print "Warning: changing parameters to satisfy constraints"
f[self.var_indices] = np.abs(f[self.var_indices])
return f
def __str__(self):
return "eta"
class NormalNaturalThroughTheta(NormalTheta):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# This is just eta direction:
dmuvar[self.mu_indices] -= 2*mu*dmuvar[self.var_indices]
#=======================================================================
#=======================================================================
# This is by going through theta fully and then going into eta direction:
#dmu = dmuvar[self.mu_indices]
#dmuvar[self.var_indices] += dmu*mu*(var + 4/var)
#=======================================================================
return dmuvar # which is now the gradient multiplicator
def __str__(self):
return "natgrad"
class NormalNaturalThroughEta(NormalEta):
_instances = []
def __new__(cls, mu_indices, var_indices):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if np.all(instance().mu_indices==mu_indices, keepdims=False) and np.all(instance().var_indices==var_indices, keepdims=False):
return instance()
o = super(Transformation, cls).__new__(cls, mu_indices, var_indices)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, mu_indices, var_indices):
self.mu_indices = mu_indices
self.var_indices = var_indices
def gradfactor(self, muvar, dmuvar):
mu = muvar[self.mu_indices]
var = muvar[self.var_indices]
#=======================================================================
# theta gradients
# This works and the gradient checks!
dmuvar[self.mu_indices] *= var
dmuvar[self.var_indices] *= 2*(var)**2
dmuvar[self.var_indices] += 2*dmuvar[self.mu_indices]*mu
#=======================================================================
return dmuvar
def __str__(self):
return "natgrad"
class LogexpNeg(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x>_lim_val, -x, -np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, np.where(f>_lim_val, -1, -1 + np.exp(-f)))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeLogexp(Transformation):
domain = _NEGATIVE
logexp = Logexp()
def f(self, x):
return -self.logexp.f(x) # np.log(1. + np.exp(x))
def finv(self, f):
return self.logexp.finv(-f) # np.log(np.exp(-f) - 1.)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, -self.logexp.gradfactor(-f))
def initialize(self, f):
return -self.logexp.initialize(f) # np.abs(f)
def __str__(self):
return '-ve'
class LogexpClipped(Logexp):
max_bound = 1e100
min_bound = 1e-10
log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound)
domain = _POSITIVE
_instances = []
def __new__(cls, lower=1e-6, *args, **kwargs):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().lower == lower:
return instance()
o = super(Transformation, cls).__new__(cls, lower, *args, **kwargs)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower=1e-6):
self.lower = lower
def f(self, x):
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp)
# if np.isnan(f).any():
# import ipdb;ipdb.set_trace()
return np.clip(f, self.min_bound, self.max_bound)
def finv(self, f):
return np.log(np.exp(f) - 1.)
def gradfactor(self, f, df):
ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
gf = (ef - 1.) / ef
return np.einsum('i,i->i', df, gf) # np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve_c'
class Exponent(Transformation):
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
domain = _POSITIVE
def f(self, x):
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeExponent(Exponent):
domain = _NEGATIVE
def f(self, x):
return -Exponent.f(self, x)
def finv(self, f):
return Exponent.finv(self, -f)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, f)
def initialize(self, f):
return -Exponent.initialize(self, f) #np.abs(f)
def __str__(self):
return '-ve'
class Square(Transformation):
domain = _POSITIVE
def f(self, x):
return x ** 2
def finv(self, x):
return np.sqrt(x)
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, 2 * np.sqrt(f))
def initialize(self, f):
return np.abs(f)
def __str__(self):
return '+sq'
class Logistic(Transformation):
domain = _BOUNDED
_instances = []
def __new__(cls, lower=1e-6, upper=1e-6, *args, **kwargs):
if cls._instances:
cls._instances[:] = [instance for instance in cls._instances if instance()]
for instance in cls._instances:
if instance().lower == lower and instance().upper == upper:
return instance()
o = super(Transformation, cls).__new__(cls, lower, upper, *args, **kwargs)
cls._instances.append(weakref.ref(o))
return cls._instances[-1]()
def __init__(self, lower, upper):
assert lower < upper
self.lower, self.upper = float(lower), float(upper)
self.difference = self.upper - self.lower
def f(self, x):
if (x<-300.).any():
x = x.copy()
x[x<-300.] = -300.
return self.lower + self.difference / (1. + np.exp(-x))
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f, df):
return np.einsum('i,i->i', df, (f - self.lower) * (self.upper - f) / self.difference)
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
#return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
#FIXME: Max, zeros_like right?
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(np.zeros_like(f)), f)
def __str__(self):
return '{},{}'.format(self.lower, self.upper)
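A short usage sketch for the bounded transform above, again in plain numpy with assumed bounds 0 and 10 (illustrative values only): the unconstrained value is squashed into (lower, upper) and finv recovers it.
import numpy as np

lower, upper = 0., 10.
x = np.array([-2.0, 0.0, 3.0])                    # unconstrained optimiser values
f = lower + (upper - lower) / (1. + np.exp(-x))   # as in Logistic.f
x_back = np.log((f - lower) / (upper - f))        # as in Logistic.finv
print((f > lower).all() and (f < upper).all())    # True
print(np.allclose(x, x_back))                     # True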

View file

@@ -0,0 +1,63 @@
'''
Created on 11 Nov 2014
@author: maxz
'''
from observable import Observable
class Updateable(Observable):
"""
A model can be updated or not.
Make sure updates can be switched on and off.
"""
_updates = True
def __init__(self, *args, **kwargs):
super(Updateable, self).__init__(*args, **kwargs)
@property
def updates(self):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
@updates.setter
def updates(self, ups):
raise DeprecationWarning("updates is now a function, see update(True|False|None)")
def update_model(self, updates=None):
"""
Get or set whether automatic updates are performed. When updates are
off, the model might be in a non-working state. To make the model work
turn updates on again.
:param bool|None updates:
bool: whether to do updates
None: get the current update state
"""
if updates is None:
p = getattr(self, '_highest_parent_', None)
if p is not None:
self._updates = p._updates
return self._updates
assert isinstance(updates, bool), "updates are either on (True) or off (False)"
p = getattr(self, '_highest_parent_', None)
if p is not None:
p._updates = updates
self._updates = updates
self.trigger_update()
def toggle_update(self):
self.update_model(not self.update_model())
def trigger_update(self, trigger_parent=True):
"""
Update the model from the current state.
Make sure that updates are on, otherwise this
method will do nothing
:param bool trigger_parent: Whether to trigger the parent, after self has updated
"""
if not self.update_model() or (hasattr(self, "_in_init_") and self._in_init_):
#print "Warning: updates are off, updating the model will do nothing"
return
self._trigger_params_changed(trigger_parent)
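A hedged sketch of the intended on/off protocol, using a made-up Toy class instead of the real Updateable mixin (which needs the Observable machinery): switching updates off lets many parameters be edited cheaply, and switching them back on triggers a single recomputation.
class Toy(object):
    # illustrative stand-in, not a GPy class
    _updates = True
    def update_model(self, updates=None):
        if updates is None:
            return self._updates            # query the current state
        self._updates = bool(updates)
        self.trigger_update()
    def trigger_update(self):
        if not self.update_model():
            return                          # updates are off: do nothing
        print("recomputing expensive quantities")

m = Toy()
m.update_model(False)   # edit many parameters without triggering recomputation
m.update_model(True)    # switch back on; one recomputation happens here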

View file

@@ -0,0 +1,220 @@
'''
Created on 6 Nov 2013
@author: maxz
'''
import numpy as np
from parameterized import Parameterized
from param import Param
from transformations import Logexp, Logistic,__fixed__
from GPy.util.misc import param_to_array
from GPy.util.caching import Cache_this
class VariationalPrior(Parameterized):
def __init__(self, name='latent space', **kw):
super(VariationalPrior, self).__init__(name=name, **kw)
def KL_divergence(self, variational_posterior):
raise NotImplementedError, "override this for variational inference of latent space"
def update_gradients_KL(self, variational_posterior):
"""
updates the gradients for mean and variance **in place**
"""
raise NotImplementedError, "override this for variational inference of latent space"
class NormalPrior(VariationalPrior):
def KL_divergence(self, variational_posterior):
var_mean = np.square(variational_posterior.mean).sum()
var_S = (variational_posterior.variance - np.log(variational_posterior.variance)).sum()
return 0.5 * (var_mean + var_S) - 0.5 * variational_posterior.input_dim * variational_posterior.num_data
def update_gradients_KL(self, variational_posterior):
# dL:
variational_posterior.mean.gradient -= variational_posterior.mean
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
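The two gradient lines above are the derivatives of the KL term with respect to the means and variances. For reference, the KL being computed is the closed form KL(N(mu, S) || N(0, I)) summed over all entries; a quick numerical sketch of the equivalent expression (toy arrays, not Param objects):
import numpy as np

mu = np.array([[0.3, -1.2], [0.8, 0.1]])     # means, shape (num_data, input_dim)
S = np.array([[0.5, 2.0], [1.3, 0.7]])       # per-entry variances
kl_as_in_code = 0.5 * (np.square(mu).sum() + (S - np.log(S)).sum()) - 0.5 * mu.size
kl_closed_form = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.)
print(np.allclose(kl_as_in_code, kl_closed_form))   # True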
class SpikeAndSlabPrior(VariationalPrior):
def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
self.variance = Param('variance',variance)
self.learnPi = learnPi
if learnPi:
self.pi = Param('Pi', pi, Logistic(1e-10,1.-1e-10))
else:
self.pi = Param('Pi', pi, __fixed__)
self.link_parameter(self.pi)
def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
var_mean = np.square(mu)/self.variance
var_S = (S/self.variance - np.log(S))
var_gamma = (gamma*(log_gamma-np.log(pi))).sum()+(gamma1*(log_gamma1-np.log(1-pi))).sum()
return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
def update_gradients_KL(self, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma,gamma1 = variational_posterior.gamma_probabilities()
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
if len(self.pi.shape)==2:
idx = np.unique(gamma._raveled_index()/gamma.shape[-1])
pi = self.pi[idx]
else:
pi = self.pi
variational_posterior.binary_prob.gradient -= (np.log((1-pi)/pi)+log_gamma-log_gamma1+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.)*gamma*gamma1
mu.gradient -= gamma*mu/self.variance
S.gradient -= (1./self.variance - 1./S) * gamma /2.
if self.learnPi:
if len(self.pi)==1:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
elif len(self.pi.shape)==1:
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
else:
self.pi[idx].gradient = (gamma/self.pi[idx] - (1.-gamma)/(1.-self.pi[idx]))
class VariationalPosterior(Parameterized):
def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
super(VariationalPosterior, self).__init__(name=name, *a, **kw)
self.mean = Param("mean", means)
self.variance = Param("variance", variances, Logexp())
self.ndim = self.mean.ndim
self.shape = self.mean.shape
self.num_data, self.input_dim = self.mean.shape
self.link_parameters(self.mean, self.variance)
self.num_data, self.input_dim = self.mean.shape
if self.has_uncertain_inputs():
assert self.variance.shape == self.mean.shape, "need one variance per sample and dimension"
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient = grad
def _raveled_index(self):
index = np.empty(dtype=int, shape=0)
size = 0
for p in self.parameters:
index = np.hstack((index, p._raveled_index()+size))
size += p._realsize_ if hasattr(p, '_realsize_') else p.size
return index
def has_uncertain_inputs(self):
return self.variance is not None
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(VariationalPosterior, self).__getitem__(s)
class NormalPosterior(VariationalPosterior):
'''
NormalPosterior distribution for variational approximations.
holds the means and variances for a factorizing multivariate normal distribution
'''
def plot(self, *args):
"""
Plot latent space X in 1D:
See GPy.plotting.matplot_dep.variational_plots
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots
import matplotlib
return variational_plots.plot(self,*args)
class SpikeAndSlabPosterior(VariationalPosterior):
'''
The SpikeAndSlab distribution for variational approximations.
'''
def __init__(self, means, variances, binary_prob, name='latent space'):
"""
binary_prob : the probability of the distribution on the slab part.
"""
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
self.gamma = Param("binary_prob",binary_prob)
self.link_parameter(self.gamma)
@Cache_this(limit=5)
def gamma_probabilities(self):
prob = np.zeros_like(param_to_array(self.gamma))
prob[self.gamma>-710] = 1./(1.+np.exp(-self.gamma[self.gamma>-710]))
prob1 = -np.zeros_like(param_to_array(self.gamma))
prob1[self.gamma<710] = 1./(1.+np.exp(self.gamma[self.gamma<710]))
return prob, prob1
@Cache_this(limit=5)
def gamma_log_prob(self):
loggamma = param_to_array(self.gamma).copy()
loggamma[loggamma>-40] = -np.log1p(np.exp(-loggamma[loggamma>-40]))
loggamma1 = -param_to_array(self.gamma).copy()
loggamma1[loggamma1>-40] = -np.log1p(np.exp(-loggamma1[loggamma1>-40]))
return loggamma,loggamma1
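Both cached methods above are guarded evaluations of the logistic function and its log: the thresholds (-710 and -40, chosen so that np.exp and np.log1p stay in range for float64) avoid overflow for extreme logits. A plain-numpy sketch of the same guard on toy logits:
import numpy as np

logit = np.array([-800., -5., 0., 5., 800.])
prob = np.zeros_like(logit)
mask = logit > -710
prob[mask] = 1. / (1. + np.exp(-logit[mask]))        # sigmoid, no overflow for very negative logits
log_prob = logit.copy()
mask = log_prob > -40
log_prob[mask] = -np.log1p(np.exp(-log_prob[mask]))  # log sigmoid; ~logit itself below the threshold
print(prob)      # approximately [0. 0.0067 0.5 0.9933 1.]
print(log_prob)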
def set_gradients(self, grad):
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
def __getitem__(self, s):
if isinstance(s, (int, slice, tuple, list, np.ndarray)):
import copy
n = self.__new__(self.__class__, self.name)
dc = self.__dict__.copy()
dc['mean'] = self.mean[s]
dc['variance'] = self.variance[s]
dc['binary_prob'] = self.binary_prob[s]
dc['parameters'] = copy.copy(self.parameters)
n.__dict__.update(dc)
n.parameters[dc['mean']._parent_index_] = dc['mean']
n.parameters[dc['variance']._parent_index_] = dc['variance']
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
n._gradient_array_ = None
oversize = self.size - self.mean.size - self.variance.size
n.size = n.mean.size + n.variance.size + oversize
n.ndim = n.mean.ndim
n.shape = n.mean.shape
n.num_data = n.mean.shape[0]
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
return n
else:
return super(SpikeAndSlabPosterior, self).__getitem__(s)
def plot(self, *args, **kwargs):
"""
Plot latent space X in 1D:
See GPy.plotting.matplot_dep.variational_plots
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import variational_plots
return variational_plots.plot_SpikeSlab(self,*args, **kwargs)

View file

@@ -1,465 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import re
import copy
import cPickle
import warnings
import transformations
class Parameterized(object):
def __init__(self):
"""
This is the base class for model and kernel. Mostly just handles tying and constraining of parameters
"""
self.tied_indices = []
self.fixed_indices = []
self.fixed_values = []
self.constrained_indices = []
self.constraints = []
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
def _get_param_names(self):
raise NotImplementedError, "this needs to be implemented to use the Parameterized class"
#def _get_print_names(self):
# """ Override for which names to print out, when using print m """
# return self._get_param_names()
def pickle(self, filename, protocol=-1):
with open(filename, 'wb') as f:
cPickle.dump(self, f, protocol=protocol)
def copy(self):
"""Returns a (deep) copy of the current model """
return copy.deepcopy(self)
def __getstate__(self):
if self._has_get_set_state():
return self.getstate()
return self.__dict__
def __setstate__(self, state):
if self._has_get_set_state():
self.setstate(state) # set state
self._set_params(self._get_params()) # restore all values
return
self.__dict__ = state
def _has_get_set_state(self):
return 'getstate' in vars(self.__class__) and 'setstate' in vars(self.__class__)
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
For inheriting from Parameterized:
Always append the state of the inherited object
and call down to the inherited object in setstate!!
"""
return [self.tied_indices,
self.fixed_indices,
self.fixed_values,
self.constrained_indices,
self.constraints]
def setstate(self, state):
self.constraints = state.pop()
self.constrained_indices = state.pop()
self.fixed_values = state.pop()
self.fixed_indices = state.pop()
self.tied_indices = state.pop()
def __getitem__(self, regexp, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular
expression and all parameters that match that regular expression are
returned.
"""
matches = self.grep_param_names(regexp)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s" % regexp
def __setitem__(self, name, val):
"""
Set model parameter(s) by name. The name is provided as a regular
expression. All parameters matching that regular expression are set to
the given value.
"""
matches = self.grep_param_names(name)
if len(matches):
val = np.array(val)
assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
x = self._get_params()
x[matches] = val
self._set_params(x)
else:
raise AttributeError, "no parameter matches %s" % name
def tie_params(self, regexp):
"""
Tie (all!) parameters matching the regular expression `regexp`.
"""
matches = self.grep_param_names(regexp)
assert matches.size > 0, "need at least something to tie together"
if len(self.tied_indices):
assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
self.tied_indices.append(matches)
# TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self, 'prior'):
pass
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
def untie_everything(self):
"""Unties all parameters by setting tied_indices to an empty list."""
self.tied_indices = []
def grep_param_names(self, regexp, transformed=False, search=False):
"""
:param regexp: regular expression to select parameter names
:type regexp: re | str | int
:rtype: the indices of self._get_param_names which match the regular expression.
Note:-
Other objects are passed through - i.e. integers which weren't meant for grepping
"""
if transformed:
names = self._get_param_names_transformed()
else:
names = self._get_param_names()
if type(regexp) in [str, np.string_, np.str]:
regexp = re.compile(regexp)
elif type(regexp) is re._pattern_type:
pass
else:
return regexp
if search:
return np.nonzero([regexp.search(name) for name in names])[0]
else:
return np.nonzero([regexp.match(name) for name in names])[0]
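The regexp lookup above is what the old string-based indexing (e.g. m['rbf']) was built on: re.match anchors at the start of each parameter name, while search=True matches anywhere in it. A standalone sketch with hypothetical parameter names:
import re
import numpy as np

names = ['rbf_variance', 'rbf_lengthscale', 'noise_variance']
regexp = re.compile('variance')
print(np.nonzero([regexp.match(n) for n in names])[0])    # [] : no name *starts* with 'variance'
print(np.nonzero([regexp.search(n) for n in names])[0])   # [0 2]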
def num_params_transformed(self):
removed = 0
for tie in self.tied_indices:
removed += tie.size - 1
for fix in self.fixed_indices:
removed += fix.size
return len(self._get_params()) - removed
def unconstrain(self, regexp):
"""Unconstrain matching parameters. Does not untie parameters"""
matches = self.grep_param_names(regexp)
# transformed constraints:
for match in matches:
self.constrained_indices = [i[i <> match] for i in self.constrained_indices]
# remove empty constraints
tmp = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
if tmp:
self.constrained_indices, self.constraints = zip(*[(i, t) for i, t in zip(self.constrained_indices, self.constraints) if len(i)])
self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)
# fixed:
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices, values in zip(self.fixed_indices, self.fixed_values)]
self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]
# remove empty elements
tmp = [(i, v) for i, v in zip(self.fixed_indices, self.fixed_values) if len(i)]
if tmp:
self.fixed_indices, self.fixed_values = zip(*tmp)
self.fixed_indices, self.fixed_values = list(self.fixed_indices), list(self.fixed_values)
else:
self.fixed_indices, self.fixed_values = [], []
def constrain_negative(self, regexp, warning=True):
""" Set negative constraints. """
self.constrain(regexp, transformations.negative_logexp(), warning=warning)
def constrain_positive(self, regexp, warning=True):
""" Set positive constraints. """
self.constrain(regexp, transformations.logexp(), warning=warning)
def constrain_bounded(self, regexp, lower, upper, warning=True):
""" Set bounded constraints. """
self.constrain(regexp, transformations.logistic(lower, upper), warning=warning)
def all_constrained_indices(self):
if len(self.constrained_indices) or len(self.fixed_indices):
return np.hstack(self.constrained_indices + self.fixed_indices)
else:
return np.empty(shape=(0,))
def constrain(self, regexp, transform, warning=True):
assert isinstance(transform, transformations.transformation)
matches = self.grep_param_names(regexp)
if warning:
overlap = set(matches).intersection(set(self.all_constrained_indices()))
if overlap:
self.unconstrain(np.asarray(list(overlap)))
print 'Warning: re-constraining these parameters'
pn = self._get_param_names()
for i in overlap:
print pn[i]
self.constrained_indices.append(matches)
self.constraints.append(transform)
x = self._get_params()
x[matches] = transform.initialize(x[matches])
self._set_params(x)
def constrain_fixed(self, regexp, value=None, warning=True):
"""
:param regexp: which parameters need to be fixed.
:type regexp: ndarray(dtype=int) or regular expression object or string
:param value: the value to fix the parameters to. If the value is not specified,
the parameter is fixed to the current value
:type value: float
**Notes**
Fixing a parameter which is tied to another, or constrained in some way will result in an error.
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes.
"""
matches = self.grep_param_names(regexp)
if warning:
overlap = set(matches).intersection(set(self.all_constrained_indices()))
if overlap:
self.unconstrain(np.asarray(list(overlap)))
print 'Warning: re-constraining these parameters'
pn = self._get_param_names()
for i in overlap:
print pn[i]
self.fixed_indices.append(matches)
if value != None:
self.fixed_values.append(value)
else:
self.fixed_values.append(self._get_params()[self.fixed_indices[-1]])
# self.fixed_values.append(value)
self._set_params_transformed(self._get_params_transformed())
def _get_params_transformed(self):
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
x = self._get_params()
[np.put(x, i, t.finv(x[i])) for i, t in zip(self.constrained_indices, self.constraints)]
to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
if len(to_remove):
return np.delete(x, np.hstack(to_remove))
else:
return x
def _set_params_transformed(self, x):
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
self._set_params(self._untransform_params(x))
def _untransform_params(self, x):
"""
The transformation required for _set_params_transformed.
This moves the vector x seen by the optimiser (unconstrained) to the
valid parameter vector seen by the model
Note:
- This function is separate from _set_params_transformed for downstream flexibility
"""
# work out how many places are fixed, and where they are. tricky logic!
fix_places = self.fixed_indices + [t[1:] for t in self.tied_indices]
if len(fix_places):
fix_places = np.hstack(fix_places)
Nfix_places = fix_places.size
else:
Nfix_places = 0
free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
# put the models values in the vector xx
xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
xx[free_places] = x
[np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
[np.put(xx, i, t.f(xx[i])) for i, t in zip(self.constrained_indices, self.constraints)]
if hasattr(self, 'debug'):
stop # @UndefinedVariable
return xx
def _get_param_names_transformed(self):
"""
Returns the parameter names as propagated after constraining,
tying or fixing, i.e. a list of the same length as _get_params_transformed()
"""
n = self._get_param_names()
# remove/concatenate the tied parameter names
if len(self.tied_indices):
for t in self.tied_indices:
n[t[0]] = "<tie>".join([n[tt] for tt in t])
remove = np.hstack([t[1:] for t in self.tied_indices])
else:
remove = np.empty(shape=(0,), dtype=np.int)
# also remove the fixed params
if len(self.fixed_indices):
remove = np.hstack((remove, np.hstack(self.fixed_indices)))
# add markers to show that some variables are constrained
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
n[ii] = n[ii] + t.__str__()
n = [nn for i, nn in enumerate(n) if not i in remove]
return n
#@property
#def all(self):
# return self.__str__(self._get_param_names())
#def __str__(self, names=None, nw=30):
def __str__(self, nw=30):
"""
Return a string describing the parameter names and their ties and constraints
"""
names = self._get_param_names()
#if names is None:
# names = self._get_print_names()
#name_indices = self.grep_param_names("|".join(names))
N = len(names)
if not N:
return "This object has no free parameters."
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params() # map(str,self._get_params())
#values = self._get_params()[name_indices] # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(names)
#constraints = [''] * len(self._get_param_names())
for i, t in zip(self.constrained_indices, self.constraints):
for ii in i:
constraints[ii] = t.__str__()
for i in self.fixed_indices:
for ii in i:
constraints[ii] = 'Fixed'
# sort out the ties
ties = [''] * len(names)
for i, tie in enumerate(self.tied_indices):
for j in tie:
ties[j] = '(' + str(i) + ')'
if values.size == 1:
values = ['%.4f' %float(values)]
else:
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
# columns = cols.sum()
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
def grep_model(self,regexp):
regexp_indices = self.grep_param_names(regexp)
all_names = self._get_param_names()
names = [all_names[pj] for pj in regexp_indices]
N = len(names)
if not N:
return "Match not found."
header = ['Name', 'Value', 'Constraints', 'Ties']
all_values = self._get_params()
values = np.array([all_values[pj] for pj in regexp_indices])
constraints = [''] * len(names)
_constrained_indices,aux = self._pick_elements(regexp_indices,self.constrained_indices)
_constraints = [self.constraints[pj] for pj in aux]
for i, t in zip(_constrained_indices, _constraints):
for ii in i:
iii = regexp_indices.tolist().index(ii)
constraints[iii] = t.__str__()
_fixed_indices,aux = self._pick_elements(regexp_indices,self.fixed_indices)
for i in _fixed_indices:
for ii in i:
iii = regexp_indices.tolist().index(ii)
constraints[iii] = 'Fixed'
_tied_indices,aux = self._pick_elements(regexp_indices,self.tied_indices)
ties = [''] * len(names)
for i,ti in zip(_tied_indices,aux):
for ii in i:
iii = regexp_indices.tolist().index(ii)
ties[iii] = '(' + str(ti) + ')'
if values.size == 1:
values = ['%.4f' %float(values)]
else:
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
max_values = max([len(values[i]) for i in range(len(values))] + [len(header[1])])
max_constraint = max([len(constraints[i]) for i in range(len(constraints))] + [len(header[2])])
max_ties = max([len(ties[i]) for i in range(len(ties))] + [len(header[3])])
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
print header_string[0]
print separator
for string in param_string:
print string
def _pick_elements(self,regexp_ind,array_list):
"""Removes from array_list the elements different from regexp_ind"""
new_array_list = [] #New list with elements matching regexp_ind
array_indices = [] #Indices that matches the arrays in new_array_list and array_list
array_index = 0
for array in array_list:
_new = []
for ai in array:
if ai in regexp_ind:
_new.append(ai)
if len(_new):
new_array_list.append(np.array(_new))
array_indices.append(array_index)
array_index += 1
return new_array_list, array_indices

View file

@@ -1,217 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from scipy.special import gammaln, digamma
from ..util.linalg import pdinv
from GPy.core.domains import REAL, POSITIVE
import warnings
class Prior:
domain = None
def pdf(self, x):
return np.exp(self.lnpdf(x))
def plot(self):
rvs = self.rvs(1000)
pb.hist(rvs, 100, normed=True)
xmin, xmax = pb.xlim()
xx = np.linspace(xmin, xmax, 1000)
pb.plot(xx, self.pdf(xx), 'r', linewidth=2)
class Gaussian(Prior):
"""
Implementation of the univariate Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = REAL
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "N(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
def lnpdf(self, x):
return self.constant - 0.5 * np.square(x - self.mu) / self.sigma2
def lnpdf_grad(self, x):
return -(x - self.mu) / self.sigma2
def rvs(self, n):
return np.random.randn(n) * self.sigma + self.mu
class LogGaussian(Prior):
"""
Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
:param mu: mean
:param sigma: standard deviation
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, mu, sigma):
self.mu = float(mu)
self.sigma = float(sigma)
self.sigma2 = np.square(self.sigma)
self.constant = -0.5 * np.log(2 * np.pi * self.sigma2)
def __str__(self):
return "lnN(" + str(np.round(self.mu)) + ', ' + str(np.round(self.sigma2)) + ')'
def lnpdf(self, x):
return self.constant - 0.5 * np.square(np.log(x) - self.mu) / self.sigma2 - np.log(x)
def lnpdf_grad(self, x):
return -((np.log(x) - self.mu) / self.sigma2 + 1.) / x
def rvs(self, n):
return np.exp(np.random.randn(n) * self.sigma + self.mu)
class MultivariateGaussian:
"""
Implementation of the multivariate Gaussian probability function, coupled with random variables.
:param mu: mean (N-dimensional array)
:param var: covariance matrix (NxN)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = REAL
def __init__(self, mu, var):
self.mu = np.array(mu).flatten()
self.var = np.array(var)
assert len(self.var.shape) == 2
assert self.var.shape[0] == self.var.shape[1]
assert self.var.shape[0] == self.mu.size
self.input_dim = self.mu.size
self.inv, self.hld = pdinv(self.var)
self.constant = -0.5 * self.input_dim * np.log(2 * np.pi) - self.hld
def summary(self):
raise NotImplementedError
def pdf(self, x):
return np.exp(self.lnpdf(x))
def lnpdf(self, x):
d = x - self.mu
return self.constant - 0.5 * np.sum(d * np.dot(d, self.inv), 1)
def lnpdf_grad(self, x):
d = x - self.mu
return -np.dot(self.inv, d)
def rvs(self, n):
return np.random.multivariate_normal(self.mu, self.var, n)
def plot(self):
if self.input_dim == 2:
rvs = self.rvs(200)
pb.plot(rvs[:, 0], rvs[:, 1], 'kx', mew=1.5)
xmin, xmax = pb.xlim()
ymin, ymax = pb.ylim()
xx, yy = np.mgrid[xmin:xmax:100j, ymin:ymax:100j]
xflat = np.vstack((xx.flatten(), yy.flatten())).T
zz = self.pdf(xflat).reshape(100, 100)
pb.contour(xx, yy, zz, linewidths=2)
def gamma_from_EV(E, V):
warnings.warn("use Gamma.from_EV to create Gamma Prior", FutureWarning)
return Gamma.from_EV(E, V)
class Gamma(Prior):
"""
Implementation of the Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "Ga(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
def summary(self):
ret = {"E[x]": self.a / self.b, \
"E[ln x]": digamma(self.a) - np.log(self.b), \
"var[x]": self.a / self.b / self.b, \
"Entropy": gammaln(self.a) - (self.a - 1.) * digamma(self.a) - np.log(self.b) + self.a}
if self.a > 1:
ret['Mode'] = (self.a - 1.) / self.b
else:
ret['mode'] = np.nan
return ret
def lnpdf(self, x):
return self.constant + (self.a - 1) * np.log(x) - self.b * x
def lnpdf_grad(self, x):
return (self.a - 1.) / x - self.b
def rvs(self, n):
return np.random.gamma(scale=1. / self.b, shape=self.a, size=n)
@staticmethod
def from_EV(E, V):
"""
Creates an instance of a Gamma Prior by specifying the Expected value(s)
and Variance(s) of the distribution.
:param E: expected value
:param V: variance
"""
a = np.square(E) / V
b = E / V
return Gamma(a, b)
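The moment matching above sets a = E^2 / V and b = E / V, so that the resulting Gamma has mean a/b = E and variance a/b^2 = V. A tiny standalone check with assumed values E = 2 and V = 0.5:
import numpy as np

E, V = 2.0, 0.5
a, b = np.square(E) / V, E / V    # shape and rate, as in Gamma.from_EV
print(a / b)                      # 2.0 (the requested mean)
print(a / b ** 2)                 # 0.5 (the requested variance)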
class inverse_gamma(Prior):
"""
Implementation of the inverse-Gamma probability function, coupled with random variables.
:param a: shape parameter
:param b: rate parameter (warning: it's the *inverse* of the scale)
.. Note:: Bishop 2006 notation is used throughout the code
"""
domain = POSITIVE
def __init__(self, a, b):
self.a = float(a)
self.b = float(b)
self.constant = -gammaln(self.a) + a * np.log(b)
def __str__(self):
return "iGa(" + str(np.round(self.a)) + ', ' + str(np.round(self.b)) + ')'
def lnpdf(self, x):
return self.constant - (self.a + 1) * np.log(x) - self.b / x
def lnpdf_grad(self, x):
return -(self.a + 1.) / x + self.b / x ** 2
def rvs(self, n):
return 1. / np.random.gamma(scale=1. / self.b, shape=self.a, size=n)

View file

@@ -1,16 +1,28 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import pylab as pb from gp import GP
from ..util.linalg import mdot, jitchol, tdot, symmetrify, backsub_both_sides, chol_inv, dtrtrs, dpotrs, dpotri from parameterization.param import Param
from scipy import linalg from ..inference.latent_function_inference import var_dtc
from ..likelihoods import Gaussian, EP,EP_Mixed_Noise from .. import likelihoods
from gp_base import GPBase from parameterization.variational import VariationalPosterior
class SparseGP(GPBase): import logging
from GPy.inference.latent_function_inference.posterior import Posterior
from GPy.inference.optimization.stochastics import SparseGPStochastics,\
SparseGPMissing
#no stochastics.py file added! from GPy.inference.optimization.stochastics import SparseGPStochastics,\
#SparseGPMissing
logger = logging.getLogger("sparse gp")
class SparseGP(GP):
""" """
Variational sparse GP model A general purpose Sparse GP model
This model allows (approximate) inference using variational DTC or FITC
(Gaussian likelihoods) as well as non-conjugate sparse methods based on
these.
:param X: inputs :param X: inputs
:type X: np.ndarray (num_data x input_dim) :type X: np.ndarray (num_data x input_dim)
@ -20,478 +32,101 @@ class SparseGP(GPBase):
:type kernel: a GPy.kern.kern instance :type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance) :param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None :type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs
:type Z: np.ndarray (num_inducing x input_dim) | None :type Z: np.ndarray (num_inducing x input_dim)
:param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None) :param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int :type num_inducing: int
:param normalize_(X|Y): whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
""" """
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
GPBase.__init__(self, X, likelihood, kernel, normalize_X=normalize_X) name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method
if inference_method is None:
if isinstance(likelihood, likelihoods.Gaussian):
inference_method = var_dtc.VarDTC(limit=1 if not self.missing_data else Y.shape[1])
else:
#inference_method = ??
raise NotImplementedError, "what to do what to do?"
print "defaulting to ", inference_method, "for latent function inference"
self.Z = Z self.Z = Param('inducing inputs', Z)
self.num_inducing = Z.shape[0] self.num_inducing = Z.shape[0]
self.backsub = 0
GP.__init__(self, X, Y, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
if X_variance is None:
self.has_uncertain_inputs = False logger.info("Adding Z as parameter")
self.X_variance = None self.link_parameter(self.Z, index=0)
self.posterior = None
def has_uncertain_inputs(self):
return isinstance(self.X, VariationalPosterior)
def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior):
#gradients wrt kernel
dL_dKmm = self.grad_dict['dL_dKmm']
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
kerngrad = self.kern.gradient.copy()
self.kern.update_gradients_expectations(variational_posterior=self.X,
Z=self.Z,
dL_dpsi0=self.grad_dict['dL_dpsi0'],
dL_dpsi1=self.grad_dict['dL_dpsi1'],
dL_dpsi2=self.grad_dict['dL_dpsi2'])
self.kern.gradient += kerngrad
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi0'],
self.grad_dict['dL_dpsi1'],
self.grad_dict['dL_dpsi2'],
Z=self.Z,
variational_posterior=self.X)
else: else:
assert X_variance.shape == X.shape #gradients wrt kernel
self.has_uncertain_inputs = True self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
self.X_variance = X_variance kerngrad = self.kern.gradient.copy()
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
if normalize_X: kerngrad += self.kern.gradient
self.Z = (self.Z.copy() - self._Xoffset) / self._Xscale self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern.gradient += kerngrad
# normalize X uncertainty also #gradients wrt Z
if self.has_uncertain_inputs: self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.X_variance /= np.square(self._Xscale) self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
self._const_jitter = None
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X, self.X_variance)
self.psi1 = self.kern.psi1(self.Z, self.X, self.X_variance)
self.psi2 = self.kern.psi2(self.Z, self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.X, self.Z)
self.psi2 = None
def _computations(self):
if self._const_jitter is None or not(self._const_jitter.shape[0] == self.num_inducing):
self._const_jitter = np.eye(self.num_inducing) * 1e-7
# factor Kmm
self._Lm = jitchol(self.Kmm + self._const_jitter)
if not self.backsub:
self._LmInv = linalg.lapack.dtrtri(self._Lm, lower=1)[0] # TODO: not needed in old version
# The rather complex computations of self._A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.num_data, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
if self.backsub:
evals, evecs = linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
if not np.array_equal(evals, clipped_evals):
pass # print evals
tmp = evecs * np.sqrt(clipped_evals)
tmp = tmp.T
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp)
else:
self._A = np.dot(np.dot(self._LmInv,
psi2_beta),
self._LmInv.T)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision.flatten().reshape(self.num_data, 1)))
else:
tmp = self.psi1 * (np.sqrt(self.likelihood.precision))
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(tmp.T), lower=1)
self._A = tdot(tmp)
# factor B
self.B = np.eye(self.num_inducing) + self._A
self.LB = jitchol(self.B)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
self.psi1Vf = np.dot(self.psi1.T, self.likelihood.VVT_factor)
if 1:#self.backsub:
# back substutue C into psi1Vf
tmp, info1 = dtrtrs(self._Lm, np.asfortranarray(self.psi1Vf), lower=1, trans=0)
self._LBi_Lmi_psi1Vf, _ = dtrtrs(self.LB, np.asfortranarray(tmp), lower=1, trans=0)
# tmp, info2 = dpotrs(self.LB, tmp, lower=1)
tmp, info2 = dtrtrs(self.LB, self._LBi_Lmi_psi1Vf, lower=1, trans=1)
self.Cpsi1Vf, info3 = dtrtrs(self._Lm, tmp, lower=1, trans=1)
else:
# slower, but more stable (?) version:
tmp = np.dot(self._LmInv, self.psi1Vf)
self._LBInv = linalg.lapack.dtrtri(self.LB, lower=True)[0]
self._LBi_Lmi_psi1Vf = np.dot(self._LBInv, tmp)
tmp = np.dot(self._LBInv.T, self._LBi_Lmi_psi1Vf)
self.Cpsi1Vf = np.dot(self._LmInv.T, tmp)
#import ipdb;ipdb.set_trace()
# Compute dL_dKmm
tmp = tdot(self._LBi_Lmi_psi1Vf)
self.data_fit = np.trace(tmp)
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.output_dim * np.eye(self.num_inducing) + tmp)
tmp = -0.5 * self.DBi_plus_BiPBi
tmp += -0.5 * self.B * self.output_dim
tmp += self.output_dim * np.eye(self.num_inducing)
self.dL_dKmm = backsub_both_sides(self._Lm, tmp)
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
self.dL_dpsi0 = -0.5 * self.output_dim * (self.likelihood.precision * np.ones([self.num_data, 1])).flatten()
self.dL_dpsi1 = np.dot(self.likelihood.VVT_factor, self.Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(self._Lm, self.output_dim * np.eye(self.num_inducing) - self.DBi_plus_BiPBi)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = self.likelihood.precision.flatten()[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (self.psi1 * self.likelihood.precision.reshape(self.num_data, 1)).T).T
self.dL_dpsi2 = None
else:
dL_dpsi2 = self.likelihood.precision * dL_dpsi2_beta
if self.has_uncertain_inputs:
# repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], self.num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
self.dL_dpsi1 += 2.*np.dot(self.psi1, dL_dpsi2)
self.dL_dpsi2 = None
# the partial derivative vector for the likelihood def _raw_predict(self, Xnew, full_cov=False, kern=None):
if self.likelihood.num_params == 0:
# save computation here.
self.partial_for_likelihood = None
elif self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
LBi = chol_inv(self.LB)
Lmi_psi1, nil = dtrtrs(self._Lm, np.asfortranarray(self.psi1.T), lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(self.LB, np.asfortranarray(Lmi_psi1), lower=1, trans=0)
self.partial_for_likelihood = -0.5 * self.likelihood.precision + 0.5 * self.likelihood.V**2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0 - np.sum(Lmi_psi1**2,0))[:,None] * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*self.likelihood.precision**2
self.partial_for_likelihood += -np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * self.likelihood.Y * self.likelihood.precision**2
self.partial_for_likelihood += 0.5*np.dot(self._LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * self.likelihood.precision**2
else:
# likelihood is not heteroscedastic
self.partial_for_likelihood = -0.5 * self.num_data * self.output_dim * self.likelihood.precision + 0.5 * self.likelihood.trYYT * self.likelihood.precision ** 2
self.partial_for_likelihood += 0.5 * self.output_dim * (self.psi0.sum() * self.likelihood.precision ** 2 - np.trace(self._A) * self.likelihood.precision)
self.partial_for_likelihood += self.likelihood.precision * (0.5 * np.sum(self._A * self.DBi_plus_BiPBi) - self.data_fit)
def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """
if self.likelihood.is_heteroscedastic:
A = -0.5 * self.num_data * self.output_dim * np.log(2.*np.pi) + 0.5 * np.sum(np.log(self.likelihood.precision)) - 0.5 * np.sum(self.likelihood.V * self.likelihood.Y)
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision.flatten() * self.psi0) - np.trace(self._A))
else:
A = -0.5 * self.num_data * self.output_dim * (np.log(2.*np.pi) - np.log(self.likelihood.precision)) - 0.5 * self.likelihood.precision * self.likelihood.trYYT
B = -0.5 * self.output_dim * (np.sum(self.likelihood.precision * self.psi0) - np.trace(self._A))
C = -self.output_dim * (np.sum(np.log(np.diag(self.LB)))) # + 0.5 * self.num_inducing * np.log(sf2))
D = 0.5 * self.data_fit
self._A_part, self._B_part, self._C_part, self._D_part = A, B, C, D
return A + B + C + D + self.likelihood.Z
def _set_params(self, p):
self.Z = p[:self.num_inducing * self.input_dim].reshape(self.num_inducing, self.input_dim)
self.kern._set_params(p[self.Z.size:self.Z.size + self.kern.num_params])
self.likelihood._set_params(p[self.Z.size + self.kern.num_params:])
self._compute_kernel_matrices()
self._computations()
self.Cpsi1V = None
def _get_params(self):
return np.hstack([self.Z.flatten(), self.kern._get_params_transformed(), self.likelihood._get_params()])
def _get_param_names(self):
return sum([['iip_%i_%i' % (i, j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])], [])\
+ self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
#def _get_print_names(self):
# return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def update_likelihood_approximation(self, **kwargs):
""" """
Approximates a non-gaussian likelihood using Expectation Propagation Make a prediction for the latent function values
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
if not isinstance(self.likelihood, Gaussian): # Updates not needed for Gaussian likelihood
self.likelihood.restart()
if self.has_uncertain_inputs:
Lmi = chol_inv(self._Lm)
Kmmi = tdot(Lmi.T)
diag_tr_psi2Kmmi = np.array([np.trace(psi2_Kmmi) for psi2_Kmmi in np.dot(self.psi2, Kmmi)])
self.likelihood.fit_FITC(self.Kmm, self.psi1.T, diag_tr_psi2Kmmi, **kwargs) # This uses the fit_FITC code, but does not perform a FITC-EP. #TODO solve potential confusion
# raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm, self.psi1.T, **kwargs)
# self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dZ().flatten(), self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))
def dL_dtheta(self):
"""
Compute and return the derivative of the log marginal likelihood wrt the parameters of the kernel
"""
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X)
return dL_dtheta
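In other words, dL_dtheta is the chain rule applied to the quantities the bound depends on; schematically
$\frac{\partial \mathcal{L}}{\partial \theta} \;=\; \mathrm{tr}\!\left(\frac{\partial \mathcal{L}}{\partial K_{mm}}^{\top}\frac{\partial K_{mm}}{\partial \theta}\right) \;+\; \sum_{i\in\{0,1,2\}} \mathrm{tr}\!\left(\frac{\partial \mathcal{L}}{\partial \psi_i}^{\top}\frac{\partial \psi_i}{\partial \theta}\right),$
with the $\psi_i$ terms collapsing to Kdiag and Knm contributions when the inputs are certain, exactly as in the two branches above.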
def dL_dZ(self):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = self.kern.dK_dX(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1.T, self.Z, self.X)
return dL_dZ
def _raw_predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False):
"""
Internal helper function for making predictions, does not account for
normalization or likelihood function
""" """
Bi, _ = dpotri(self.LB, lower=0) # WTH? this lower switch should be 1, but that doesn't work! if kern is None: kern = self.kern
symmetrify(Bi)
Kmmi_LmiBLmi = backsub_both_sides(self._Lm, np.eye(self.num_inducing) - Bi)
if self.Cpsi1V is None: if not isinstance(Xnew, VariationalPosterior):
psi1V = np.dot(self.psi1.T, self.likelihood.V) Kx = kern.K(self.Z, Xnew)
tmp, _ = dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0) mu = np.dot(Kx.T, self.posterior.woodbury_vector)
tmp, _ = dpotrs(self.LB, tmp, lower=1)
self.Cpsi1V, _ = dtrtrs(self._Lm, tmp, lower=1, trans=1)
if X_variance_new is None:
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
mu = np.dot(Kx.T, self.Cpsi1V)
if full_cov: if full_cov:
Kxx = self.kern.K(Xnew, which_parts=which_parts) Kxx = kern.K(Xnew)
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) # NOTE this won't work for plotting if self.posterior.woodbury_inv.ndim == 2:
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
elif self.posterior.woodbury_inv.ndim == 3:
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
var = var
else: else:
Kxx = self.kern.Kdiag(Xnew, which_parts=which_parts) Kxx = kern.Kdiag(Xnew)
var = Kxx - np.sum(Kx * np.dot(Kmmi_LmiBLmi, Kx), 0) var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
else: else:
# assert which_parts=='all', "switching out parts of variational kernels is not implemented" Kx = kern.psi1(self.Z, Xnew)
Kx = self.kern.psi1(self.Z, Xnew, X_variance_new) # , which_parts=which_parts) TODO: which_parts mu = np.dot(Kx, self.posterior.woodbury_vector)
mu = np.dot(Kx, self.Cpsi1V)
if full_cov: if full_cov:
raise NotImplementedError, "TODO" raise NotImplementedError, "TODO"
else: else:
Kxx = self.kern.psi0(self.Z, Xnew, X_variance_new) Kxx = kern.psi0(self.Z, Xnew)
psi2 = self.kern.psi2(self.Z, Xnew, X_variance_new) psi2 = kern.psi2(self.Z, Xnew)
var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1) var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
return mu, var
return mu, var[:, None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False, **likelihood_args):
"""
Predict the function(s) at the new point(s) Xnew.
**Arguments**
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.input_dim
:param X_variance_new: The uncertainty in the prediction points
:type X_variance_new: np.ndarray, Nnew x self.input_dim
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the full covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.input_dim
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xscale ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, **likelihood_args)
return mean, var, _025pm, _975pm
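A minimal usage sketch of the prediction interface documented above, written against this (pre-refactor) API; the SparseGPRegression constructor arguments are an assumption for illustration:
import numpy as np
import GPy
X = np.random.rand(100, 1)
Y = np.sin(6 * X) + 0.1 * np.random.randn(100, 1)
m = GPy.models.SparseGPRegression(X, Y, num_inducing=10)
m.optimize()
Xnew = np.linspace(0, 1, 200)[:, None]
mean, var, lower_95, upper_95 = m.predict(Xnew)  # four return values, as documented above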
def plot_f(self, samples=0, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', resolution=None,
full_cov=False, fignum=None, ax=None):
"""
Plot the GP's view of the world, where the data is normalized and before the likelihood is applied.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- Not implemented in higher dimensions
:param samples: the number of a posteriori samples to plot
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param full_cov:
:type full_cov: bool
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:param output: which output to plot (for multiple output models only)
:type output: integer (first output is 0)
"""
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
if fignum is None and ax is None:
fignum = fig.num
if which_data_rows == 'all':
which_data_rows = slice(None)
GPBase.plot_f(self, samples=samples, plot_limits=plot_limits, which_data_rows=which_data_rows, which_data_ycols=which_data_ycols, which_parts=which_parts, resolution=resolution, fignum=fignum, ax=ax)
if self.X.shape[1] == 1:
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data_rows, 0], self.likelihood.data[which_data_rows, 0],
xerr=2 * np.sqrt(self.X_variance[which_data_rows, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self, plot_limits=None, which_data_rows='all',
which_data_ycols='all', which_parts='all', fixed_inputs=[],
plot_raw=False,
levels=20, samples=0, fignum=None, ax=None, resolution=None):
"""
Plot the posterior of the sparse GP.
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
- In two dimensions, a contour-plot shows the mean predicted function
- In higher dimensions, use fixed_inputs to plot the GP with some of the inputs fixed.
Can plot only part of the data and part of the posterior functions
using which_data_rows, which_data_ycols and which_parts
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
:type plot_limits: np.array
:param which_data_rows: which of the training data to plot (default all)
:type which_data_rows: 'all' or a slice object to slice self.X, self.Y
:param which_data_ycols: when the data has several columns (independent outputs), only plot these
:type which_data_ycols: 'all' or a list of integers
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param fixed_inputs: a list of tuple [(i,v), (i,v)...], specifying that input index i should be set to value v.
:type fixed_inputs: a list of tuples
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
:type resolution: int
:param levels: number of levels to plot in a contour plot.
:type levels: int
:param samples: the number of a posteriori samples to plot
:type samples: int
:param fignum: figure to plot on.
:type fignum: figure number
:param ax: axes to plot on.
:type ax: axes handle
:type output: integer (first output is 0)
:param linecol: color of line to plot.
:type linecol:
:param fillcol: color of fill
:param levels: for 2D plotting, the number of contour levels to use. If ax is None, a new figure is created.
"""
# work out which ax to plot on
#Need these because we use which_data_rows in this function not just base
if which_data_rows == 'all':
which_data_rows = slice(None)
if which_data_ycols == 'all':
which_data_ycols = np.arange(self.output_dim)
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
#work out what the inputs are for plotting (1D or 2D)
fixed_dims = np.array([i for i,v in fixed_inputs])
free_dims = np.setdiff1d(np.arange(self.input_dim),fixed_dims)
#call the base plotting
GPBase.plot(self, samples=samples, plot_limits=plot_limits,
which_data_rows=which_data_rows,
which_data_ycols=which_data_ycols, fixed_inputs=fixed_inputs,
which_parts=which_parts, resolution=resolution, levels=20,
fignum=fignum, ax=ax)
if len(free_dims) == 1:
#plot errorbars for the uncertain inputs
if self.has_uncertain_inputs:
Xu = self.X * self._Xscale + self._Xoffset # NOTE self.X are the normalized values now
ax.errorbar(Xu[which_data_rows, 0], self.likelihood.data[which_data_rows, 0],
xerr=2 * np.sqrt(self.X_variance[which_data_rows, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#plot the inducing inputs
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu, np.zeros_like(Zu) + ax.get_ylim()[0], 'r|', mew=1.5, markersize=12)
elif len(free_dims) == 2:
Zu = self.Z * self._Xscale + self._Xoffset
ax.plot(Zu[:, 0], Zu[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return GPBase.getstate(self) + [self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance]
def setstate(self, state):
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
GPBase.setstate(self, state)

120
GPy/core/sparse_gp_mpi.py Normal file
View file

@ -0,0 +1,120 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from sparse_gp import SparseGP
from numpy.linalg.linalg import LinAlgError
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
import logging
logger = logging.getLogger("sparse gp mpi")
class SparseGP_MPI(SparseGP):
"""
A general purpose Sparse GP model with MPI parallelization support
This model allows (approximate) inference using variational DTC or FITC
(Gaussian likelihoods) as well as non-conjugate sparse methods based on
these.
:param X: inputs
:type X: np.ndarray (num_data x input_dim)
:param likelihood: a likelihood instance, containing the observed data
:type likelihood: GPy.likelihood.(Gaussian | EP | Laplace)
:param kernel: the kernel (covariance function). See link kernels
:type kernel: a GPy.kern.kern instance
:param X_variance: The uncertainty in the measurements of X (Gaussian variance)
:type X_variance: np.ndarray (num_data x input_dim) | None
:param Z: inducing inputs
:type Z: np.ndarray (num_inducing x input_dim)
:param num_inducing: Number of inducing points (optional, default 10. Ignored if Z is not None)
:type num_inducing: int
:param mpi_comm: The communication group of MPI, e.g. mpi4py.MPI.COMM_WORLD
:type mpi_comm: mpi4py.MPI.Intracomm
"""
def __init__(self, X, Y, Z, kernel, likelihood, variational_prior=None, inference_method=None, name='sparse gp mpi', Y_metadata=None, mpi_comm=None, normalizer=False):
self._IN_OPTIMIZATION_ = False
if mpi_comm != None:
if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else:
assert isinstance(inference_method, VarDTC_minibatch), 'inference_method has to support MPI!'
super(SparseGP_MPI, self).__init__(X, Y, Z, kernel, likelihood, inference_method=inference_method, name=name, Y_metadata=Y_metadata, normalizer=normalizer)
self.update_model(False)
if variational_prior is not None:
self.link_parameter(variational_prior)
self.mpi_comm = mpi_comm
# Manage the data (Y) division
if mpi_comm != None:
from ..util.parallel import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm.rank, mpi_comm.size)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)
self.Y_local = self.Y[N_start:N_end]
print 'MPI RANK '+str(self.mpi_comm.rank)+' with the data range '+str(self.N_range)
mpi_comm.Bcast(self.param_array, root=0)
self.update_model(True)
def __getstate__(self):
dc = super(SparseGP_MPI, self).__getstate__()
dc['mpi_comm'] = None
if self.mpi_comm != None:
del dc['N_range']
del dc['N_list']
del dc['Y_local']
if 'normalizer' not in dc:
dc['normalizer'] = None
dc['Y_normalized'] = dc['Y']
return dc
#=====================================================
# The MPI parallelization
# - can move to model at some point
#=====================================================
@SparseGP.optimizer_array.setter
def optimizer_array(self, p):
if self.mpi_comm != None:
if self._IN_OPTIMIZATION_ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
SparseGP.optimizer_array.fset(self,p)
def optimize(self, optimizer=None, start=None, **kwargs):
self._IN_OPTIMIZATION_ = True
if self.mpi_comm==None:
super(SparseGP_MPI, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(SparseGP_MPI, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self.optimizer_array.copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
try:
self.optimizer_array = x
self._fail_count = 0
except (LinAlgError, ZeroDivisionError, ValueError):
if self._fail_count >= self._allowed_failures:
raise
self._fail_count += 1
elif flag==-1:
break
else:
self._IN_OPTIMIZATION_ = False
raise Exception("Unrecognizable flag for synchronization!")
self._IN_OPTIMIZATION_ = False
def parameters_changed(self):
if isinstance(self.inference_method,VarDTC_minibatch):
update_gradients(self, mpi_comm=self.mpi_comm)
else:
super(SparseGP_MPI,self).parameters_changed()
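A minimal construction sketch for the SparseGP_MPI class defined above, intended to be launched under MPI (e.g. mpirun -np 4 python script.py); the import path follows the file location above, and the RBF kernel and Gaussian likelihood helpers are the ones used elsewhere in this code base:
import numpy as np
from mpi4py import MPI
import GPy
from GPy.core.sparse_gp_mpi import SparseGP_MPI
X = np.random.rand(1000, 2)
Y = np.sin(X.sum(axis=1))[:, None]
Z = X[::100].copy()  # 10 inducing inputs taken from the data
m = SparseGP_MPI(X, Y, Z, GPy.kern.RBF(2), GPy.likelihoods.Gaussian(),
                 mpi_comm=MPI.COMM_WORLD)
m.optimize()  # rank 0 drives the optimizer, the other ranks follow via Bcast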

View file

@ -1,512 +0,0 @@
# Copyright (c) 2012, James Hensman and Nicolo' Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from .. import kern
from ..util.linalg import pdinv, mdot, tdot, dpotrs, dtrtrs, jitchol, backsub_both_sides
from ..likelihoods import EP
from gp_base import GPBase
from model import Model
import time
import sys
class SVIGP(GPBase):
"""
Stochastic Variational inference in a Gaussian Process
:param X: inputs
:type X: np.ndarray (num_data x num_inputs)
:param Y: observed data
:type Y: np.ndarray of observations (num_data x output_dim)
:param batchsize: the size of a minibatch
:param q_u: canonical parameters of the distribution squashed into a 1D array
:type q_u: np.ndarray
:param kernel: the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs
:type Z: np.ndarray (num_inducing x num_inputs)
"""
def __init__(self, X, likelihood, kernel, Z, q_u=None, batchsize=10, X_variance=None):
GPBase.__init__(self, X, likelihood, kernel, normalize_X=False)
self.batchsize=batchsize
self.Y = self.likelihood.Y.copy()
self.Z = Z
self.num_inducing = Z.shape[0]
self.batchcounter = 0
self.epochs = 0
self.iterations = 0
self.vb_steplength = 0.05
self.param_steplength = 1e-5
self.momentum = 0.9
if X_variance is None:
self.has_uncertain_inputs = False
else:
self.has_uncertain_inputs = True
self.X_variance = X_variance
if q_u is None:
q_u = np.hstack((np.random.randn(self.num_inducing*self.output_dim),-.5*np.eye(self.num_inducing).flatten()))
self.set_vb_param(q_u)
self._permutation = np.random.permutation(self.num_data)
self.load_batch()
self._param_trace = []
self._ll_trace = []
self._grad_trace = []
#set the adaptive steplength parameters
self.hbar_t = 0.0
self.tau_t = 100.0
self.gbar_t = 0.0
self.gbar_t1 = 0.0
self.gbar_t2 = 0.0
self.hbar_tp = 0.0
self.tau_tp = 10000.0
self.gbar_tp = 0.0
self.adapt_param_steplength = True
self.adapt_vb_steplength = True
self._param_steplength_trace = []
self._vb_steplength_trace = []
self.ensure_default_constraints()
def getstate(self):
steplength_params = [self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength]
return GPBase.getstate(self) + \
[self.get_vb_param(),
self.Z,
self.num_inducing,
self.has_uncertain_inputs,
self.X_variance,
self.X_batch,
self.X_variance_batch,
steplength_params,
self.batchcounter,
self.batchsize,
self.epochs,
self.momentum,
self.data_prop,
self._param_trace,
self._param_steplength_trace,
self._vb_steplength_trace,
self._ll_trace,
self._grad_trace,
self.Y,
self._permutation,
self.iterations
]
def setstate(self, state):
self.iterations = state.pop()
self._permutation = state.pop()
self.Y = state.pop()
self._grad_trace = state.pop()
self._ll_trace = state.pop()
self._vb_steplength_trace = state.pop()
self._param_steplength_trace = state.pop()
self._param_trace = state.pop()
self.data_prop = state.pop()
self.momentum = state.pop()
self.epochs = state.pop()
self.batchsize = state.pop()
self.batchcounter = state.pop()
steplength_params = state.pop()
(self.hbar_t, self.tau_t, self.gbar_t, self.gbar_t1, self.gbar_t2, self.hbar_tp, self.tau_tp, self.gbar_tp, self.adapt_param_steplength, self.adapt_vb_steplength, self.vb_steplength, self.param_steplength) = steplength_params
self.X_variance_batch = state.pop()
self.X_batch = state.pop()
self.X_variance = state.pop()
self.has_uncertain_inputs = state.pop()
self.num_inducing = state.pop()
self.Z = state.pop()
vb_param = state.pop()
GPBase.setstate(self, state)
self.set_vb_param(vb_param)
def _compute_kernel_matrices(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
self.psi0 = self.kern.psi0(self.Z, self.X_batch, self.X_variance_batch)
self.psi1 = self.kern.psi1(self.Z, self.X_batch, self.X_variance_batch)
self.psi2 = self.kern.psi2(self.Z, self.X_batch, self.X_variance_batch)
else:
self.psi0 = self.kern.Kdiag(self.X_batch)
self.psi1 = self.kern.K(self.X_batch, self.Z)
self.psi2 = None
def dL_dtheta(self):
dL_dtheta = self.kern.dK_dtheta(self.dL_dKmm, self.Z)
if self.has_uncertain_inputs:
dL_dtheta += self.kern.dpsi0_dtheta(self.dL_dpsi0, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi1_dtheta(self.dL_dpsi1, self.Z, self.X_batch, self.X_variance_batch)
dL_dtheta += self.kern.dpsi2_dtheta(self.dL_dpsi2, self.Z, self.X_batch, self.X_variance_batch)
else:
dL_dtheta += self.kern.dK_dtheta(self.dL_dpsi1, self.X_batch, self.Z)
dL_dtheta += self.kern.dKdiag_dtheta(self.dL_dpsi0, self.X_batch)
return dL_dtheta
def _set_params(self, p, computations=True):
self.kern._set_params_transformed(p[:self.kern.num_params])
self.likelihood._set_params(p[self.kern.num_params:])
if computations:
self._compute_kernel_matrices()
self._computations()
def _get_params(self):
return np.hstack((self.kern._get_params_transformed() , self.likelihood._get_params()))
def _get_param_names(self):
return self.kern._get_param_names_transformed() + self.likelihood._get_param_names()
def load_batch(self):
"""
load a batch of data (set self.X_batch and self.likelihood.Y from self.X, self.Y)
"""
#if we've seen all the data, start again with them in a new random order
if self.batchcounter+self.batchsize > self.num_data:
self.batchcounter = 0
self.epochs += 1
self._permutation = np.random.permutation(self.num_data)
this_perm = self._permutation[self.batchcounter:self.batchcounter+self.batchsize]
self.X_batch = self.X[this_perm]
self.likelihood.set_data(self.Y[this_perm])
if self.has_uncertain_inputs:
self.X_variance_batch = self.X_variance[this_perm]
self.batchcounter += self.batchsize
self.data_prop = float(self.batchsize)/self.num_data
self._compute_kernel_matrices()
self._computations()
def _computations(self,do_Kmm=True, do_Kmm_grad=True):
"""
All of the computations needed. Some are optional, see kwargs.
"""
if do_Kmm:
self.Lm = jitchol(self.Kmm)
# The rather complex computations of self.A
if self.has_uncertain_inputs:
if self.likelihood.is_heteroscedastic:
psi2_beta = (self.psi2 * (self.likelihood.precision.flatten().reshape(self.batchsize, 1, 1))).sum(0)
else:
psi2_beta = self.psi2.sum(0) * self.likelihood.precision
evals, evecs = np.linalg.eigh(psi2_beta)
clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
tmp = evecs * np.sqrt(clipped_evals)
else:
if self.likelihood.is_heteroscedastic:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision.flatten().reshape(1, self.batchsize)))
else:
tmp = self.psi1.T * (np.sqrt(self.likelihood.precision))
tmp, _ = dtrtrs(self.Lm, np.asfortranarray(tmp), lower=1)
self.A = tdot(tmp)
self.V = self.likelihood.precision*self.likelihood.Y
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
self.psi1V = np.dot(self.psi1.T, self.V)
self.B = np.eye(self.num_inducing)*self.data_prop + self.A
self.Lambda = backsub_both_sides(self.Lm, self.B.T)
self.LQL = backsub_both_sides(self.Lm,self.q_u_expectation[1].T,transpose='right')
self.trace_K = self.psi0.sum() - np.trace(self.A)/self.likelihood.precision
self.Kmmi_m, _ = dpotrs(self.Lm, self.q_u_expectation[0], lower=1)
self.projected_mean = np.dot(self.psi1,self.Kmmi_m)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.output_dim * self.likelihood.precision * np.ones(self.batchsize)
self.dL_dpsi1, _ = dpotrs(self.Lm,np.asfortranarray(self.VmT.T),lower=1)
self.dL_dpsi1 = self.dL_dpsi1.T
dL_dpsi2 = -0.5 * self.likelihood.precision * backsub_both_sides(self.Lm, self.LQL - self.output_dim * np.eye(self.num_inducing))
if self.has_uncertain_inputs:
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.batchsize,axis=0)
else:
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1.T).T
self.dL_dpsi2 = None
# Compute dL_dKmm
if do_Kmm_grad:
tmp = np.dot(self.LQL,self.A) - backsub_both_sides(self.Lm,np.dot(self.q_u_expectation[0],self.psi1V.T),transpose='right')
tmp += tmp.T
tmp += -self.output_dim*self.B
tmp += self.data_prop*self.LQL
self.dL_dKmm = 0.5*backsub_both_sides(self.Lm,tmp)
#Compute the gradient of the log likelihood wrt noise variance
self.partial_for_likelihood = -0.5*(self.batchsize*self.output_dim - np.sum(self.A*self.LQL))*self.likelihood.precision
self.partial_for_likelihood += (0.5*self.output_dim*self.trace_K + 0.5 * self.likelihood.trYYT - np.sum(self.likelihood.Y*self.projected_mean))*self.likelihood.precision**2
def log_likelihood(self):
"""
As for uncollapsed sparse GP, but account for the proportion of data we're looking at right now.
NB. self.batchsize is the size of the batch, not the size of X_all
"""
assert not self.likelihood.is_heteroscedastic
A = -0.5*self.batchsize*self.output_dim*(np.log(2.*np.pi) - np.log(self.likelihood.precision))
B = -0.5*self.likelihood.precision*self.output_dim*self.trace_K
Kmm_logdet = 2.*np.sum(np.log(np.diag(self.Lm)))
C = -0.5*self.output_dim*self.data_prop*(Kmm_logdet-self.q_u_logdet - self.num_inducing)
C += -0.5*np.sum(self.LQL * self.B)
D = -0.5*self.likelihood.precision*self.likelihood.trYYT
E = np.sum(self.V*self.projected_mean)
return (A+B+C+D+E)/self.data_prop
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dtheta(), self.likelihood._gradients(partial=self.partial_for_likelihood)))/self.data_prop
def vb_grad_natgrad(self):
"""
Compute the gradients of the lower bound wrt the canonical and
expectation parameters of u.
Note that the natural gradient in either parameterisation is given by the gradient in the other (see Hensman et al., 2012, "Fast Variational Inference in the Conjugate Exponential Family").
"""
# Gradient for eta
dL_dmmT_S = -0.5*self.Lambda/self.data_prop + 0.5*self.q_u_prec
Kmmipsi1V,_ = dpotrs(self.Lm,self.psi1V,lower=1)
dL_dm = (Kmmipsi1V - np.dot(self.Lambda,self.q_u_mean))/self.data_prop
# Gradients for theta
S = self.q_u_cov
Si = self.q_u_prec
m = self.q_u_mean
dL_dSi = -mdot(S,dL_dmmT_S, S)
dL_dmhSi = -2*dL_dSi
dL_dSim = np.dot(dL_dSi,m) + np.dot(Si, dL_dm)
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) , np.hstack((dL_dSim.flatten(), dL_dmhSi.flatten()))
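The duality the docstring refers to: for an exponential-family q(u) with natural parameters $\theta$ and expectation parameters $\eta$, the natural gradient with respect to one parameterisation is the ordinary gradient with respect to the other (Hensman et al., 2012),
$\tilde{\nabla}_{\theta}\mathcal{L} = \nabla_{\eta}\mathcal{L}, \qquad \tilde{\nabla}_{\eta}\mathcal{L} = \nabla_{\theta}\mathcal{L},$
which is why this method returns the two stacked gradient vectors as a pair.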
def optimize(self, iterations, print_interval=10, callback=lambda:None, callback_interval=5):
param_step = 0.
#Iterate!
for i in range(iterations):
#store the current configuration for plotting later
self._param_trace.append(self._get_params())
self._ll_trace.append(self.log_likelihood() + self.log_prior())
#load a batch and do the appropriate computations (kernel matrices, etc)
self.load_batch()
#compute the (stochastic) gradient
natgrads = self.vb_grad_natgrad()
grads = self._transform_gradients(self._log_likelihood_gradients() + self._log_prior_gradients())
self._grad_trace.append(grads)
#compute the steps in all parameters
vb_step = self.vb_steplength*natgrads[0]
#only move the parameters after the first epoch and only if the steplength is nonzero
if (self.epochs>=1) and (self.param_steplength > 0):
param_step = self.momentum*param_step + self.param_steplength*grads
else:
param_step = 0.
self.set_vb_param(self.get_vb_param() + vb_step)
#Note: don't recompute everything here, wait until the next iteration when we have a new batch
self._set_params(self._untransform_params(self._get_params_transformed() + param_step), computations=False)
#print messages if desired
if i and (not i%print_interval):
print i, np.mean(self._ll_trace[-print_interval:]) #, self.log_likelihood()
print np.round(np.mean(self._grad_trace[-print_interval:],0),3)
sys.stdout.flush()
#callback
if i and not i%callback_interval:
callback(self) # Change this to callback()
time.sleep(0.01)
if self.epochs > 10:
self._adapt_steplength()
self._vb_steplength_trace.append(self.vb_steplength)
self._param_steplength_trace.append(self.param_steplength)
self.iterations += 1
def _adapt_steplength(self):
if self.adapt_vb_steplength:
# self._adaptive_vb_steplength()
self._adaptive_vb_steplength_KL()
#self._vb_steplength_trace.append(self.vb_steplength)
assert self.vb_steplength >= 0
if self.adapt_param_steplength:
self._adaptive_param_steplength()
# self._adaptive_param_steplength_log()
# self._adaptive_param_steplength_from_vb()
#self._param_steplength_trace.append(self.param_steplength)
def _adaptive_param_steplength(self):
if hasattr(self, 'adapt_param_steplength_decr'):
decr_factor = self.adapt_param_steplength_decr
else:
decr_factor = 0.02
g_tp = self._transform_gradients(self._log_likelihood_gradients())
self.gbar_tp = (1-1/self.tau_tp)*self.gbar_tp + 1/self.tau_tp * g_tp
self.hbar_tp = (1-1/self.tau_tp)*self.hbar_tp + 1/self.tau_tp * np.dot(g_tp.T, g_tp)
new_param_steplength = np.dot(self.gbar_tp.T, self.gbar_tp) / self.hbar_tp
#- hack
new_param_steplength *= decr_factor
self.param_steplength = (self.param_steplength + new_param_steplength)/2
#-
self.tau_tp = self.tau_tp*(1-self.param_steplength) + 1
def _adaptive_param_steplength_log(self):
stp = np.logspace(np.log(0.0001), np.log(1e-6), base=np.e, num=18000)
self.param_steplength = stp[self.iterations]
def _adaptive_param_steplength_log2(self):
self.param_steplength = (self.iterations + 0.001)**-0.5
def _adaptive_param_steplength_from_vb(self):
self.param_steplength = self.vb_steplength * 0.01
def _adaptive_vb_steplength(self):
decr_factor = 0.1
g_t = self.vb_grad_natgrad()[0]
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
new_vb_steplength = np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t
#- hack
new_vb_steplength *= decr_factor
self.vb_steplength = (self.vb_steplength + new_vb_steplength)/2
#-
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
def _adaptive_vb_steplength_KL(self):
decr_factor = 0.1
natgrad = self.vb_grad_natgrad()
g_t1 = natgrad[0]
g_t2 = natgrad[1]
self.gbar_t1 = (1-1/self.tau_t)*self.gbar_t1 + 1/self.tau_t * g_t1
self.gbar_t2 = (1-1/self.tau_t)*self.gbar_t2 + 1/self.tau_t * g_t2
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t1.T, g_t2)
self.vb_steplength = np.dot(self.gbar_t1.T, self.gbar_t2) / self.hbar_t
self.vb_steplength *= decr_factor
self.tau_t = self.tau_t*(1-self.vb_steplength) + 1
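In symbols, _adaptive_vb_steplength and _adaptive_param_steplength maintain running averages of the stochastic gradient and of its squared norm with a decaying memory $\tau_t$, and set the step length to their ratio:
$\bar{g}_t = (1-\tfrac{1}{\tau_t})\,\bar{g}_{t-1} + \tfrac{1}{\tau_t}\, g_t, \quad \bar{h}_t = (1-\tfrac{1}{\tau_t})\,\bar{h}_{t-1} + \tfrac{1}{\tau_t}\, g_t^{\top} g_t, \quad \alpha_t = \frac{\bar{g}_t^{\top}\bar{g}_t}{\bar{h}_t}, \quad \tau_{t+1} = \tau_t(1-\alpha_t) + 1.$
Both methods also shrink $\alpha_t$ by a fixed decrease factor and average it with the previous value; the _KL variant instead uses the inner product of the two natural-gradient blocks in place of $g_t^{\top} g_t$.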
def _raw_predict(self, X_new, X_variance_new=None, which_parts='all',full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
#TODO: make this more efficient!
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
if X_variance_new is None:
Kx = self.kern.K(X_new,self.Z)
mu = np.dot(Kx,self.Kmmi_m)
if full_cov:
Kxx = self.kern.K(X_new)
var = Kxx - mdot(Kx,tmp,Kx.T)
else:
Kxx = self.kern.Kdiag(X_new)
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
return mu, var
else:
assert X_variance_new.shape == X_new.shape
Kx = self.kern.psi1(self.Z,X_new, X_variance_new)
mu = np.dot(Kx,self.Kmmi_m)
Kxx = self.kern.psi0(self.Z,X_new,X_variance_new)
psi2 = self.kern.psi2(self.Z,X_new,X_variance_new)
diag_var = Kxx - np.sum(np.sum(psi2*tmp[None,:,:],1),1)
if full_cov:
raise NotImplementedError
else:
return mu, diag_var[:,None]
def predict(self, Xnew, X_variance_new=None, which_parts='all', full_cov=False, sampling=False, num_samples=15000):
# normalize X values
Xnew = (Xnew.copy() - self._Xoffset) / self._Xscale
if X_variance_new is not None:
X_variance_new = X_variance_new / self._Xscale ** 2
# here's the actual prediction by the GP model
mu, var = self._raw_predict(Xnew, X_variance_new, full_cov=full_cov, which_parts=which_parts)
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov, sampling=sampling, num_samples=num_samples)
return mean, var, _025pm, _975pm
def set_vb_param(self,vb_param):
"""set the distribution q(u) from the canonical parameters"""
self.q_u_canonical_flat = vb_param.copy()
self.q_u_canonical = self.q_u_canonical_flat[:self.num_inducing*self.output_dim].reshape(self.num_inducing,self.output_dim),self.q_u_canonical_flat[self.num_inducing*self.output_dim:].reshape(self.num_inducing,self.num_inducing)
self.q_u_prec = -2.*self.q_u_canonical[1]
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
self.q_u_Li = q_u_Li
self.q_u_logdet = -tmp
self.q_u_mean, _ = dpotrs(q_u_Li, np.asfortranarray(self.q_u_canonical[0]),lower=1)
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.output_dim)
def get_vb_param(self):
"""
Return the canonical parameters of the distribution q(u)
"""
return self.q_u_canonical_flat
def plot(self, ax=None, fignum=None, Z_height=None, **kwargs):
if ax is None:
fig = pb.figure(num=fignum)
ax = fig.add_subplot(111)
#horrible hack here:
data = self.likelihood.data.copy()
self.likelihood.data = self.Y
GPBase.plot(self, ax=ax, **kwargs)
self.likelihood.data = data
Zu = self.Z * self._Xscale + self._Xoffset
if self.input_dim==1:
ax.plot(self.X_batch, self.likelihood.data, 'gx',mew=2)
if Z_height is None:
Z_height = ax.get_ylim()[0]
ax.plot(Zu, np.zeros_like(Zu) + Z_height, 'r|', mew=1.5, markersize=12)
if self.input_dim==2:
ax.scatter(self.X[:,0], self.X[:,1], 20., self.Y[:,0], linewidth=0, cmap=pb.cm.jet)
ax.plot(Zu[:,0], Zu[:,1], 'w^')
def plot_traces(self):
pb.figure()
t = np.array(self._param_trace)
pb.subplot(2,1,1)
for l,ti in zip(self._get_param_names(),t.T):
if not l[:3]=='iip':
pb.plot(ti,label=l)
pb.legend(loc=0)
pb.subplot(2,1,2)
pb.plot(np.asarray(self._ll_trace),label='stochastic likelihood')
pb.legend(loc=0)

420
GPy/core/symbolic.py Normal file
View file

@ -0,0 +1,420 @@
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import re
from ..core.parameterization import Parameterized
import numpy as np
import sympy as sym
from ..core.parameterization import Param
from sympy.utilities.lambdify import lambdastr, _imp_namespace, _get_namespace
from sympy.utilities.iterables import numbered_symbols
import scipy
import GPy
def getFromDict(dataDict, mapList):
return reduce(lambda d, k: d[k], mapList, dataDict)
def setInDict(dataDict, mapList, value):
getFromDict(dataDict, mapList[:-1])[mapList[-1]] = value
class Symbolic_core():
"""
Base model symbolic class.
"""
def __init__(self, expressions, cacheable, derivatives=None, parameters=None, func_modules=[]):
# Base class init, do some basic derivatives etc.
# Func_modules sets up the right mapping for functions.
func_modules += [{'gamma':scipy.special.gamma,
'gammaln':scipy.special.gammaln,
'erf':scipy.special.erf, 'erfc':scipy.special.erfc,
'erfcx':scipy.special.erfcx,
'polygamma':scipy.special.polygamma,
'normcdf':GPy.util.functions.normcdf,
'normcdfln':GPy.util.functions.normcdfln,
'logistic':GPy.util.functions.logistic,
'logisticln':GPy.util.functions.logisticln},
'numpy']
self._set_expressions(expressions)
self._set_variables(cacheable)
self._set_derivatives(derivatives)
self._set_parameters(parameters)
# Convert the expressions to a list for common sub expression elimination
# We should find the following type of expressions: 'function', 'derivative', 'second_derivative', 'third_derivative'.
self.update_expression_list()
# Apply any global stabilisation operations to expressions.
self.global_stabilize()
# Helper functions to get data in and out of dictionaries.
# this code from http://stackoverflow.com/questions/14692690/access-python-nested-dictionary-items-via-a-list-of-keys
self.extract_sub_expressions()
self._gen_code()
self._set_namespace(func_modules)
def _set_namespace(self, namespaces):
"""Set the name space for use when calling eval. This needs to contain all the relvant functions for mapping from symbolic python to the numerical python. It also contains variables, cached portions etc."""
self.namespace = {}
for m in namespaces[::-1]:
buf = _get_namespace(m)
self.namespace.update(buf)
self.namespace.update(self.__dict__)
def _set_expressions(self, expressions):
"""Extract expressions and variables from the user provided expressions."""
self.expressions = {}
for key, item in expressions.items():
self.expressions[key] = {'function': item}
def _set_variables(self, cacheable):
"""Pull the variable names out of the provided expressions and separate into cacheable expressions and normal parameters. Those that are only stored in the cache, the parameters are stored in this object."""
# pull the parameters and inputs out of the symbolic pdf
def extract_vars(expr):
return [e for e in expr.atoms() if e.is_Symbol and e not in vars]
self.cacheable = cacheable
self.variables = {}
vars = []
for expression in self.expressions.values():
vars += extract_vars(expression['function'])
# inputs are assumed to be those things that are
# cacheable. I.e. those things that aren't stored within the
# object except as cached. For covariance functions this is X
# and Z, for likelihoods F and for mapping functions X.
self.cacheable_vars = [] # list of everything that's cacheable
for var in cacheable:
self.variables[var] = [e for e in vars if e.name.split('_')[0]==var.lower()]
self.cacheable_vars += self.variables[var]
for var in cacheable:
if not self.variables[var]:
raise ValueError('Variable ' + var + ' was specified as cacheable but is not in expression. Expected to find symbols of the form ' + var.lower() + '_0 to represent ' + var)
# things that aren't cacheable are assumed to be parameters.
self.variables['theta'] = sorted([e for e in vars if not e in self.cacheable_vars],key=lambda e:e.name)
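A small, hypothetical illustration (not taken from the code base) of the naming convention _set_variables relies on: cacheable inputs must appear as lower-cased, column-indexed symbols such as x_0 and z_0, and every remaining symbol is collected under 'theta' as a parameter:
import sympy as sym
x_0, z_0, lengthscale, variance = sym.symbols('x_0 z_0 lengthscale variance')
rbf_expr = variance * sym.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))
expressions = {'k': rbf_expr}   # passed to the constructor as `expressions`
cacheable = ['X', 'Z']          # matched against the x_0 / z_0 symbols;
                                # lengthscale and variance end up in variables['theta']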
def _set_derivatives(self, derivatives):
# these are arguments for computing derivatives.
def extract_derivative(function, derivative_arguments):
return {theta.name : self.stabilize(sym.diff(function,theta)) for theta in derivative_arguments}
derivative_arguments = []
if derivatives is not None:
for derivative in derivatives:
derivative_arguments += self.variables[derivative]
# Do symbolic work to compute derivatives.
for key, func in self.expressions.items():
# if func['function'].is_Matrix:
# rows = func['function'].shape[0]
# cols = func['function'].shape[1]
# self.expressions[key]['derivative'] = sym.zeros(rows, cols)
# for i in xrange(rows):
# for j in xrange(cols):
# self.expressions[key]['derivative'][i, j] = extract_derivative(func['function'][i, j], derivative_arguments)
# else:
self.expressions[key]['derivative'] = extract_derivative(func['function'], derivative_arguments)
def _set_parameters(self, parameters):
"""Add parameters to the model and initialize with given values."""
for theta in self.variables['theta']:
val = 1.0
# TODO: improve approach for initializing parameters.
if parameters is not None:
if parameters.has_key(theta.name):
val = parameters[theta.name]
# Add parameter.
self.link_parameters(Param(theta.name, val, None))
#self._set_attribute(theta.name, )
def eval_parameters_changed(self):
# TODO: place checks for inf/nan in here
# do all the precomputation codes.
self.eval_update_cache()
def eval_update_cache(self, **kwargs):
# TODO: place checks for inf/nan in here
# for all provided keywords
for var, code in self.variable_sort(self.code['parameters_changed']):
self._set_attribute(var, eval(code, self.namespace))
for var, value in kwargs.items():
# update their cached values
if value is not None:
if var == 'X' or var == 'F' or var == 'M':
value = np.atleast_2d(value)
for i, theta in enumerate(self.variables[var]):
self._set_attribute(theta.name, value[:, i][:, None])
elif var == 'Y':
# Y values can be missing.
value = np.atleast_2d(value)
for i, theta in enumerate(self.variables[var]):
self._set_attribute('missing' + str(i), np.isnan(value[:, i]))
self._set_attribute(theta.name, value[:, i][:, None])
elif var == 'Z':
value = np.atleast_2d(value)
for i, theta in enumerate(self.variables[var]):
self._set_attribute(theta.name, value[:, i][None, :])
else:
value = np.atleast_1d(value)
for i, theta in enumerate(self.variables[var]):
self._set_attribute(theta.name, value[i])
for var, code in self.variable_sort(self.code['update_cache']):
self._set_attribute(var, eval(code, self.namespace))
def eval_update_gradients(self, function, partial, **kwargs):
# TODO: place checks for inf/nan in here?
self.eval_update_cache(**kwargs)
gradient = {}
for theta in self.variables['theta']:
code = self.code[function]['derivative'][theta.name]
gradient[theta.name] = (partial*eval(code, self.namespace)).sum()
return gradient
def eval_gradients_X(self, function, partial, **kwargs):
if kwargs.has_key('X'):
gradients_X = np.zeros_like(kwargs['X'])
self.eval_update_cache(**kwargs)
for i, theta in enumerate(self.variables['X']):
code = self.code[function]['derivative'][theta.name]
gradients_X[:, i:i+1] = partial*eval(code, self.namespace)
return gradients_X
def eval_function(self, function, **kwargs):
self.eval_update_cache(**kwargs)
return eval(self.code[function]['function'], self.namespace)
def code_parameters_changed(self):
# do all the precomputation codes.
lcode = ''
for variable, code in self.variable_sort(self.code['parameters_changed']):
lcode += self._print_code(variable) + ' = ' + self._print_code(code) + '\n'
return lcode
def code_update_cache(self):
lcode = ''
for var in self.cacheable:
lcode += 'if ' + var + ' is not None:\n'
if var == 'X':
reorder = '[:, None]'
elif var == 'Z':
reorder = '[None, :]'
else:
reorder = ''
for i, theta in enumerate(self.variables[var]):
lcode+= "\t" + var + '= np.atleast_2d(' + var + ')\n'
lcode+= "\t" + self._print_code(theta.name) + ' = ' + var + '[:, ' + str(i) + "]" + reorder + "\n"
for variable, code in self.variable_sort(self.code['update_cache']):
lcode+= self._print_code(variable) + ' = ' + self._print_code(code) + "\n"
return lcode
def code_update_gradients(self, function):
lcode = ''
for theta in self.variables['theta']:
code = self.code[function]['derivative'][theta.name]
lcode += self._print_code(theta.name) + '.gradient = (partial*(' + self._print_code(code) + ')).sum()\n'
return lcode
def code_gradients_cacheable(self, function, variable):
if variable not in self.cacheable:
raise RuntimeError, variable + ' must be cacheable.'
lcode = 'gradients_' + variable + ' = np.zeros_like(' + variable + ')\n'
lcode += 'self.update_cache(' + ', '.join(self.cacheable) + ')\n'
for i, theta in enumerate(self.variables[variable]):
code = self.code[function]['derivative'][theta.name]
lcode += 'gradients_' + variable + '[:, ' + str(i) + ':' + str(i) + '+1] = partial*' + self._print_code(code) + '\n'
lcode += 'return gradients_' + variable + '\n'
return lcode
def code_function(self, function):
lcode = 'self.update_cache(' + ', '.join(self.cacheable) + ')\n'
lcode += 'return ' + self._print_code(self.code[function]['function'])
return lcode
def stabilize(self, expr):
"""Stabilize the code in the model."""
# this code is applied to expressions in the model in an attempt to stabilize them.
return expr
def global_stabilize(self):
"""Stabilize all code in the model."""
pass
def _set_attribute(self, name, value):
"""Make sure namespace gets updated when setting attributes."""
setattr(self, name, value)
self.namespace.update({name: getattr(self, name)})
def update_expression_list(self):
"""Extract a list of expressions from the dictionary of expressions."""
self.expression_list = [] # code arrives in dictionary, but is passed in this list
self.expression_keys = [] # Keep track of the dictionary keys.
self.expression_order = [] # This may be unnecessary. It's to give ordering for cse
for fname, fexpressions in self.expressions.items():
for type, texpressions in fexpressions.items():
if type == 'function':
self.expression_list.append(texpressions)
self.expression_keys.append([fname, type])
self.expression_order.append(1)
elif type[-10:] == 'derivative':
for dtype, expression in texpressions.items():
self.expression_list.append(expression)
self.expression_keys.append([fname, type, dtype])
if type[:-10] == 'first_' or type[:-10] == '':
self.expression_order.append(3) #sym.count_ops(self.expressions[type][dtype]))
elif type[:-10] == 'second_':
self.expression_order.append(4) #sym.count_ops(self.expressions[type][dtype]))
elif type[:-10] == 'third_':
self.expression_order.append(5) #sym.count_ops(self.expressions[type][dtype]))
else:
self.expression_list.append(fexpressions[type])
self.expression_keys.append([fname, type])
self.expression_order.append(2)
# This step may be unnecessary.
# Not 100% sure if the sub expression elimination is order sensitive. This step orders the list with the 'function' code first and derivatives after.
self.expression_order, self.expression_list, self.expression_keys = zip(*sorted(zip(self.expression_order, self.expression_list, self.expression_keys)))
def extract_sub_expressions(self, cache_prefix='cache', sub_prefix='sub', prefix='XoXoXoX'):
# Do the common sub expression elimination.
common_sub_expressions, expression_substituted_list = sym.cse(self.expression_list, numbered_symbols(prefix=prefix))
self.variables[cache_prefix] = []
self.variables[sub_prefix] = []
# Create dictionary of new sub expressions
sub_expression_dict = {}
for var, void in common_sub_expressions:
sub_expression_dict[var.name] = var
# Sort out any expression that's dependent on something that scales with data size (these are listed in cacheable).
cacheable_list = []
params_change_list = []
# common_sub_expressions contains a list of paired tuples with the new variable and what it equals
for var, expr in common_sub_expressions:
arg_list = [e for e in expr.atoms() if e.is_Symbol]
# List any cacheable dependencies of the sub-expression
cacheable_symbols = [e for e in arg_list if e in cacheable_list or e in self.cacheable_vars]
if cacheable_symbols:
# list which ensures dependencies are cacheable.
cacheable_list.append(var)
else:
params_change_list.append(var)
replace_dict = {}
for i, expr in enumerate(cacheable_list):
sym_var = sym.var(cache_prefix + str(i))
self.variables[cache_prefix].append(sym_var)
replace_dict[expr.name] = sym_var
for i, expr in enumerate(params_change_list):
sym_var = sym.var(sub_prefix + str(i))
self.variables[sub_prefix].append(sym_var)
replace_dict[expr.name] = sym_var
for replace, void in common_sub_expressions:
for expr, keys in zip(expression_substituted_list, self.expression_keys):
setInDict(self.expressions, keys, expr.subs(replace, replace_dict[replace.name]))
for void, expr in common_sub_expressions:
expr = expr.subs(replace, replace_dict[replace.name])
# Replace original code with code including subexpressions.
for keys in self.expression_keys:
for replace, void in common_sub_expressions:
setInDict(self.expressions, keys, getFromDict(self.expressions, keys).subs(replace, replace_dict[replace.name]))
self.expressions['parameters_changed'] = {}
self.expressions['update_cache'] = {}
for var, expr in common_sub_expressions:
for replace, void in common_sub_expressions:
expr = expr.subs(replace, replace_dict[replace.name])
if var in cacheable_list:
self.expressions['update_cache'][replace_dict[var.name].name] = expr
else:
self.expressions['parameters_changed'][replace_dict[var.name].name] = expr
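A standalone illustration of the sympy common sub-expression elimination step used above; the toy expression and symbol prefix are hypothetical:
import sympy as sym
from sympy.utilities.iterables import numbered_symbols
x, y = sym.symbols('x y')
subs, reduced = sym.cse([sym.sin(x + y) + (x + y)**2], numbered_symbols('sub'))
# subs contains roughly [(sub0, x + y)] and reduced [sin(sub0) + sub0**2];
# extract_sub_expressions then sorts such sub-expressions into 'update_cache'
# (data dependent) and 'parameters_changed' (parameter only) groups.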
def _gen_code(self):
"""Generate code for the list of expressions provided using the common sub-expression eliminator to separate out portions that are computed multiple times."""
# This is the dictionary that stores all the generated code.
self.code = {}
def match_key(expr):
if type(expr) is dict:
code = {}
for key in expr.keys():
code[key] = match_key(expr[key])
else:
arg_list = [e for e in expr.atoms() if e.is_Symbol]
code = self._expr2code(arg_list, expr)
return code
self.code = match_key(self.expressions)
def _expr2code(self, arg_list, expr):
"""Convert the given symbolic expression into code."""
code = lambdastr(arg_list, expr)
function_code = code.split(':')[1].strip()
#for arg in arg_list:
# function_code = function_code.replace(arg.name, 'self.'+arg.name)
return function_code
def _print_code(self, code):
"""Prepare code for string writing."""
# This needs a rewrite --- it doesn't check for match clashes! So sub11 would be replaced by sub1 before being replaced with sub11!!
for key in self.variables.keys():
for arg in self.variables[key]:
code = code.replace(arg.name, 'self.'+arg.name)
return code
def _display_expression(self, keys, user_substitutes={}):
"""Helper function for human friendly display of the symbolic components."""
# Create some pretty maths symbols for the display.
sigma, alpha, nu, omega, l, variance = sym.var('\sigma, \alpha, \nu, \omega, \ell, \sigma^2')
substitutes = {'scale': sigma, 'shape': alpha, 'lengthscale': l, 'variance': variance}
substitutes.update(user_substitutes)
function_substitutes = {normcdfln : lambda arg : sym.log(normcdf(arg)),
logisticln : lambda arg : -sym.log(1+sym.exp(-arg)),
logistic : lambda arg : 1/(1+sym.exp(-arg)),
erfcx : lambda arg : erfc(arg)/sym.exp(arg*arg),
gammaln : lambda arg : sym.log(sym.gamma(arg))}
expr = getFromDict(self.expressions, keys)
for var_name, sub in self.variable_sort(self.expressions['update_cache'], reverse=True):
for var in self.variables['cache']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for var_name, sub in self.variable_sort(self.expressions['parameters_changed'], reverse=True):
for var in self.variables['sub']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for var_name, sub in self.variable_sort(substitutes, reverse=True):
for var in self.variables['theta']:
if var_name == var.name:
expr = expr.subs(var, sub)
break
for m, r in function_substitutes.iteritems():
expr = expr.replace(m, r)#normcdfln, lambda arg : sym.log(normcdf(arg)))
return expr.simplify()
def variable_sort(self, var_dict, reverse=False):
def sort_key(x):
digits = re.findall(r'\d+$', x[0])
if digits:
return int(digits[0])
else:
return x[0]
return sorted(var_dict.iteritems(), key=sort_key, reverse=reverse)

View file

@ -1,143 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from GPy.core.domains import POSITIVE, NEGATIVE, BOUNDED
import sys
lim_val = -np.log(sys.float_info.epsilon)
class transformation(object):
domain = None
def f(self, x):
raise NotImplementedError
def finv(self, x):
raise NotImplementedError
def gradfactor(self, f):
""" df_dx evaluated at self.f(x)=f"""
raise NotImplementedError
def initialize(self, f):
""" produce a sensible initial value for f(x)"""
raise NotImplementedError
def __str__(self):
raise NotImplementedError
class logexp(transformation):
domain = POSITIVE
def f(self, x):
return np.where(x<-lim_val, np.log(1+np.exp(-lim_val)), np.where(x>lim_val, x, np.log(1. + np.exp(x))))
def finv(self, f):
return np.where(f>lim_val, f, np.log(np.exp(f) - 1.))
def gradfactor(self, f):
return np.where(f>lim_val, 1., 1 - np.exp(-f))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
class negative_logexp(transformation):
domain = NEGATIVE
def f(self, x):
return -logexp.f(x)
def finv(self, f):
return logexp.finv(-f)
def gradfactor(self, f):
return -logexp.gradfactor(-f)
def initialize(self, f):
return -logexp.initialize(f)
def __str__(self):
return '(-ve)'
class logexp_clipped(logexp):
max_bound = 1e100
min_bound = 1e-10
log_max_bound = np.log(max_bound)
log_min_bound = np.log(min_bound)
domain = POSITIVE
def __init__(self, lower=1e-6):
self.lower = lower
def f(self, x):
exp = np.exp(np.clip(x, self.log_min_bound, self.log_max_bound))
f = np.log(1. + exp)
# if np.isnan(f).any():
# import ipdb;ipdb.set_trace()
return np.clip(f, self.min_bound, self.max_bound)
def finv(self, f):
return np.log(np.exp(f) - 1.)
def gradfactor(self, f):
ef = np.exp(f) # np.clip(f, self.min_bound, self.max_bound))
gf = (ef - 1.) / ef
return gf # np.where(f < self.lower, 0, gf)
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve_c)'
class exponent(transformation):
domain = POSITIVE
def f(self, x):
return np.where(x<lim_val, np.where(x>-lim_val, np.exp(x), np.exp(-lim_val)), np.exp(lim_val))
def finv(self, x):
return np.log(x)
def gradfactor(self, f):
return f
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '(+ve)'
class negative_exponent(exponent):
domain = NEGATIVE
def f(self, x):
return -exponent.f(x)
def finv(self, f):
return exponent.finv(-f)
def gradfactor(self, f):
return f
def initialize(self, f):
return -exponent.initialize(f) #np.abs(f)
def __str__(self):
return '(-ve)'
class square(transformation):
domain = POSITIVE
def f(self, x):
return x ** 2
def finv(self, x):
return np.sqrt(x)
def gradfactor(self, f):
return 2 * np.sqrt(f)
def initialize(self, f):
return np.abs(f)
def __str__(self):
return '(+sq)'
class logistic(transformation):
domain = BOUNDED
def __init__(self, lower, upper):
assert lower < upper
self.lower, self.upper = float(lower), float(upper)
self.difference = self.upper - self.lower
def f(self, x):
return self.lower + self.difference / (1. + np.exp(-x))
def finv(self, f):
return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
def gradfactor(self, f):
return (f - self.lower) * (self.upper - f) / self.difference
def initialize(self, f):
if np.any(np.logical_or(f < self.lower, f > self.upper)):
print "Warning: changing parameters to satisfy constraints"
return np.where(np.logical_or(f < self.lower, f > self.upper), self.f(f * 0.), f)
def __str__(self):
return '({},{})'.format(self.lower, self.upper)
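In closed form, the two most used transforms above, matching f, finv and gradfactor in the code:
logexp (positive): $f(x) = \log(1+e^{x}), \quad f^{-1}(f) = \log(e^{f}-1), \quad \frac{df}{dx} = \frac{e^{x}}{1+e^{x}} = 1 - e^{-f}.$
logistic (bounded in $(a,b)$): $f(x) = a + \frac{b-a}{1+e^{-x}}, \quad f^{-1}(f) = \log\frac{f-a}{b-f}, \quad \frac{df}{dx} = \frac{(f-a)(b-f)}{b-a}.$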

27
GPy/defaults.cfg Normal file
View file

@ -0,0 +1,27 @@
# This is the default configuration file for GPy
# Do not edit this file.
# For machine specific changes (i.e. those specific to a given installation) edit GPy/installation.cfg
# For user specific changes edit $HOME/.gpy_user.cfg
[parallel]
# Enable openmp support. This speeds up some computations, depending on the number
# of cores available. Setting up a compiler with openmp support can be difficult on
# some platforms, hence by default it is off.
openmp=False
[datasets]
# location for the local data cache
dir=$HOME/tmp/GPy-datasets/
[anaconda]
# if you have an anaconda python installation please specify it here.
installed = False
location = None
# set this to true if you have the MKL optimizations installed:
MKL = False
[weave]
#if true, try to use weave, and fall back to numpy. if false, just use numpy.
working = True
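A minimal Python 2 sketch of reading this layered configuration; the precedence (defaults, then installation, then the user file) is taken from the comments above and is an assumption about how GPy itself merges them:
import os
from ConfigParser import SafeConfigParser
parser = SafeConfigParser()
# later files in the list override values from earlier ones
parser.read(['GPy/defaults.cfg',
             'GPy/installation.cfg',
             os.path.expanduser('~/.gpy_user.cfg')])
print parser.getboolean('parallel', 'openmp')
print parser.get('datasets', 'dir')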

View file

@ -1,8 +1,7 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import classification import classification
import regression import regression
import dimensionality_reduction import dimensionality_reduction
import tutorials import non_gaussian
import stochastic

View file

@ -1,11 +1,10 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
Gaussian Processes classification Gaussian Processes classification examples
""" """
import pylab as pb
import GPy import GPy
default_seed = 10000 default_seed = 10000
@ -15,7 +14,9 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. Run a Gaussian process classification on the three phase oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
""" """
data = GPy.util.datasets.oil() try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.oil()
X = data['X'] X = data['X']
Xtest = data['Xtest'] Xtest = data['Xtest']
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
@ -27,13 +28,13 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
# Constrain all parameters to be positive # Constrain all parameters to be positive
m.tie_params('.*len') #m.tie_params('.*len')
m['.*len'] = 10. m['.*len'] = 10.
m.update_likelihood_approximation()
# Optimize # Optimize
if optimize: if optimize:
m.optimize(max_iters=max_iters) for _ in range(5):
m.optimize(max_iters=int(max_iters/5))
print(m) print(m)
#Test #Test
@ -50,7 +51,9 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -61,13 +64,14 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
if optimize: if optimize:
#m.update_likelihood_approximation() #m.update_likelihood_approximation()
# Parameters optimization: # Parameters optimization:
#m.optimize() m.optimize()
#m.update_likelihood_approximation() #m.update_likelihood_approximation()
m.pseudo_EM() #m.pseudo_EM()
# Plot # Plot
if plot: if plot:
fig, axes = pb.subplots(2, 1) from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
@ -83,27 +87,30 @@ def toy_linear_1d_classification_laplace(seed=default_seed, optimize=True, plot=
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
bern_noise_model = GPy.likelihoods.bernoulli() likelihood = GPy.likelihoods.Bernoulli()
laplace_likelihood = GPy.likelihoods.Laplace(Y.copy(), bern_noise_model) laplace_inf = GPy.inference.latent_function_inference.Laplace()
kernel = GPy.kern.RBF(1)
# Model definition # Model definition
m = GPy.models.GPClassification(data['X'], Y, likelihood=laplace_likelihood) m = GPy.core.GP(data['X'], Y, kernel=kernel, likelihood=likelihood, inference_method=laplace_inf)
print m
# Optimize # Optimize
if optimize: if optimize:
#m.update_likelihood_approximation() try:
# Parameters optimization: m.optimize('scg', messages=1)
m.optimize('bfgs', messages=1) except Exception as e:
#m.pseudo_EM() return m
# Plot # Plot
if plot: if plot:
fig, axes = pb.subplots(2, 1) from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
@ -119,7 +126,9 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -129,21 +138,19 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
# Optimize # Optimize
if optimize: if optimize:
#m.update_likelihood_approximation() m.optimize()
# Parameters optimization:
#m.optimize()
m.pseudo_EM()
# Plot # Plot
if plot: if plot:
fig, axes = pb.subplots(2, 1) from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
print m print m
return m return m
def toy_heaviside(seed=default_seed, optimize=True, plot=True): def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
""" """
Simple 1D classification example using a Heaviside GP transformation Simple 1D classification example using a Heaviside GP transformation
@ -152,25 +159,30 @@ def toy_heaviside(seed=default_seed, optimize=True, plot=True):
""" """
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
# Model definition # Model definition
noise_model = GPy.likelihoods.bernoulli(GPy.likelihoods.noise_models.gp_transformations.Heaviside()) kernel = GPy.kern.RBF(1)
likelihood = GPy.likelihoods.EP(Y, noise_model) likelihood = GPy.likelihoods.Bernoulli(gp_link=GPy.likelihoods.link_functions.Heaviside())
m = GPy.models.GPClassification(data['X'], likelihood=likelihood) ep = GPy.inference.latent_function_inference.expectation_propagation.EP()
m = GPy.core.GP(X=data['X'], Y=Y, kernel=kernel, likelihood=likelihood, inference_method=ep, name='gp_classification_heaviside')
#m = GPy.models.GPClassification(data['X'], likelihood=likelihood)
# Optimize # Optimize
if optimize: if optimize:
m.update_likelihood_approximation()
# Parameters optimization: # Parameters optimization:
m.optimize() for _ in range(5):
#m.pseudo_EM() m.optimize(max_iters=int(max_iters/5))
print m
# Plot # Plot
if plot: if plot:
fig, axes = pb.subplots(2, 1) from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0]) m.plot_f(ax=axes[0])
m.plot(ax=axes[1]) m.plot(ax=axes[1])
@ -189,7 +201,9 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
:param kernel: kernel to use in the model :param kernel: kernel to use in the model
:type kernel: a GPy kernel :type kernel: a GPy kernel
""" """
data = GPy.util.datasets.crescent_data(seed=seed) try:import pods
except ImportError:print 'pods unavailable, see https://github.com/sods/ods for example datasets'
data = pods.datasets.crescent_data(seed=seed)
Y = data['Y'] Y = data['Y']
Y[Y.flatten()==-1] = 0 Y[Y.flatten()==-1] = 0
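The recurring change in this file is that the old likelihood wrappers (GPy.likelihoods.bernoulli with EP or Laplace objects wrapping Y) are replaced by an explicit likelihood object plus an inference method passed to GPy.core.GP. A minimal, self-contained sketch of that pattern on synthetic data (assuming the API used in the hunks above; it is not lifted verbatim from the examples):

    import numpy as np
    import GPy

    # Toy 1D binary classification data, purely illustrative.
    np.random.seed(0)
    X = np.random.rand(40, 1) * 10
    Y = (np.sin(X) > 0).astype(float)

    kernel = GPy.kern.RBF(1)
    likelihood = GPy.likelihoods.Bernoulli()
    laplace_inf = GPy.inference.latent_function_inference.Laplace()

    # Classification GP with an explicit inference method, as in the diff above.
    m = GPy.core.GP(X, Y, kernel=kernel, likelihood=likelihood,
                    inference_method=laplace_inf)
    m.optimize('bfgs', max_iters=100)
    print(m)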
View file
@ -0,0 +1,89 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
try:
import pylab as pb
except:
pass
import GPy
pb.ion()
pb.close('all')
X1 = np.arange(3)[:,None]
X2 = np.arange(4)[:,None]
I1 = np.zeros_like(X1)
I2 = np.ones_like(X2)
_X = np.vstack([ X1, X2 ])
_I = np.vstack([ I1, I2 ])
X = np.hstack([ _X, _I ])
Y1 = np.sin(X1/8.)
Y2 = np.cos(X2/8.)
Bias = GPy.kern.Bias(1,active_dims=[0])
Coreg = GPy.kern.Coregionalize(1,2,active_dims=[1])
K = Bias.prod(Coreg,name='X')
#K.coregion.W = 0
#print K.coregion.W
#print Bias.K(_X,_X)
#print K.K(X,X)
#pb.matshow(K.K(X,X))
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=2,kernels_list=Mlist,name='H')
kern.B.W = 0
kern.B.kappa = 1.
#kern.B.W.fix()
#kern.B.kappa.fix()
#m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
Z1 = np.array([1.5,2.5])[:,None]
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1], Y_list=[Y1], Z_list = [Z1], kernel=kern)
#m.optimize()
m.checkgrad(verbose=1)
"""
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
#m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""
"""
X1 = 100 * np.random.rand(100)[:,None]
X2 = 100 * np.random.rand(100)[:,None]
#X1.sort()
#X2.sort()
Y1 = np.sin(X1/10.) + np.random.rand(100)[:,None]
Y2 = np.cos(X2/10.) + np.random.rand(100)[:,None]
Mlist = [GPy.kern.Matern32(1,lengthscale=20.,name="Mat")]
kern = GPy.util.multioutput.LCM(input_dim=1,num_outputs=12,kernels_list=Mlist,name='H')
m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2], kernel=kern)
m.optimize()
fig = pb.figure()
ax0 = fig.add_subplot(211)
ax1 = fig.add_subplot(212)
slices = GPy.util.multioutput.get_slices([Y1,Y2])
m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],ax=ax0)
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],ax=ax1)
"""
View file
@ -1,75 +1,80 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as _np import numpy as _np
default_seed = 123344 default_seed = 123344
def bgplvm_test_model(seed=default_seed, optimize=False, verbose=1, plot=False): # default_seed = _np.random.seed(123344)
def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan=False):
""" """
model for testing purposes. Samples from a GP with rbf kernel and learns model for testing purposes. Samples from a GP with rbf kernel and learns
the samples with a new kernel. Normally not for optimization, just model checking the samples with a new kernel. Normally not for optimization, just model checking
""" """
from GPy.likelihoods.gaussian import Gaussian
import GPy import GPy
num_inputs = 13 num_inputs = 13
num_inducing = 5 num_inducing = 5
if plot: if plot:
output_dim = 1 output_dim = 1
input_dim = 2 input_dim = 3
else: else:
input_dim = 2 input_dim = 2
output_dim = 25 output_dim = output_dim
# generate GPLVM-like data # generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim) X = _np.random.rand(num_inputs, input_dim)
lengthscales = _np.random.rand(input_dim) lengthscales = _np.random.rand(input_dim)
k = (GPy.kern.rbf(input_dim, .5, lengthscales, ARD=True) k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
+ GPy.kern.white(input_dim, 0.01))
K = k.K(X) K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, output_dim).T Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
lik = Gaussian(Y, normalize=True)
k = GPy.kern.rbf_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim) # k = GPy.kern.RBF_inv(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim)
# k = GPy.kern.linear(input_dim) + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.linear(input_dim)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001) # k = GPy.kern.RBF(input_dim, ARD = False) + GPy.kern.white(input_dim, 0.00001)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.rbf(input_dim, .3, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.RBF(input_dim, .3, _np.ones(input_dim) * .2, ARD=True)
# k = GPy.kern.rbf(input_dim, .5, 2., ARD=0) + GPy.kern.rbf(input_dim, .3, .2, ARD=0) # k = GPy.kern.RBF(input_dim, .5, 2., ARD=0) + GPy.kern.RBF(input_dim, .3, .2, ARD=0)
# k = GPy.kern.rbf(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True) # k = GPy.kern.RBF(input_dim, .5, _np.ones(input_dim) * 2., ARD=True) + GPy.kern.linear(input_dim, _np.ones(input_dim) * .2, ARD=True)
p = .3
m = GPy.models.BayesianGPLVM(Y, input_dim, kernel=k, num_inducing=num_inducing)
if nan:
m.inference_method = GPy.inference.latent_function_inference.var_dtc.VarDTCMissingData()
m.Y[_np.random.binomial(1, p, size=(Y.shape)).astype(bool)] = _np.nan
m.parameters_changed()
m = GPy.models.BayesianGPLVM(lik, input_dim, kernel=k, num_inducing=num_inducing)
#=========================================================================== #===========================================================================
# randomly obstruct data with percentage p # randomly obstruct data with percentage p
p = .8
Y_obstruct = Y.copy()
Y_obstruct[_np.random.uniform(size=(Y.shape)) < p] = _np.nan
#=========================================================================== #===========================================================================
m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing) # m2 = GPy.models.BayesianGPLVMWithMissingData(Y_obstruct, input_dim, kernel=k, num_inducing=num_inducing)
m.lengthscales = lengthscales # m.lengthscales = lengthscales
if plot: if plot:
import matplotlib.pyplot as pb import matplotlib.pyplot as pb
m.plot() m.plot()
pb.title('PCA initialisation') pb.title('PCA initialisation')
m2.plot() # m2.plot()
pb.title('PCA initialisation') # pb.title('PCA initialisation')
if optimize: if optimize:
m.optimize('scg', messages=verbose) m.optimize('scg', messages=verbose)
m2.optimize('scg', messages=verbose) # m2.optimize('scg', messages=verbose)
if plot: if plot:
m.plot() m.plot()
pb.title('After optimisation') pb.title('After optimisation')
m2.plot() # m2.plot()
pb.title('After optimisation') # pb.title('After optimisation')
return m, m2 return m
def gplvm_oil_100(optimize=True, verbose=1, plot=True): def gplvm_oil_100(optimize=True, verbose=1, plot=True):
import GPy import GPy
data = GPy.util.datasets.oil_100() import pods
data = pods.datasets.oil_100()
Y = data['X'] Y = data['X']
# create simple GP model # create simple GP model
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6) kernel = GPy.kern.RBF(6, ARD=True) + GPy.kern.Bias(6)
m = GPy.models.GPLVM(Y, 6, kernel=kernel) m = GPy.models.GPLVM(Y, 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1) m.data_labels = data['Y'].argmax(axis=1)
if optimize: m.optimize('scg', messages=verbose) if optimize: m.optimize('scg', messages=verbose)
@ -78,13 +83,15 @@ def gplvm_oil_100(optimize=True, verbose=1, plot=True):
def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50): def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_inducing=15, max_iters=50):
import GPy import GPy
import pods
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = pods.datasets.oil()
Y = data['X'][:N] Y = data['X'][:N]
Y = Y - Y.mean(0) Y = Y - Y.mean(0)
Y /= Y.std(0) Y /= Y.std(0)
# Create the model # Create the model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q) kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q)
m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
@ -94,9 +101,9 @@ def sparse_gplvm_oil(optimize=True, verbose=0, plot=True, N=100, Q=6, num_induci
m.kern.plot_ARD() m.kern.plot_ARD()
return m return m
def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4, sigma=.2): def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=25, Q=4, sigma=.2):
import GPy import GPy
from GPy.util.datasets import swiss_roll_generated from pods.datasets import swiss_roll_generated
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
data = swiss_roll_generated(num_samples=N, sigma=sigma) data = swiss_roll_generated(num_samples=N, sigma=sigma)
@ -134,93 +141,103 @@ def swiss_roll(optimize=True, verbose=1, plot=True, N=1000, num_inducing=15, Q=4
(1 - var))) + .001 (1 - var))) + .001
Z = _np.random.permutation(X)[:num_inducing] Z = _np.random.permutation(X)[:num_inducing]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, ARD=True) + GPy.kern.Bias(Q, _np.exp(-2)) + GPy.kern.White(Q, _np.exp(-2))
m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel) m = BayesianGPLVM(Y, Q, X=X, X_variance=S, num_inducing=num_inducing, Z=Z, kernel=kernel)
m.data_colors = c m.data_colors = c
m.data_t = t m.data_t = t
m['noise_variance'] = Y.var() / 100.
if optimize: if optimize:
m.optimize('scg', messages=verbose, max_iters=2e3) m.optimize('bfgs', messages=verbose, max_iters=2e3)
if plot: if plot:
fig = plt.figure('fitted') fig = plt.figure('fitted')
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
s = m.input_sensitivity().argsort()[::-1][:2] s = m.input_sensitivity().argsort()[::-1][:2]
ax.scatter(*m.X.T[s], c=c) ax.scatter(*m.X.mean.T[s], c=c)
return m return m
def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k): def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy import GPy
from GPy.likelihoods import Gaussian
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import numpy as np
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() try:
import pods
data = pods.datasets.oil()
except ImportError:
data = GPy.util.datasets.oil()
kernel = GPy.kern.rbf_inv(Q, 1., [.1] * Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2))
kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
Yn = Gaussian(Y, normalize=True) m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m = GPy.models.BayesianGPLVM(Yn, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
m['noise'] = Yn.Y.var() / 100.
if optimize: if optimize:
m.optimize('scg', messages=verbose, max_iters=max_iters, gtol=.05) m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
if plot: if plot:
y = m.likelihood.Y[0, :]
fig, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes) m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.util.visualize.vector_show(y) data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], # @UnusedVariable lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish') raw_input('Press enter to finish')
plt.close(fig) plt.close(fig)
return m return m
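Stripped of the dataset loading and the interactive visualisation, the Bayesian GP-LVM workflow used by bgplvm_oil above and the new ssgplvm_oil below comes down to a few lines. A sketch on random data (assuming the BayesianGPLVM signature shown above), so it runs without pods:

    import numpy as np
    import GPy

    np.random.seed(0)
    Y = np.random.randn(50, 12)          # stand-in for data['X'][:N]
    Q, num_inducing = 5, 10

    kernel = GPy.kern.RBF(Q, ARD=True)
    m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing)
    m.optimize('bfgs', max_iters=200)
    # ARD lengthscales indicate which latent dimensions the model actually uses.
    print(m.kern.lengthscale)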
def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False): def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
x = _np.linspace(0, 4 * _np.pi, N)[:, None] import GPy
s1 = _np.vectorize(lambda x: _np.sin(x)) from matplotlib import pyplot as plt
s2 = _np.vectorize(lambda x: _np.cos(x)) import pods
s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
sS = _np.vectorize(lambda x: _np.sin(2 * x))
s1 = s1(x) _np.random.seed(0)
s2 = s2(x) data = pods.datasets.oil()
s3 = s3(x)
sS = sS(x)
S1 = _np.hstack([s1, sS]) kernel = GPy.kern.RBF(Q, 1., 1. / _np.random.uniform(0, 1, (Q,)), ARD=True) # + GPy.kern.Bias(Q, _np.exp(-2))
S2 = _np.hstack([s2, s3, sS]) Y = data['X'][:N]
S3 = _np.hstack([s3, sS]) m = GPy.models.SSGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
Y1 = S1.dot(_np.random.randn(S1.shape[1], D1)) if optimize:
Y2 = S2.dot(_np.random.randn(S2.shape[1], D2)) m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
Y1 += .3 * _np.random.randn(*Y1.shape) if plot:
Y2 += .2 * _np.random.randn(*Y2.shape) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
Y3 += .25 * _np.random.randn(*Y3.shape) m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0, :]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean.values[0:1, :], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
return m
Y1 -= Y1.mean(0) def _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim=False):
Y2 -= Y2.mean(0) Q_signal = 4
Y3 -= Y3.mean(0) import GPy
Y1 /= Y1.std(0) import numpy as np
Y2 /= Y2.std(0) np.random.seed(3000)
Y3 /= Y3.std(0)
k = GPy.kern.Matern32(Q_signal, 1., lengthscale=(np.random.uniform(1, 6, Q_signal)), ARD=1)
for i in range(Q_signal):
k += GPy.kern.PeriodicExponential(1, variance=1., active_dims=[i], period=3., lower=-2, upper=6)
t = np.c_[[np.linspace(-1, 5, N) for _ in range(Q_signal)]].T
K = k.K(t)
s2, s1, s3, sS = np.random.multivariate_normal(np.zeros(K.shape[0]), K, size=(4))[:, :, None]
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
slist = [sS, s1, s2, s3] slist = [sS, s1, s2, s3]
slist_names = ["sS", "s1", "s2", "s3"] slist_names = ["sS", "s1", "s2", "s3"]
Ylist = [Y1, Y2, Y3] Ylist = [Y1, Y2, Y3]
if plot_sim: if plot_sim:
import pylab from matplotlib import pyplot as plt
import matplotlib.cm as cm import matplotlib.cm as cm
import itertools import itertools
fig = pylab.figure("MRD Simulation Data", figsize=(8, 6)) fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf() fig.clf()
ax = fig.add_subplot(2, 1, 1) ax = fig.add_subplot(2, 1, 1)
labls = slist_names labls = slist_names
@ -229,30 +246,75 @@ def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
ax.legend() ax.legend()
for i, Y in enumerate(Ylist): for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i) ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1)) ax.set_title("Y{}".format(i + 1))
pylab.draw() plt.draw()
pylab.tight_layout() plt.tight_layout()
return slist, [S1, S2, S3], Ylist return slist, [S1, S2, S3], Ylist
# def bgplvm_simulation_matlab_compare(): def _simulate_sincos(D1, D2, D3, N, num_inducing, plot_sim=False):
# from GPy.util.datasets import simulation_BGPLVM _np.random.seed(1234)
# from GPy import kern
# from GPy.models import BayesianGPLVM x = _np.linspace(0, 4 * _np.pi, N)[:, None]
# s1 = _np.vectorize(lambda x: _np.sin(x))
# sim_data = simulation_BGPLVM() s2 = _np.vectorize(lambda x: _np.cos(x) ** 2)
# Y = sim_data['Y'] s3 = _np.vectorize(lambda x:-_np.exp(-_np.cos(2 * x)))
# mu = sim_data['mu'] sS = _np.vectorize(lambda x: _np.cos(x))
# num_inducing, [_, Q] = 3, mu.shape
# s1 = s1(x)
# k = kern.linear(Q, ARD=True) + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) s2 = s2(x)
# m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k, s3 = s3(x)
# _debug=False) sS = sS(x)
# m.auto_scale_factor = True
# m['noise'] = Y.var() / 100. s1 -= s1.mean(); s1 /= s1.std(0)
# m['linear_variance'] = .01 s2 -= s2.mean(); s2 /= s2.std(0)
# return m s3 -= s3.mean(); s3 /= s3.std(0)
sS -= sS.mean(); sS /= sS.std(0)
Y1, Y2, Y3, S1, S2, S3 = _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS)
slist = [sS, s1, s2, s3]
slist_names = ["sS", "s1", "s2", "s3"]
Ylist = [Y1, Y2, Y3]
if plot_sim:
from matplotlib import pyplot as plt
import matplotlib.cm as cm
import itertools
fig = plt.figure("MRD Simulation Data", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = slist_names
for S, lab in itertools.izip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y, aspect='auto', cmap=cm.gray) # @UndefinedVariable
ax.set_title("Y{}".format(i + 1))
plt.draw()
plt.tight_layout()
return slist, [S1, S2, S3], Ylist
def _generate_high_dimensional_output(D1, D2, D3, s1, s2, s3, sS):
S1 = _np.hstack([s1, sS])
S2 = _np.hstack([s2, s3, sS])
S3 = _np.hstack([s3, sS])
Y1 = S1.dot(_np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(_np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(_np.random.randn(S3.shape[1], D3))
Y1 += .3 * _np.random.randn(*Y1.shape)
Y2 += .2 * _np.random.randn(*Y2.shape)
Y3 += .25 * _np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
Y3 -= Y3.mean(0)
Y1 /= Y1.std(0)
Y2 /= Y2.std(0)
Y3 /= Y3.std(0)
return Y1, Y2, Y3, S1, S2, S3
def bgplvm_simulation(optimize=True, verbose=1, def bgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False, plot=True, plot_sim=False,
@ -261,95 +323,181 @@ def bgplvm_simulation(optimize=True, verbose=1,
from GPy import kern from GPy import kern
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
D1, D2, D3, N, num_inducing, Q = 49, 30, 10, 12, 3, 10 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.linear(Q, ARD=True) k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
m.X_variance = m.X_variance * .7 m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m['noise'] = Y.var() / 100. m.likelihood.variance = .1
if optimize:
print "Optimizing model:"
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m
def ssgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False,
max_iters=2e4, useGPU=False
):
from GPy import kern
from GPy.models import SSGPLVM
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
# k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
m.X.variance[:] = _np.random.uniform(0, .01, m.X.shape)
m.likelihood.variance = .1
if optimize: if optimize:
print "Optimizing model:" print "Optimizing model:"
m.optimize('scg', messages=verbose, max_iters=max_iters, m.optimize('scg', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.plot_X_1d("BGPLVM Latent Space 1D") m.X.plot("SSGPLVM Latent Space 1D")
m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
return m
def bgplvm_simulation_missing_data(optimize=True, verbose=1,
plot=True, plot_sim=False,
max_iters=2e4, percent_missing=.1,
):
from GPy import kern
from GPy.models.bayesian_gplvm_minibatch import BayesianGPLVMMiniBatch
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 400, 3, 4
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True) # + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
inan = _np.random.binomial(1, percent_missing, size=Y.shape).astype(bool) # mask a fraction percent_missing of the entries
Ymissing = Y.copy()
Ymissing[inan] = _np.nan
m = BayesianGPLVMMiniBatch(Ymissing, Q, init="random", num_inducing=num_inducing,
kernel=k, missing_data=True)
m.Yreal = Y
if optimize:
print "Optimizing model:"
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m
def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw): def mrd_simulation(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy import kern from GPy import kern
from GPy.models import MRD from GPy.models import MRD
from GPy.likelihoods import Gaussian
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5 D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
likelihood_list = [Gaussian(x, normalize=True) for x in Ylist]
k = kern.linear(Q, ARD=True)# + kern.bias(Q, _np.exp(-2)) + kern.white(Q, _np.exp(-2)) # Ylist = [Ylist[0]]
m = MRD(likelihood_list, input_dim=Q, num_inducing=num_inducing, kernels=k, initx="", initz='permute', **kw) k = kern.Linear(Q, ARD=True)
m.ensure_default_constraints() m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing, kernel=k, initx="PCA_concat", initz='permute', **kw)
m['.*noise'] = [Y.var() / 40. for Y in Ylist]
for i, bgplvm in enumerate(m.bgplvms):
m['{}_noise'.format(i)] = 1 #bgplvm.likelihood.Y.var() / 500.
bgplvm.X_variance = bgplvm.X_variance #* .1
if optimize: if optimize:
print "Optimizing Model:" print "Optimizing Model:"
m.optimize(messages=verbose, max_iters=8e3, gtol=.1) m.optimize(messages=verbose, max_iters=8e3)
if plot: if plot:
m.plot_X_1d("MRD Latent Space 1D") m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales")
return m
def mrd_simulation_missing_data(optimize=True, verbose=True, plot=True, plot_sim=True, **kw):
from GPy import kern
from GPy.models import MRD
D1, D2, D3, N, num_inducing, Q = 60, 20, 36, 60, 6, 5
_, _, Ylist = _simulate_matern(D1, D2, D3, N, num_inducing, plot_sim)
# Ylist = [Ylist[0]]
k = kern.Linear(Q, ARD=True)
inanlist = []
for Y in Ylist:
inan = _np.random.binomial(1, .6, size=Y.shape).astype(bool)
inanlist.append(inan)
Y[inan] = _np.nan
m = MRD(Ylist, input_dim=Q, num_inducing=num_inducing,
kernel=k, inference_method=None,
initx="random", initz='permute', **kw)
if optimize:
print "Optimizing Model:"
m.optimize('bfgs', messages=verbose, max_iters=8e3, gtol=.1)
if plot:
m.X.plot("MRD Latent Space 1D")
m.plot_scales("MRD Scales") m.plot_scales("MRD Scales")
return m return m
def brendan_faces(optimize=True, verbose=True, plot=True): def brendan_faces(optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.brendan_faces() data = pods.datasets.brendan_faces()
Q = 2 Q = 2
Y = data['Y'] Y = data['Y']
Yn = Y - Y.mean() Yn = Y - Y.mean()
Yn /= Yn.std() Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q) m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
# optimize # optimize
m.constrain('rbf|noise|white', GPy.core.transformations.logexp_clipped())
if optimize: m.optimize('scg', messages=verbose, max_iters=1000) if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
if plot: if plot:
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False) data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, order='F', invert=False, scale=False)
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
def olivetti_faces(optimize=True, verbose=True, plot=True): def olivetti_faces(optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.olivetti_faces() data = pods.datasets.olivetti_faces()
Q = 2 Q = 2
Y = data['Y'] Y = data['Y']
Yn = Y - Y.mean() Yn = Y - Y.mean()
Yn /= Yn.std() Yn /= Yn.std()
m = GPy.models.GPLVM(Yn, Q) m = GPy.models.BayesianGPLVM(Yn, Q, num_inducing=20)
if optimize: m.optimize('scg', messages=verbose, max_iters=1000)
if optimize: m.optimize('bfgs', messages=verbose, max_iters=1000)
if plot: if plot:
ax = m.plot_latent(which_indices=(0, 1)) ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False) data_show = GPy.plotting.matplot_dep.visualize.image_show(y[None, :], dimensions=(112, 92), transpose=False, invert=False, scale=False)
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm = GPy.plotting.matplot_dep.visualize.lvm(m.X.mean[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True): def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=True):
import GPy import GPy
data = GPy.util.datasets.osu_run1() import pods
data = pods.datasets.osu_run1()
# optimize # optimize
if range == None: if range == None:
Y = data['Y'].copy() Y = data['Y'].copy()
@ -357,43 +505,46 @@ def stick_play(range=None, frame_rate=15, optimize=False, verbose=True, plot=Tru
Y = data['Y'][range[0]:range[1], :].copy() Y = data['Y'][range[0]:range[1], :].copy()
if plot: if plot:
y = Y[0, :] y = Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.data_play(Y, data_show, frame_rate) GPy.plotting.matplot_dep.visualize.data_play(Y, data_show, frame_rate)
return Y return Y
def stick(kernel=None, optimize=True, verbose=True, plot=True): def stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize('bfgs', messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot:
plt.clf() plt.clf()
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[:1, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
lvm_visualizer.close()
data_show.close()
return m return m
def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True): def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
mapping = GPy.mappings.Linear(data['Y'].shape[1], 2) mapping = GPy.mappings.Linear(data['Y'].shape[1], 2)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot and GPy.plotting.matplot_dep.visualize.visual_available:
plt.clf() plt.clf()
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -401,32 +552,33 @@ def bcgplvm_linear_stick(kernel=None, optimize=True, verbose=True, plot=True):
def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True): def bcgplvm_stick(kernel=None, optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
# optimize # optimize
back_kernel=GPy.kern.rbf(data['Y'].shape[1], lengthscale=5.) back_kernel = GPy.kern.RBF(data['Y'].shape[1], lengthscale=5.)
mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel) mapping = GPy.mappings.Kernel(X=data['Y'], output_dim=2, kernel=back_kernel)
m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping) m = GPy.models.BCGPLVM(data['Y'], 2, kernel=kernel, mapping=mapping)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.util.visualize.visual_available: if plot and GPy.plotting.matplot_dep.visualize.visual_available:
plt.clf() plt.clf()
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
raw_input('Press enter to finish') # raw_input('Press enter to finish')
return m return m
def robot_wireless(optimize=True, verbose=True, plot=True): def robot_wireless(optimize=True, verbose=True, plot=True):
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import GPy import GPy
import pods
data = GPy.util.datasets.robot_wireless() data = pods.datasets.robot_wireless()
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2) m = GPy.models.BayesianGPLVM(data['Y'], 4, num_inducing=25)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
m._set_params(m._get_params())
if plot: if plot:
m.plot_latent() m.plot_latent()
@ -435,23 +587,33 @@ def robot_wireless(optimize=True, verbose=True, plot=True):
def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True): def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from matplotlib import pyplot as plt from matplotlib import pyplot as plt
import numpy as np
import GPy import GPy
import pods
data = GPy.util.datasets.osu_run1() data = pods.datasets.osu_run1()
Q = 6 Q = 6
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, _np.exp(-2)) + GPy.kern.white(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, lengthscale=np.repeat(.5, Q), ARD=True)
m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel) m = BayesianGPLVM(data['Y'], Q, init="PCA", num_inducing=20, kernel=kernel)
m.data = data
m.likelihood.variance = 0.001
# optimize # optimize
m.ensure_default_constraints() try:
if optimize: m.optimize('scg', messages=verbose, max_iters=200, xtol=1e-300, ftol=1e-300) if optimize: m.optimize('bfgs', messages=verbose, max_iters=5e3, bfgs_factor=10)
m._set_params(m._get_params()) except KeyboardInterrupt:
print "Keyboard interrupt, continuing to plot and return"
if plot: if plot:
plt.clf, (latent_axes, sense_axes) = plt.subplots(1, 2) fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
plt.sca(latent_axes) plt.sca(latent_axes)
m.plot_latent() m.plot_latent(ax=latent_axes)
y = m.likelihood.Y[0, :].copy() y = m.Y[:1, :].copy()
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y, connect=data['connect'])
GPy.util.visualize.lvm_dimselect(m.X[0, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes) dim_select = GPy.plotting.matplot_dep.visualize.lvm_dimselect(m.X.mean[:1, :].copy(), m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
fig.canvas.draw()
fig.canvas.show()
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m
@ -459,20 +621,50 @@ def stick_bgplvm(model=None, optimize=True, verbose=True, plot=True):
def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True): def cmu_mocap(subject='35', motion=['01'], in_place=True, optimize=True, verbose=True, plot=True):
import GPy import GPy
import pods
data = GPy.util.datasets.cmu_mocap(subject, motion) data = pods.datasets.cmu_mocap(subject, motion)
if in_place: if in_place:
# Make figure move in place. # Make figure move in place.
data['Y'][:, 0:3] = 0.0 data['Y'][:, 0:3] = 0.0
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True) Y = data['Y']
Y_mean = Y.mean(0)
Y_std = Y.std(0)
m = GPy.models.GPLVM((Y - Y_mean) / Y_std, 2)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot: if plot:
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel']) data_show = GPy.plotting.matplot_dep.visualize.skeleton_show(y[None, :], data['skel'])
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm(m.X[0].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
lvm_visualizer.close() lvm_visualizer.close()
data_show.close()
return m return m
def ssgplvm_simulation_linear():
import numpy as np
import GPy
N, D, Q = 1000, 20, 5
pi = 0.2
def sample_X(Q, pi):
x = np.empty(Q)
dies = np.random.rand(Q)
for q in xrange(Q):
if dies[q] < pi:
x[q] = np.random.randn()
else:
x[q] = 0.
return x
Y = np.empty((N, D))
X = np.empty((N, Q))
# Generate data from random sampled weight matrices
for n in xrange(N):
X[n] = sample_X(Q, pi)
w = np.random.randn(D, Q)
Y[n] = np.dot(w, X[n])
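The MRD changes above swap the per-view likelihood list for a plain list of output matrices and a single kernel. A small runnable sketch of that call (assuming the MRD signature used in mrd_simulation above; the data here is synthetic):

    import numpy as np
    import GPy

    np.random.seed(0)
    N = 40
    t = np.linspace(0, 4 * np.pi, N)[:, None]
    Y1 = np.hstack([np.sin(t), np.cos(t)]).dot(np.random.randn(2, 8))   # view 1
    Y2 = np.cos(t).dot(np.random.randn(1, 6))                           # view 2

    Q, num_inducing = 4, 8
    k = GPy.kern.Linear(Q, ARD=True)
    m = GPy.models.MRD([Y1, Y2], input_dim=Q, num_inducing=num_inducing,
                       kernel=k, initx="PCA_concat", initz='permute')
    m.optimize(messages=0, max_iters=500)
    # m.plot_scales("MRD Scales")        # per-view ARD weights, as plotted in mrd_simulation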
View file
@ -1,7 +1,13 @@
# Copyright (c) 2014, Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import GPy import GPy
import numpy as np import numpy as np
import matplotlib.pyplot as plt
from GPy.util import datasets from GPy.util import datasets
try:
import matplotlib.pyplot as plt
except:
pass
def student_t_approx(optimize=True, plot=True): def student_t_approx(optimize=True, plot=True):
""" """
@ -30,47 +36,53 @@ def student_t_approx(optimize=True, plot=True):
#Yc = Yc/Yc.max() #Yc = Yc/Yc.max()
#Add student t random noise to datapoints #Add student t random noise to datapoints
deg_free = 5 deg_free = 1
print "Real noise: ", real_std print "Real noise: ", real_std
initial_var_guess = 0.5 initial_var_guess = 0.5
edited_real_sd = initial_var_guess edited_real_sd = initial_var_guess
# Kernel object # Kernel object
kernel1 = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) kernel1 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
kernel2 = kernel1.copy() kernel2 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
kernel3 = kernel1.copy() kernel3 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
kernel4 = kernel1.copy() kernel4 = GPy.kern.RBF(X.shape[1]) + GPy.kern.White(X.shape[1])
#Gaussian GP model on clean data #Gaussian GP model on clean data
m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1) m1 = GPy.models.GPRegression(X, Y.copy(), kernel=kernel1)
# optimize # optimize
m1.ensure_default_constraints() m1['.*white'].constrain_fixed(1e-5)
m1.constrain_fixed('white', 1e-5)
m1.randomize() m1.randomize()
#Gaussian GP model on corrupt data #Gaussian GP model on corrupt data
m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2) m2 = GPy.models.GPRegression(X, Yc.copy(), kernel=kernel2)
m2.ensure_default_constraints() m2['.*white'].constrain_fixed(1e-5)
m2.constrain_fixed('white', 1e-5)
m2.randomize() m2.randomize()
#Student t GP model on clean data #Student t GP model on clean data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
stu_t_likelihood = GPy.likelihoods.Laplace(Y.copy(), t_distribution) laplace_inf = GPy.inference.latent_function_inference.Laplace()
m3 = GPy.models.GPRegression(X, Y.copy(), kernel3, likelihood=stu_t_likelihood) m3 = GPy.core.GP(X, Y.copy(), kernel3, likelihood=t_distribution, inference_method=laplace_inf)
m3.ensure_default_constraints() m3['.*t_scale2'].constrain_bounded(1e-6, 10.)
m3.constrain_bounded('t_noise', 1e-6, 10.) m3['.*white'].constrain_fixed(1e-5)
m3.constrain_fixed('white', 1e-5)
m3.randomize() m3.randomize()
#Student t GP model on corrupt data #Student t GP model on corrupt data
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=deg_free, sigma2=edited_real_sd) t_distribution = GPy.likelihoods.StudentT(deg_free=deg_free, sigma2=edited_real_sd)
corrupt_stu_t_likelihood = GPy.likelihoods.Laplace(Yc.copy(), t_distribution) laplace_inf = GPy.inference.latent_function_inference.Laplace()
m4 = GPy.models.GPRegression(X, Yc.copy(), kernel4, likelihood=corrupt_stu_t_likelihood) m4 = GPy.core.GP(X, Yc.copy(), kernel4, likelihood=t_distribution, inference_method=laplace_inf)
m4.ensure_default_constraints() m4['.*t_scale2'].constrain_bounded(1e-6, 10.)
m4.constrain_bounded('t_noise', 1e-6, 10.) m4['.*white'].constrain_fixed(1e-5)
m4.constrain_fixed('white', 1e-5)
m4.randomize() m4.randomize()
print m4
debug=True
if debug:
m4.optimize(messages=1)
import pylab as pb
pb.plot(m4.X, m4.inference_method.f_hat)
pb.plot(m4.X, m4.Y, 'rx')
m4.plot()
print m4
return m4
if optimize: if optimize:
optimizer='scg' optimizer='scg'
@ -115,6 +127,7 @@ def student_t_approx(optimize=True, plot=True):
return m1, m2, m3, m4 return m1, m2, m3, m4
def boston_example(optimize=True, plot=True): def boston_example(optimize=True, plot=True):
raise NotImplementedError("Needs updating")
import sklearn import sklearn
from sklearn.cross_validation import KFold from sklearn.cross_validation import KFold
optimizer='bfgs' optimizer='bfgs'
@ -143,8 +156,8 @@ def boston_example(optimize=True, plot=True):
noise = 1e-1 #np.exp(-2) noise = 1e-1 #np.exp(-2)
rbf_len = 0.5 rbf_len = 0.5
data_axis_plot = 4 data_axis_plot = 4
kernelstu = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernelstu = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
kernelgp = GPy.kern.rbf(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernelgp = GPy.kern.RBF(X.shape[1]) + GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1])
#Baseline #Baseline
score_folds[0, n] = rmse(Y_test, np.mean(Y_train)) score_folds[0, n] = rmse(Y_test, np.mean(Y_train))
@ -152,10 +165,9 @@ def boston_example(optimize=True, plot=True):
#Gaussian GP #Gaussian GP
print "Gauss GP" print "Gauss GP"
mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy()) mgp = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelgp.copy())
mgp.ensure_default_constraints() mgp.constrain_fixed('.*white', 1e-5)
mgp.constrain_fixed('white', 1e-5) mgp['.*len'] = rbf_len
mgp['rbf_len'] = rbf_len mgp['.*noise'] = noise
mgp['noise'] = noise
print mgp print mgp
if optimize: if optimize:
mgp.optimize(optimizer=optimizer, messages=messages) mgp.optimize(optimizer=optimizer, messages=messages)
@ -170,9 +182,8 @@ def boston_example(optimize=True, plot=True):
g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D) g_distribution = GPy.likelihoods.noise_model_constructors.gaussian(variance=noise, N=N, D=D)
g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution) g_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), g_distribution)
mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood) mg = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=g_likelihood)
mg.ensure_default_constraints()
mg.constrain_positive('noise_variance') mg.constrain_positive('noise_variance')
mg.constrain_fixed('white', 1e-5) mg.constrain_fixed('.*white', 1e-5)
mg['rbf_len'] = rbf_len mg['rbf_len'] = rbf_len
mg['noise'] = noise mg['noise'] = noise
print mg print mg
@ -190,11 +201,10 @@ def boston_example(optimize=True, plot=True):
t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise) t_distribution = GPy.likelihoods.noise_model_constructors.student_t(deg_free=df, sigma2=noise)
stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution) stu_t_likelihood = GPy.likelihoods.Laplace(Y_train.copy(), t_distribution)
mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood) mstu_t = GPy.models.GPRegression(X_train.copy(), Y_train.copy(), kernel=kernelstu.copy(), likelihood=stu_t_likelihood)
mstu_t.ensure_default_constraints() mstu_t.constrain_fixed('.*white', 1e-5)
mstu_t.constrain_fixed('white', 1e-5) mstu_t.constrain_bounded('.*t_scale2', 0.0001, 1000)
mstu_t.constrain_bounded('t_noise', 0.0001, 1000)
mstu_t['rbf_len'] = rbf_len mstu_t['rbf_len'] = rbf_len
mstu_t['t_noise'] = noise mstu_t['.*t_scale2'] = noise
print mstu_t print mstu_t
if optimize: if optimize:
mstu_t.optimize(optimizer=optimizer, messages=messages) mstu_t.optimize(optimizer=optimizer, messages=messages)
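For reference, the new construction used for the Student-t models above, reduced to a self-contained sketch on synthetic data with a few outliers (assuming the StudentT likelihood and Laplace inference objects shown in this diff):

    import numpy as np
    import GPy

    np.random.seed(0)
    X = np.linspace(0, 10, 50)[:, None]
    Y = np.sin(X) + 0.1 * np.random.randn(50, 1)
    Y[::10] += 2.0                       # a few outliers for the Student-t likelihood to absorb

    kernel = GPy.kern.RBF(1)
    t_distribution = GPy.likelihoods.StudentT(deg_free=5, sigma2=0.5)
    laplace_inf = GPy.inference.latent_function_inference.Laplace()

    m = GPy.core.GP(X, Y, kernel, likelihood=t_distribution, inference_method=laplace_inf)
    m['.*t_scale2'].constrain_bounded(1e-6, 10.)
    m.optimize(messages=0)
    print(m)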
View file
@ -1,22 +1,29 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
Gaussian Processes regression examples Gaussian Processes regression examples
""" """
import pylab as pb try:
import pylab as pb
except:
pass
import numpy as np import numpy as np
import GPy import GPy
def olympic_marathon_men(optimize=True, plot=True): def olympic_marathon_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Olympic marathon data.""" """Run a standard Gaussian process regression on the Olympic marathon data."""
data = GPy.util.datasets.olympic_marathon_men() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.olympic_marathon_men()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
# set the lengthscale to be something sensible (defaults to 1) # set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10 m.kern.lengthscale = 10.
if optimize: if optimize:
m.optimize('bfgs', max_iters=200) m.optimize('bfgs', max_iters=200)
@ -25,79 +32,51 @@ def olympic_marathon_men(optimize=True, plot=True):
return m return m
def coregionalization_toy2(optimize=True, plot=True): def coregionalization_toy(optimize=True, plot=True):
""" """
A simple demonstration of coregionalization on two sinusoidal functions. A simple demonstration of coregionalization on two sinusoidal functions.
""" """
#build a design matrix with a column of integers indicating the output #build a design matrix with a column of integers indicating the output
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5
index = np.vstack((np.zeros_like(X1), np.ones_like(X2)))
X = np.hstack((np.vstack((X1, X2)), index))
#build a suitable set of observed variables #build a suitable set of observed variables
Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05 Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2. Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
Y = np.vstack((Y1, Y2))
#build the kernel m = GPy.models.GPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
k1 = GPy.kern.rbf(1) + GPy.kern.bias(1)
k2 = GPy.kern.coregionalize(2,1)
k = k1**k2
m = GPy.models.GPRegression(X, Y, kernel=k)
m.constrain_fixed('.*rbf_var', 1.)
if optimize: if optimize:
m.optimize('bfgs', max_iters=100) m.optimize('bfgs', max_iters=100)
if plot: if plot:
m.plot(fixed_inputs=[(1,0)]) slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
return m return m
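The rewritten example hands the two outputs straight to GPCoregionalizedRegression. When predicting from such a model, the test inputs carry an extra output-index column which is mirrored in Y_metadata; the sketch below assumes that convention (it is implied by the plotting calls above but not shown explicitly in this file):

    import numpy as np
    import GPy

    X1 = np.random.rand(50, 1) * 8
    X2 = np.random.rand(30, 1) * 5
    Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
    Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.

    m = GPy.models.GPCoregionalizedRegression(X_list=[X1, X2], Y_list=[Y1, Y2])
    m.optimize('bfgs', max_iters=100)

    # Predict output 1 on a grid: the last column of Xnew is the output index,
    # repeated in Y_metadata (assumed convention for the mixed-noise likelihood).
    Xnew = np.hstack([np.linspace(0, 8, 20)[:, None], np.ones((20, 1))])
    mean, var = m.predict(Xnew, Y_metadata={'output_index': Xnew[:, 1:].astype(int)})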
#FIXME: Needs recovering once likelihoods are consolidated
#def coregionalization_toy(optimize=True, plot=True):
# """
# A simple demonstration of coregionalization on two sinusoidal functions.
# """
# X1 = np.random.rand(50, 1) * 8
# X2 = np.random.rand(30, 1) * 5
# X = np.vstack((X1, X2))
# Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
# Y2 = -np.sin(X2) + np.random.randn(*X2.shape) * 0.05
# Y = np.vstack((Y1, Y2))
#
# k1 = GPy.kern.rbf(1)
# m = GPy.models.GPMultioutputRegression(X_list=[X1,X2],Y_list=[Y1,Y2],kernel_list=[k1])
# m.constrain_fixed('.*rbf_var', 1.)
# m.optimize(max_iters=100)
#
# fig, axes = pb.subplots(2,1)
# m.plot(fixed_inputs=[(1,0)],ax=axes[0])
# m.plot(fixed_inputs=[(1,1)],ax=axes[1])
# axes[0].set_title('Output 0')
# axes[1].set_title('Output 1')
# return m
def coregionalization_sparse(optimize=True, plot=True): def coregionalization_sparse(optimize=True, plot=True):
""" """
A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations. A simple demonstration of coregionalization on two sinusoidal functions using sparse approximations.
""" """
#fetch the data from the non sparse examples #build a design matrix with a column of integers indicating the output
m = coregionalization_toy2(optimize=False, plot=False) X1 = np.random.rand(50, 1) * 8
X, Y = m.X, m.likelihood.Y X2 = np.random.rand(30, 1) * 5
#construct a model #build a suitable set of observed variables
m = GPy.models.SparseGPRegression(X,Y) Y1 = np.sin(X1) + np.random.randn(*X1.shape) * 0.05
m.constrain_fixed('iip_\d+_1') # don't optimize the inducing input indexes Y2 = np.sin(X2) + np.random.randn(*X2.shape) * 0.05 + 2.
m = GPy.models.SparseGPCoregionalizedRegression(X_list=[X1,X2], Y_list=[Y1,Y2])
if optimize: if optimize:
m.optimize('bfgs', max_iters=100, messages=1) m.optimize('bfgs', max_iters=100)
if plot: if plot:
m.plot(fixed_inputs=[(1,0)]) slices = GPy.util.multioutput.get_slices([X1,X2])
m.plot(fixed_inputs=[(1,1)], ax=pb.gca()) m.plot(fixed_inputs=[(1,0)],which_data_rows=slices[0],Y_metadata={'output_index':0})
m.plot(fixed_inputs=[(1,1)],which_data_rows=slices[1],Y_metadata={'output_index':1},ax=pb.gca())
pb.ylim(-3,)
return m return m
@ -107,7 +86,11 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
from the Mount Epomeo runs. Requires gpxpy to be installed on your system from the Mount Epomeo runs. Requires gpxpy to be installed on your system
to load in the data. to load in the data.
""" """
data = GPy.util.datasets.epomeo_gpx() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.epomeo_gpx()
num_data_list = [] num_data_list = []
for Xpart in data['X']: for Xpart in data['X']:
num_data_list.append(Xpart.shape[0]) num_data_list.append(Xpart.shape[0])
@ -127,14 +110,14 @@ def epomeo_gpx(max_iters=200, optimize=True, plot=True):
Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None], Z = np.hstack((np.linspace(t[:,0].min(), t[:, 0].max(), num_inducing)[:, None],
np.random.randint(0, 4, num_inducing)[:, None])) np.random.randint(0, 4, num_inducing)[:, None]))
k1 = GPy.kern.rbf(1) k1 = GPy.kern.RBF(1)
k2 = GPy.kern.coregionalize(output_dim=5, rank=5) k2 = GPy.kern.Coregionalize(output_dim=5, rank=5)
k = k1**k2 k = k1**k2
m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True) m = GPy.models.SparseGPRegression(t, Y, kernel=k, Z=Z, normalize_Y=True)
m.constrain_fixed('.*rbf_var', 1.) m.constrain_fixed('.*variance', 1.)
m.constrain_fixed('iip') m.inducing_inputs.constrain_fixed()
m.constrain_bounded('noise_variance', 1e-3, 1e-1) m.Gaussian_noise.variance.constrain_bounded(1e-3, 1e-1)
m.optimize(max_iters=max_iters,messages=True) m.optimize(max_iters=max_iters,messages=True)
return m return m
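The coregionalized sparse model above builds its kernel as a product (**) of a one-dimensional data kernel and a Coregionalize kernel over the output index. A standalone sketch of that construction (assuming the Coregionalize signature used elsewhere in this diff):

    import numpy as np
    import GPy

    k1 = GPy.kern.RBF(1)
    k2 = GPy.kern.Coregionalize(1, output_dim=5, rank=5)
    k = k1 ** k2

    # The combined kernel is evaluated on [input, output_index] pairs,
    # matching the two-column t used in epomeo_gpx above.
    X = np.hstack([np.random.rand(10, 1), np.random.randint(0, 5, (10, 1))])
    print(k.K(X, X).shape)               # (10, 10)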
@ -150,13 +133,17 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
length_scales = np.linspace(0.1, 60., resolution) length_scales = np.linspace(0.1, 60., resolution)
log_SNRs = np.linspace(-3., 4., resolution) log_SNRs = np.linspace(-3., 4., resolution)
data = GPy.util.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number) try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.della_gatta_TRP63_gene_expression(data_set='della_gatta',gene_number=gene_number)
# data['Y'] = data['Y'][0::2, :] # data['Y'] = data['Y'][0::2, :]
# data['X'] = data['X'][0::2, :] # data['X'] = data['X'][0::2, :]
data['Y'] = data['Y'] - np.mean(data['Y']) data['Y'] = data['Y'] - np.mean(data['Y'])
lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.rbf) lls = GPy.examples.regression._contour_data(data, length_scales, log_SNRs, GPy.kern.RBF)
if plot: if plot:
pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet) pb.contour(length_scales, log_SNRs, np.exp(lls), 20, cmap=pb.cm.jet)
ax = pb.gca() ax = pb.gca()
@ -172,20 +159,20 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
optim_point_y = np.empty(2) optim_point_y = np.empty(2)
np.random.seed(seed=seed) np.random.seed(seed=seed)
for i in range(0, model_restarts): for i in range(0, model_restarts):
# kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) # kern = GPy.kern.RBF(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.))
kern = GPy.kern.rbf(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50)) kern = GPy.kern.RBF(1, variance=np.random.uniform(1e-3, 1), lengthscale=np.random.uniform(5, 50))
m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern) m = GPy.models.GPRegression(data['X'], data['Y'], kernel=kern)
m['noise_variance'] = np.random.uniform(1e-3, 1) m.likelihood.variance = np.random.uniform(1e-3, 1)
optim_point_x[0] = m['rbf_lengthscale'] optim_point_x[0] = m.rbf.lengthscale
optim_point_y[0] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); optim_point_y[0] = np.log10(m.rbf.variance) - np.log10(m.likelihood.variance);
# optimize # optimize
if optimize: if optimize:
m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters) m.optimize('scg', xtol=1e-6, ftol=1e-6, max_iters=max_iters)
optim_point_x[1] = m['rbf_lengthscale'] optim_point_x[1] = m.rbf.lengthscale
optim_point_y[1] = np.log10(m['rbf_variance']) - np.log10(m['noise_variance']); optim_point_y[1] = np.log10(m.rbf.variance) - np.log10(m.likelihood.variance);
if plot: if plot:
pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k') pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1] - optim_point_x[0], optim_point_y[1] - optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
@ -196,7 +183,7 @@ def multiple_optima(gene_number=937, resolution=80, model_restarts=10, seed=1000
ax.set_ylim(ylim) ax.set_ylim(ylim)
return m # (models, lls) return m # (models, lls)
def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf): def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.RBF):
""" """
Evaluate the GP objective function for a given data set for a range of Evaluate the GP objective function for a given data set for a range of
signal to noise ratios and a range of lengthscales. signal to noise ratios and a range of lengthscales.
@ -216,7 +203,7 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
noise_var = total_var / (1. + SNR) noise_var = total_var / (1. + SNR)
signal_var = total_var - noise_var signal_var = total_var - noise_var
model.kern['.*variance'] = signal_var model.kern['.*variance'] = signal_var
model['noise_variance'] = noise_var model.likelihood.variance = noise_var
length_scale_lls = [] length_scale_lls = []
for length_scale in length_scales: for length_scale in length_scales:
@ -230,13 +217,17 @@ def _contour_data(data, length_scales, log_SNRs, kernel_call=GPy.kern.rbf):
def olympic_100m_men(optimize=True, plot=True): def olympic_100m_men(optimize=True, plot=True):
"""Run a standard Gaussian process regression on the Rogers and Girolami olympics data.""" """Run a standard Gaussian process regression on the Rogers and Girolami olympics data."""
data = GPy.util.datasets.olympic_100m_men() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.olympic_100m_men()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
# set the lengthscale to be something sensible (defaults to 1) # set the lengthscale to be something sensible (defaults to 1)
m['rbf_lengthscale'] = 10 m.rbf.lengthscale = 10
if optimize: if optimize:
m.optimize('bfgs', max_iters=200) m.optimize('bfgs', max_iters=200)
@ -247,7 +238,11 @@ def olympic_100m_men(optimize=True, plot=True):
def toy_rbf_1d(optimize=True, plot=True): def toy_rbf_1d(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.toy_rbf_1d()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -261,7 +256,11 @@ def toy_rbf_1d(optimize=True, plot=True):
def toy_rbf_1d_50(optimize=True, plot=True): def toy_rbf_1d_50(optimize=True, plot=True):
"""Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance.""" """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
data = GPy.util.datasets.toy_rbf_1d_50() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.toy_rbf_1d_50()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -278,14 +277,15 @@ def toy_poisson_rbf_1d_laplace(optimize=True, plot=True):
optimizer='scg' optimizer='scg'
x_len = 30 x_len = 30
X = np.linspace(0, 10, x_len)[:, None] X = np.linspace(0, 10, x_len)[:, None]
f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.rbf(1).K(X)) f_true = np.random.multivariate_normal(np.zeros(x_len), GPy.kern.RBF(1).K(X))
Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None] Y = np.array([np.random.poisson(np.exp(f)) for f in f_true])[:,None]
noise_model = GPy.likelihoods.poisson() kern = GPy.kern.RBF(1)
likelihood = GPy.likelihoods.Laplace(Y,noise_model) poisson_lik = GPy.likelihoods.Poisson()
laplace_inf = GPy.inference.latent_function_inference.Laplace()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(X, Y, likelihood=likelihood) m = GPy.core.GP(X, Y, kernel=kern, likelihood=poisson_lik, inference_method=laplace_inf)
if optimize: if optimize:
m.optimize(optimizer) m.optimize(optimizer)
@ -316,23 +316,22 @@ def toy_ARD(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize
Y /= Y.std() Y /= Y.std()
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.Linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1) kernel = GPy.kern.RBF(X.shape[1], ARD=1)
kernel += GPy.kern.white(X.shape[1]) + GPy.kern.bias(X.shape[1]) kernel += GPy.kern.White(X.shape[1]) + GPy.kern.Bias(X.shape[1])
m = GPy.models.GPRegression(X, Y, kernel) m = GPy.models.GPRegression(X, Y, kernel)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior) # m.set_prior('.*lengthscale',len_prior)
if optimize: if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1) m.optimize(optimizer='scg', max_iters=max_iters)
if plot: if plot:
m.kern.plot_ARD() m.kern.plot_ARD()
print m
return m return m
def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True): def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, optimize=True, plot=True):
@ -355,36 +354,39 @@ def toy_ARD_sparse(max_iters=1000, kernel_type='linear', num_samples=300, D=4, o
Y /= Y.std() Y /= Y.std()
if kernel_type == 'linear': if kernel_type == 'linear':
kernel = GPy.kern.linear(X.shape[1], ARD=1) kernel = GPy.kern.Linear(X.shape[1], ARD=1)
elif kernel_type == 'rbf_inv': elif kernel_type == 'rbf_inv':
kernel = GPy.kern.rbf_inv(X.shape[1], ARD=1) kernel = GPy.kern.RBF_inv(X.shape[1], ARD=1)
else: else:
kernel = GPy.kern.rbf(X.shape[1], ARD=1) kernel = GPy.kern.RBF(X.shape[1], ARD=1)
kernel += GPy.kern.bias(X.shape[1]) #kernel += GPy.kern.Bias(X.shape[1])
X_variance = np.ones(X.shape) * 0.5 X_variance = np.ones(X.shape) * 0.5
m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance) m = GPy.models.SparseGPRegression(X, Y, kernel, X_variance=X_variance)
# len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25 # len_prior = GPy.priors.inverse_gamma(1,18) # 1, 25
# m.set_prior('.*lengthscale',len_prior) # m.set_prior('.*lengthscale',len_prior)
if optimize: if optimize:
m.optimize(optimizer='scg', max_iters=max_iters, messages=1) m.optimize(optimizer='scg', max_iters=max_iters)
if plot: if plot:
m.kern.plot_ARD() m.kern.plot_ARD()
print m
return m return m
def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True): def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
"""Predict the location of a robot given wirelss signal strength readings.""" """Predict the location of a robot given wirelss signal strength readings."""
data = GPy.util.datasets.robot_wireless() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.robot_wireless()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel) m = GPy.models.GPRegression(data['Y'], data['X'], kernel=kernel)
# optimize # optimize
if optimize: if optimize:
m.optimize(messages=True, max_iters=max_iters) m.optimize(max_iters=max_iters)
Xpredict = m.predict(data['Ytest'])[0] Xpredict = m.predict(data['Ytest'])[0]
if plot: if plot:
@ -396,13 +398,16 @@ def robot_wireless(max_iters=100, kernel=None, optimize=True, plot=True):
sse = ((data['Xtest'] - Xpredict)**2).sum() sse = ((data['Xtest'] - Xpredict)**2).sum()
print m
print('Sum of squares error on test data: ' + str(sse)) print('Sum of squares error on test data: ' + str(sse))
return m return m
def silhouette(max_iters=100, optimize=True, plot=True): def silhouette(max_iters=100, optimize=True, plot=True):
"""Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper.""" """Predict the pose of a figure given a silhouette. This is a task from Agarwal and Triggs 2004 ICML paper."""
data = GPy.util.datasets.silhouette() try:import pods
except ImportError:
print 'pods unavailable, see https://github.com/sods/ods for example datasets'
return
data = pods.datasets.silhouette()
# create simple GP Model # create simple GP Model
m = GPy.models.GPRegression(data['X'], data['Y']) m = GPy.models.GPRegression(data['X'], data['Y'])
@ -414,32 +419,38 @@ def silhouette(max_iters=100, optimize=True, plot=True):
print m print m
return m return m
def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True): def sparse_GP_regression_1D(num_samples=400, num_inducing=5, max_iters=100, optimize=True, plot=True, checkgrad=False):
"""Run a 1D example of a sparse GP regression.""" """Run a 1D example of a sparse GP regression."""
# sample inputs and outputs # sample inputs and outputs
X = np.random.uniform(-3., 3., (num_samples, 1)) X = np.random.uniform(-3., 3., (num_samples, 1))
Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05 Y = np.sin(X) + np.random.randn(num_samples, 1) * 0.05
# construct kernel # construct kernel
rbf = GPy.kern.rbf(1) rbf = GPy.kern.RBF(1)
# create simple GP Model # create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
m.checkgrad(verbose=1)
if checkgrad:
m.checkgrad()
if optimize: if optimize:
m.optimize('tnc', messages=1, max_iters=max_iters) m.optimize('tnc', max_iters=max_iters)
if plot: if plot:
m.plot() m.plot()
return m return m
def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True): def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, optimize=True, plot=True, nan=False):
"""Run a 2D example of a sparse GP regression.""" """Run a 2D example of a sparse GP regression."""
np.random.seed(1234)
X = np.random.uniform(-3., 3., (num_samples, 2)) X = np.random.uniform(-3., 3., (num_samples, 2))
Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05 Y = np.sin(X[:, 0:1]) * np.sin(X[:, 1:2]) + np.random.randn(num_samples, 1) * 0.05
if nan:
inan = np.random.binomial(1,.2,size=Y.shape).astype(bool) # boolean mask so roughly 20% of entries are blanked below
Y[inan] = np.nan
# construct kernel # construct kernel
rbf = GPy.kern.rbf(2) rbf = GPy.kern.RBF(2)
# create simple GP Model # create simple GP Model
m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing) m = GPy.models.SparseGPRegression(X, Y, kernel=rbf, num_inducing=num_inducing)
@ -462,7 +473,7 @@ def sparse_GP_regression_2D(num_samples=400, num_inducing=50, max_iters=100, opt
def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True): def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
"""Run a 1D example of a sparse GP regression with uncertain inputs.""" """Run a 1D example of a sparse GP regression with uncertain inputs."""
fig, axes = pb.subplots(1, 2, figsize=(12, 5)) fig, axes = pb.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
# sample inputs and outputs # sample inputs and outputs
S = np.ones((20, 1)) S = np.ones((20, 1))
@ -471,8 +482,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
# likelihood = GPy.likelihoods.Gaussian(Y) # likelihood = GPy.likelihoods.Gaussian(Y)
Z = np.random.uniform(-3., 3., (7, 1)) Z = np.random.uniform(-3., 3., (7, 1))
k = GPy.kern.rbf(1) k = GPy.kern.RBF(1)
# create simple GP Model - no input uncertainty on this one # create simple GP Model - no input uncertainty on this one
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z) m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z)
@ -485,7 +495,7 @@ def uncertain_inputs_sparse_regression(max_iters=200, optimize=True, plot=True):
print m print m
# the same Model with uncertainty # the same Model with uncertainty
m = GPy.models.SparseGPRegression(X, Y, kernel=k, Z=Z, X_variance=S) m = GPy.models.SparseGPRegression(X, Y, kernel=GPy.kern.RBF(1), Z=Z, X_variance=S)
if optimize: if optimize:
m.optimize('scg', messages=1, max_iters=max_iters) m.optimize('scg', messages=1, max_iters=max_iters)
if plot: if plot:

View file

@ -1,37 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import pylab as pb
import numpy as np
import GPy
def toy_1d(optimize=True, plot=True):
N = 2000
M = 20
#create data
X = np.linspace(0,32,N)[:,None]
Z = np.linspace(0,32,M)[:,None]
Y = np.sin(X) + np.cos(0.3*X) + np.random.randn(*X.shape)/np.sqrt(50.)
m = GPy.models.SVIGPRegression(X,Y, batchsize=10, Z=Z)
m.constrain_bounded('noise_variance',1e-3,1e-1)
m.constrain_bounded('white_variance',1e-3,1e-1)
m.param_steplength = 1e-4
if plot:
fig = pb.figure()
ax = fig.add_subplot(111)
def cb(foo):
ax.cla()
m.plot(ax=ax,Z_height=-3)
ax.set_ylim(-3,3)
fig.canvas.draw()
if optimize:
m.optimize(500, callback=cb, callback_interval=1)
if plot:
m.plot_traces()
return m

View file

@ -1,153 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
Code of Tutorials
"""
import pylab as pb
pb.ion()
import numpy as np
import GPy
def tuto_GP_regression(optimize=True, plot=True):
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
m = GPy.models.GPRegression(X, Y, kernel)
print m
if plot:
m.plot()
m.constrain_positive('')
m.unconstrain('') # may be used to remove the previous constrains
m.constrain_positive('.*rbf_variance')
m.constrain_bounded('.*lengthscale',1.,10. )
m.constrain_fixed('.*noise',0.0025)
if optimize:
m.optimize()
m.optimize_restarts(num_restarts = 10)
#######################################################
#######################################################
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GPRegression(X, Y, ker)
# constrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
if optimize:
m.optimize('tnc', max_f_eval = 1000)
if plot:
m.plot()
print m
return(m)
def tuto_kernel_overview(optimize=True, plot=True):
"""The detailed explanations of the commands used in this file can be found in the tutorial section"""
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(input_dim=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(input_dim=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
if plot:
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2) # By default, tensor=False
k_prodtens = k1.prod(k2,tensor=True)
# Sum of kernels
k_add = k1.add(k2) # By default, tensor=False
k_addtens = k1.add(k2,tensor=True)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('.*var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_params('.*len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod(k_cst + k_mat,tensor=True)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GPRegression(X, Y, Kanova)
if plot:
fig = pb.figure(figsize=(5,5))
ax = fig.add_subplot(111)
m.plot(ax=ax)
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
axs = pb.subplot(1,5,1)
m.plot(ax=axs)
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,3)
m.plot(ax=axs, which_parts=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,4)
m.plot(ax=axs, which_parts=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
axs = pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(ax=axs, which_parts=[False,False,False,True])
return(m)
def model_interaction(optimize=True, plot=True):
X = np.random.randn(20,1)
Y = np.sin(X) + np.random.randn(*X.shape)*0.01 + 5.
k = GPy.kern.rbf(1) + GPy.kern.bias(1)
m = GPy.models.GPRegression(X, Y, kernel=k)
return m

View file

@ -1,7 +0,0 @@
# This is the configuration file for GPy
[parallel]
# Enable openmp support. This speeds up some computations, depending on the number
# of cores available. Setting up a compiler with openmp support can be difficult on
# some platforms, hence this option.
openmp=False

View file

@ -0,0 +1,2 @@
import latent_function_inference
import optimization

View file

@ -0,0 +1,98 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
__doc__ = """
Inference over Gaussian process latent functions
In all our GP models, the consistency property means that we have a Gaussian
prior over a finite set of points f. This prior is
.. math:: N(f | 0, K)
where K is the kernel matrix.
We also have a likelihood (see GPy.likelihoods) which defines how the data are
related to the latent function: p(y | f). If the likelihood is also a Gaussian,
the inference over f is tractable (see exact_gaussian_inference.py).
If the likelihood object is something other than Gaussian, then exact inference
is not tractable. We then resort to a Laplace approximation (laplace.py) or
expectation propagation (ep.py).
The inference methods return a
:class:`~GPy.inference.latent_function_inference.posterior.Posterior`
instance, a simple structure containing a summary of the posterior.
The model classes can then
use this posterior object for making predictions, optimizing hyper-parameters,
etc.
"""
class LatentFunctionInference(object):
def on_optimization_start(self):
"""
This function gets called just before the optimization loop starts.
"""
pass
def on_optimization_end(self):
"""
This function gets called just after the optimization loop has ended.
"""
pass
class InferenceMethodList(LatentFunctionInference, list):
def on_optimization_start(self):
for inf in self:
inf.on_optimization_start()
def on_optimization_end(self):
for inf in self:
inf.on_optimization_end()
def __getstate__(self):
state = []
for inf in self:
state.append(inf)
return state
def __setstate__(self, state):
for inf in state:
self.append(inf)
from exact_gaussian_inference import ExactGaussianInference
from laplace import Laplace
from GPy.inference.latent_function_inference.var_dtc import VarDTC
from expectation_propagation import EP
from expectation_propagation_dtc import EPDTC
from dtc import DTC
from fitc import FITC
from var_dtc_parallel import VarDTC_minibatch
# class FullLatentFunctionData(object):
#
#
# class EMLikeLatentFunctionInference(LatentFunctionInference):
# def update_approximation(self):
# """
# This function gets called when the
# """
#
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"
#
# class VariationalLatentFunctionInference(LatentFunctionInference):
# def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
# """
# Do inference on the latent functions given a covariance function `kern`,
# inputs and outputs `X` and `Y`, inducing_inputs `Z`, and a likelihood `likelihood`.
# Additional metadata for the outputs `Y` can be given in `Y_metadata`.
# """
# raise NotImplementedError, "Abstract base class for full inference"

View file

@ -0,0 +1,162 @@
# Copyright (c) 2012-2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class DTC(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
NB. It's not recommended to use this function! It's here for historical purposes.
"""
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
A = tdot(LiUTbeta) + np.eye(num_inducing)
# factor A
LA = jitchol(A)
# back substitute to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \
-0.5*beta*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b))
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
dL_dK = 0.5*(Kmmi - vvT_P)
# Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T)
dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU *= beta
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn), 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
return post, log_marginal, grad_dict
class vDTC(object):
def __init__(self):
self.const_jitter = 1e-6
def inference(self, kern, X, X_variance, Z, likelihood, Y, Y_metadata):
assert X_variance is None, "cannot use X_variance with DTC. Try varDTC."
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1:
raise NotImplementedError, "no hetero noise with this implementation of DTC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
Uy = np.dot(U.T,Y)
#factor Kmm
Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta)
A_ = tdot(LiUTbeta)
trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_))
A = A_ + np.eye(num_inducing)
# factor A
LA = jitchol(A)
# back substitute to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \
-0.5*beta*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b)) + \
trace_term
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
LAL = Li.T.dot(A).dot(Li)
dL_dK = Kmmi - 0.5*(vvT_P + LAL)
# Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T)
#dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
dL_dU *= beta
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2
dL_dR -=beta*trace_term/num_data
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
return post, log_marginal, grad_dict

View file

@ -0,0 +1,59 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import pdinv, dpotrs, tdot
from ...util import diag
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class ExactGaussianInference(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
def __init__(self):
pass#self._YYTfactor_cache = caching.cache()
def get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LL^T = YY^T.
Note that L may have fewer columns than Y, else L=Y.
"""
N, D = Y.shape
if (N>D):
return Y
else:
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
YYT_factor = self.get_YYTfactor(Y)
K = kern.K(X)
Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata))
Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1)
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}

View file

@ -0,0 +1,122 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs
from posterior import Posterior
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class EP(LatentFunctionInference):
def __init__(self, epsilon=1e-6, eta=1., delta=1.):
"""
The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006.
:param epsilon: Convergence criterion, maximum squared difference allowed between mean updates to stop iterations (float)
:type epsilon: float
:param eta: parameter for fractional EP updates.
:type eta: float64
:param delta: damping EP updates factor.
:type delta: float64
"""
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset()
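# Usage sketch (illustrative; the data and kernel names are assumptions):
# EP is typically paired with a non-Gaussian likelihood, e.g. for classification:
#
#   ep = EP(epsilon=1e-6)
#   m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1),
#                   likelihood=GPy.likelihoods.Bernoulli(),
#                   inference_method=ep)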
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def on_optimization_start(self):
self._ep_approximation = None
def on_optimization_end(self):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def inference(self, kern, X, likelihood, Y, Y_metadata=None, Z=None):
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
K = kern.K(X)
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde))
alpha, _ = dpotrs(LW, mu_tilde, lower=1)
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP methods only works for 1D outputs"
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
Sigma = K.copy()
#Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(num_data,dtype=np.float64)
sigma2_hat = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
iterations = 0
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
tau_tilde[i] += delta_tau
v_tilde[i] += delta_v
#Posterior distribution parameters update
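#DSYR applies the symmetric rank-1 update Sigma <- Sigma + alpha*s*s^T; the
#negative coefficient below down-dates the covariance for the new site precision.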
DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
mu = np.dot(Sigma, v_tilde)
#(re)compute Sigma and mu using the full Cholesky decomposition
tau_tilde_root = np.sqrt(tau_tilde)
Sroot_tilde_K = tau_tilde_root[:,None] * K
B = np.eye(num_data) + Sroot_tilde_K * tau_tilde_root[None,:]
L = jitchol(B)
V, _ = dtrtrs(L, Sroot_tilde_K, lower=1)
Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,v_tilde)
#monitor convergence
if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat

View file

@ -0,0 +1,351 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util import diag
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference
from posterior import Posterior
log_2_pi = np.log(2*np.pi)
class EPDTC(LatentFunctionInference):
const_jitter = 1e-6
def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1):
from ...util.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset()
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def on_optimization_start(self):
self._ep_approximation = None
def on_optimization_end(self):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def _get_trYYT(self, Y):
return np.sum(np.square(Y))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.limit
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return Y
else:
return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO: cache this, and make it efficient
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z)
Kmn = kern.K(Z,X)
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
psi0 = kern.psi0(Z, X)
psi1 = Kmn.T#kern.psi1(Z, X)
psi2 = kern.psi2(Z, X)
else:
uncertain_inputs = False
psi0 = kern.Kdiag(X)
psi1 = Kmn.T#kern.K(X, Z)
psi2 = None
#see whether we're using variational uncertain inputs
_, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
#beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
beta = tau_tilde
VVT_factor = beta[:,None]*mu_tilde[:,None]
trYYT = self.get_trYYT(mu_tilde[:,None])
# do the inference:
het_noise = beta.size > 1
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substitute C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places
dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, mu_tilde[:,None])
dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V
else:
print 'foobar'
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
#construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP methods only works for 1D outputs"
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0] #chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
LLT = Kmm.copy() #Sigma = K.copy()
Sigma_diag = Qnn_diag.copy()
#Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(num_data,dtype=np.float64)
sigma2_hat = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
iterations = 0
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] += delta_tau
v_tilde[i] += delta_v
#Posterior distribution parameters update
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
#(re)compute Sigma and mu using the full Cholesky decomposition
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V2.T,V2)
mu = np.dot(Sigma,v_tilde)
#monitor convergence
if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
tau_diff = 0
v_diff = 0
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise:
if uncertain_inputs:
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
dL_dpsi2 = None
else:
dL_dpsi2 = beta * dL_dpsi2_beta
if uncertain_inputs:
# repeat for each of the N psi_2 matrices
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
# save computation here.
dL_dR = None
elif het_noise:
if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
#from ...util.linalg import chol_inv
#LBi = chol_inv(LB)
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else:
# likelihood is not heteroscedastic
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
return dL_dR
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
#compute log marginal likelihood
if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal

View file

@ -0,0 +1,89 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import jitchol, tdot, dtrtrs, dpotri, pdinv
from ...util import diag
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
class FITC(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
"""
const_jitter = 1e-6
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
num_inducing, _ = Z.shape
num_data, output_dim = Y.shape
#make sure the noise is not hetero
sigma_n = likelihood.gaussian_variance(Y_metadata)
if sigma_n.size >1:
raise NotImplementedError, "no hetero noise with this implementation of FITC"
Kmm = kern.K(Z)
Knn = kern.Kdiag(X)
Knm = kern.K(X, Z)
U = Knm
#factor Kmm
diag.add(Kmm, self.const_jitter)
Kmmi, L, Li, _ = pdinv(Kmm)
#compute beta_star, the effective noise precision
LiUT = np.dot(Li, U.T)
sigma_star = Knn + sigma_n - np.sum(np.square(LiUT),0)
beta_star = 1./sigma_star
# Compute and factor A
A = tdot(LiUT*np.sqrt(beta_star)) + np.eye(num_inducing)
LA = jitchol(A)
# back substitute to get b, P, v
URiy = np.dot(U.T*beta_star,Y)
tmp, _ = dtrtrs(L, URiy, lower=1)
b, _ = dtrtrs(LA, tmp, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
P = tdot(tmp.T)
#compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*output_dim*np.sum(np.log(beta_star)) + \
-0.5*np.sum(np.square(Y.T*np.sqrt(beta_star))) + \
0.5*np.sum(np.square(b))
#compute dL_dR
Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta_star + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta_star**2
# Compute dL_dKmm
vvT_P = tdot(v.reshape(-1,1)) + P
dL_dK = 0.5*(Kmmi - vvT_P)
KiU = np.dot(Kmmi, U.T)
dL_dK += np.dot(KiU*dL_dR, KiU.T)
# Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T)
dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU *= beta_star
dL_dU -= 2.*KiU*dL_dR
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':dL_dR, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)
return post, log_marginal, grad_dict

View file

@ -0,0 +1,162 @@
# Copyright (c) 2014, Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...core import Model
from ...core.parameterization import variational
def infer_newX(model, Y_new, optimize=True, init='L2'):
"""
Infer the distribution of X for the new observed data *Y_new*.
:param model: the GPy model used in inference
:type model: GPy.core.Model
:param Y_new: the new observed data for inference
:type Y_new: numpy.ndarray
:param optimize: whether to optimize the location of new X (True by default)
:type optimize: boolean
:return: a tuple containing the estimated posterior distribution of X and the model that optimize X
:rtype: (GPy.core.parameterization.variational.VariationalPosterior, GPy.core.Model)
"""
infr_m = InferenceX(model, Y_new, init=init)
if optimize:
infr_m.optimize()
return infr_m.X, infr_m
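# Usage sketch (illustrative; `m` and `Y_star` are assumed names): given a trained
# latent variable model and newly observed outputs, recover the latent locations:
#
#   X_star, infr = infer_newX(m, Y_star, optimize=True, init='L2')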
class InferenceX(Model):
"""
The class for inference of new X with given new Y. (do_test_latent)
:param model: the GPy model used in inference
:type model: GPy.core.Model
:param Y: the new observed data for inference
:type Y: numpy.ndarray
"""
def __init__(self, model, Y, name='inferenceX', init='L2'):
if np.isnan(Y).any() or getattr(model, 'missing_data', False):
assert Y.shape[0]==1, "The current implementation of inference X only support one data point at a time with missing data!"
self.missing_data = True
self.valid_dim = np.logical_not(np.isnan(Y[0]))
self.ninan = getattr(model, 'ninan', None)
else:
self.missing_data = False
super(InferenceX, self).__init__(name)
self.likelihood = model.likelihood.copy()
self.kern = model.kern.copy()
if model.kern.useGPU:
from ...models import SSGPLVM
if isinstance(model, SSGPLVM):
self.kern.GPU_SSRBF(True)
else:
self.kern.GPU(True)
from copy import deepcopy
self.posterior = deepcopy(model.posterior)
if hasattr(model, 'variational_prior'):
self.uncertain_input = True
self.variational_prior = model.variational_prior.copy()
else:
self.uncertain_input = False
if hasattr(model, 'inducing_inputs'):
self.sparse_gp = True
self.Z = model.Z.copy()
else:
self.sparse_gp = False
self.uncertain_input = False
self.Z = model.X.copy()
self.Y = Y
self.X = self._init_X(model, Y, init=init)
self.compute_dL()
self.link_parameter(self.X)
def _init_X(self, model, Y_new, init='L2'):
# Initialize the new X by finding the nearest point in Y space.
Y = model.Y
if self.missing_data:
Y = Y[:,self.valid_dim]
Y_new = Y_new[:,self.valid_dim]
dist = -2.*Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:,None]+ np.square(Y).sum(axis=1)[None,:]
else:
if init=='L2':
dist = -2.*Y_new.dot(Y.T) + np.square(Y_new).sum(axis=1)[:,None]+ np.square(Y).sum(axis=1)[None,:]
elif init=='NCC':
dist = Y_new.dot(Y.T)
elif init=='rand':
dist = np.random.rand(Y_new.shape[0],Y.shape[0])
idx = dist.argmin(axis=1)
from ...models import SSGPLVM
from ...util.misc import param_to_array
if isinstance(model, SSGPLVM):
X = variational.SpikeAndSlabPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]), param_to_array(model.X.gamma[idx]))
if model.group_spike:
X.gamma.fix()
else:
if self.uncertain_input and self.sparse_gp:
X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]))
else:
from ...core import Param
X = Param('latent mean',param_to_array(model.X[idx]).copy())
return X
def compute_dL(self):
# Common computation
beta = 1./np.fmax(self.likelihood.variance, 1e-6)
output_dim = self.Y.shape[-1]
wv = self.posterior.woodbury_vector
if self.missing_data:
wv = wv[:,self.valid_dim]
output_dim = self.valid_dim.sum()
if self.ninan is not None:
self.dL_dpsi2 = beta/2.*(self.posterior.woodbury_inv[:,:,self.valid_dim] - np.einsum('md,od->mo',wv, wv)[:, :, None]).sum(-1)
else:
self.dL_dpsi2 = beta/2.*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))
self.dL_dpsi1 = beta*np.dot(self.Y[:,self.valid_dim], wv.T)
self.dL_dpsi0 = - beta/2.* np.ones(self.Y.shape[0])
else:
self.dL_dpsi2 = beta*(output_dim*self.posterior.woodbury_inv - np.einsum('md,od->mo',wv, wv))/2.
self.dL_dpsi1 = beta*np.dot(self.Y, wv.T)
self.dL_dpsi0 = -beta/2.*output_dim* np.ones(self.Y.shape[0])
def parameters_changed(self):
if self.uncertain_input:
psi0 = self.kern.psi0(self.Z, self.X)
psi1 = self.kern.psi1(self.Z, self.X)
psi2 = self.kern.psi2(self.Z, self.X)
else:
psi0 = self.kern.Kdiag(self.X)
psi1 = self.kern.K(self.X, self.Z)
psi2 = np.dot(psi1.T,psi1)
self._log_marginal_likelihood = (self.dL_dpsi2*psi2).sum()+(self.dL_dpsi1*psi1).sum()+(self.dL_dpsi0*psi0).sum()
if self.uncertain_input:
X_grad = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, dL_dpsi0=self.dL_dpsi0, dL_dpsi1=self.dL_dpsi1, dL_dpsi2=self.dL_dpsi2)
self.X.set_gradients(X_grad)
else:
dL_dpsi1 = self.dL_dpsi1 + 2.*np.dot(psi1,self.dL_dpsi2)
X_grad = self.kern.gradients_X_diag(self.dL_dpsi0, self.X)
X_grad += self.kern.gradients_X(dL_dpsi1, self.X, self.Z)
self.X.gradient = X_grad
if self.uncertain_input:
from ...core.parameterization.variational import SpikeAndSlabPrior
if isinstance(self.variational_prior, SpikeAndSlabPrior):
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X, N=self.Y.shape[0])
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X, N=self.Y.shape[0])
else:
# Update Log-likelihood
KL_div = self.variational_prior.KL_divergence(self.X)
# update for the KL divergence
self.variational_prior.update_gradients_KL(self.X)
self._log_marginal_likelihood += -KL_div
def log_likelihood(self):
return self._log_marginal_likelihood

View file

@ -0,0 +1,251 @@
# Copyright (c) 2013, 2014 Alan Saul
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
#Parts of this file were influenced by the Matlab GPML framework written by
#Carl Edward Rasmussen & Hannes Nickisch, however all bugs are our own.
#
#The GPML code is released under the FreeBSD License.
#Copyright (c) 2005-2013 Carl Edward Rasmussen & Hannes Nickisch. All rights reserved.
#
#The code and associated documentation is available from
#http://gaussianprocess.org/gpml/code.
import numpy as np
from ...util.linalg import mdot, jitchol, dpotrs, dtrtrs, dpotri, symmetrify, pdinv
from posterior import Posterior
import warnings
def warning_on_one_line(message, category, filename, lineno, file=None, line=None):
return ' %s:%s: %s:%s\n' % (filename, lineno, category.__name__, message)
warnings.formatwarning = warning_on_one_line
from scipy import optimize
from . import LatentFunctionInference
class Laplace(LatentFunctionInference):
def __init__(self):
"""
Laplace Approximation
Find the mode \hat{f} of the unnormalised posterior and the Hessian
at that point (using Newton-Raphson)
"""
self._mode_finding_tolerance = 1e-7
self._mode_finding_max_iter = 60
self.bad_fhat = False
#Store whether it is the first run of the inference so that we can choose whether we need
#to calculate things or reuse old variables
self.first_run = True
self._previous_Ki_fhat = None
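# Usage sketch (illustrative, mirroring the Poisson example in the regression
# examples touched by this change; X and Y are assumed data arrays):
#
#   m = GPy.core.GP(X, Y, kernel=GPy.kern.RBF(1),
#                   likelihood=GPy.likelihoods.Poisson(),
#                   inference_method=Laplace())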
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
"""
Returns a Posterior class containing essential quantities of the posterior
"""
# Compute K
K = kern.K(X)
#Find mode
if self.bad_fhat or self.first_run:
Ki_f_init = np.zeros_like(Y)
self.first_run = False
else:
Ki_f_init = self._previous_Ki_fhat
f_hat, Ki_fhat = self.rasm_mode(K, Y, likelihood, Ki_f_init, Y_metadata=Y_metadata)
self.f_hat = f_hat
self.Ki_fhat = Ki_fhat
self.K = K.copy()
#Compute hessian and other variables at mode
log_marginal, woodbury_inv, dL_dK, dL_dthetaL = self.mode_computations(f_hat, Ki_fhat, K, Y, likelihood, kern, Y_metadata)
self._previous_Ki_fhat = Ki_fhat.copy()
return Posterior(woodbury_vector=Ki_fhat, woodbury_inv=woodbury_inv, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def rasm_mode(self, K, Y, likelihood, Ki_f_init, Y_metadata=None):
"""
Rasmussen's numerically stable mode finding
For nomenclature see Rasmussen & Williams 2006
Influenced by GPML (BSD) code, all errors are our own
:param K: Covariance matrix evaluated at locations X
:type K: NxD matrix
:param Y: The data
:type Y: np.ndarray
:param likelihood: the likelihood of the latent function value for the given data
:type likelihood: a GPy.likelihood object
:param Ki_f_init: the initial guess at the mode
:type Ki_f_init: np.ndarray
:param Y_metadata: information about the data, e.g. which likelihood to take from a multi-likelihood object
:type Y_metadata: np.ndarray | None
:returns: f_hat, the mode on which to make the Laplace approximation
:rtype: np.ndarray
"""
Ki_f = Ki_f_init.copy()
f = np.dot(K, Ki_f)
#define the objective function (to be maximised)
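#(the unnormalised log posterior Psi(f) = -0.5*f^T K^{-1} f + log p(y|f),
# written in terms of Ki_f = K^{-1} f for numerical stability)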
def obj(Ki_f, f):
return -0.5*np.dot(Ki_f.flatten(), f.flatten()) + np.sum(likelihood.logpdf(f, Y, Y_metadata=Y_metadata))
difference = np.inf
iteration = 0
while difference > self._mode_finding_tolerance and iteration < self._mode_finding_max_iter:
W = -likelihood.d2logpdf_df2(f, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
grad = likelihood.dlogpdf_df(f, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(grad)):
raise ValueError('One or more element(s) of grad is NaN')
W_f = W*f
b = W_f + grad # R+W p46 line 6.
W12BiW12, _, _ = self._compute_B_statistics(K, W, likelihood.log_concave)
W12BiW12Kb = np.dot(W12BiW12, np.dot(K, b))
#Work out the DIRECTION that we want to move in, but don't choose the stepsize yet
full_step_Ki_f = b - W12BiW12Kb # full_step_Ki_f = a in R&W p46 line 6.
dKi_f = full_step_Ki_f - Ki_f
#define an objective for the line search (minimize this one)
def inner_obj(step_size):
Ki_f_trial = Ki_f + step_size*dKi_f
f_trial = np.dot(K, Ki_f_trial)
return -obj(Ki_f_trial, f_trial)
#use scipy for the line search, then compute new values of f and Ki_f
step = optimize.brent(inner_obj, tol=1e-4, maxiter=12)
Ki_f_new = Ki_f + step*dKi_f
f_new = np.dot(K, Ki_f_new)
difference = np.abs(np.sum(f_new - f)) + np.abs(np.sum(Ki_f_new - Ki_f))
Ki_f = Ki_f_new
f = f_new
iteration += 1
#Warn of bad fits
if difference > self._mode_finding_tolerance:
if not self.bad_fhat:
warnings.warn("Not perfect mode found (f_hat). difference: {}, iteration: {} out of max {}".format(difference, iteration, self._mode_finding_max_iter))
self.bad_fhat = True
elif self.bad_fhat:
self.bad_fhat = False
warnings.warn("f_hat now fine again. difference: {}, iteration: {} out of max {}".format(difference, iteration, self._mode_finding_max_iter))
return f, Ki_f
def mode_computations(self, f_hat, Ki_f, K, Y, likelihood, kern, Y_metadata):
"""
At the mode, compute the hessian and effective covariance matrix.
returns: logZ : approximation to the marginal likelihood
woodbury_inv : variable required for calculating the approximation to the covariance matrix
dL_dK : derivative of the approximate marginal likelihood w.r.t. the covariance matrix
dL_dthetaL : array of derivatives (1 x num_likelihood_params)
"""
#At this point get the hessian matrix (or vector as W is diagonal)
W = -likelihood.d2logpdf_df2(f_hat, Y, Y_metadata=Y_metadata)
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
K_Wi_i, L, LiW12 = self._compute_B_statistics(K, W, likelihood.log_concave)
#compute vital matrices
C = np.dot(LiW12, K)
Ki_W_i = K - C.T.dot(C)
#compute the log marginal
log_marginal = -0.5*np.dot(Ki_f.flatten(), f_hat.flatten()) + np.sum(likelihood.logpdf(f_hat, Y, Y_metadata=Y_metadata)) - np.sum(np.log(np.diag(L)))
# Compute matrices for derivatives
dW_df = -likelihood.d3logpdf_df3(f_hat, Y, Y_metadata=Y_metadata) # -d3lik_d3fhat
if np.any(np.isnan(dW_df)):
raise ValueError('One or more element(s) of dW_df is NaN')
dL_dfhat = -0.5*(np.diag(Ki_W_i)[:, None]*dW_df) # s2 in R&W p126 line 9.
#BiK, _ = dpotrs(L, K, lower=1)
#dL_dfhat = 0.5*np.diag(BiK)[:, None]*dW_df
I_KW_i = np.eye(Y.shape[0]) - np.dot(K, K_Wi_i)
####################
# compute dL_dK #
####################
if kern.size > 0 and not kern.is_fixed:
#Explicit
explicit_part = 0.5*(np.dot(Ki_f, Ki_f.T) - K_Wi_i)
#Implicit
implicit_part = np.dot(Ki_f, dL_dfhat.T).dot(I_KW_i)
dL_dK = explicit_part + implicit_part
else:
dL_dK = np.zeros(likelihood.size)
####################
#compute dL_dthetaL#
####################
if likelihood.size > 0 and not likelihood.is_fixed:
dlik_dthetaL, dlik_grad_dthetaL, dlik_hess_dthetaL = likelihood._laplace_gradients(f_hat, Y, Y_metadata=Y_metadata)
num_params = likelihood.size
# make space for one derivative for each likelihood parameter
dL_dthetaL = np.zeros(num_params)
for thetaL_i in range(num_params):
#Explicit
dL_dthetaL_exp = ( np.sum(dlik_dthetaL[thetaL_i])
# The + comes from the fact that dlik_hess_dthetaL == -dW_dthetaL
+ 0.5*np.sum(np.diag(Ki_W_i).flatten()*dlik_hess_dthetaL[:, thetaL_i].flatten())
)
#Implicit
dfhat_dthetaL = mdot(I_KW_i, K, dlik_grad_dthetaL[:, thetaL_i])
#dfhat_dthetaL = mdot(Ki_W_i, dlik_grad_dthetaL[:, thetaL_i])
dL_dthetaL_imp = np.dot(dL_dfhat.T, dfhat_dthetaL)
dL_dthetaL[thetaL_i] = dL_dthetaL_exp + dL_dthetaL_imp
else:
dL_dthetaL = np.zeros(likelihood.size)
return log_marginal, K_Wi_i, dL_dK, dL_dthetaL
def _compute_B_statistics(self, K, W, log_concave):
"""
Rasmussen suggests the use of a numerically stable positive definite matrix B,
which has positive diagonal elements and can be easily inverted
:param K: Prior Covariance matrix evaluated at locations X
:type K: NxN matrix
:param W: Negative hessian at a point (diagonal matrix)
:type W: Vector of diagonal values of Hessian (1xN)
:returns: (W12BiW12, L_B, Li_W12)
"""
if not log_concave:
#print "Under 1e-10: {}".format(np.sum(W < 1e-6))
W[W<1e-6] = 1e-6
# NOTE: setting a parameter inside parameters_changed will always lead to circular update loops!!!
#W.__setitem__(W < 1e-6, 1e-6, update=False) # FIXME-HACK: This is a hack since GPy can't handle negative variances, which can occur
# if the likelihood is non-log-concave. We want to say that there is a negative variance
# to cause the posterior to become less certain than the prior and likelihood;
# this is a property only held by non-log-concave likelihoods.
if np.any(np.isnan(W)):
raise ValueError('One or more element(s) of W is NaN')
#W is diagonal so its sqrt is just the sqrt of the diagonal elements
W_12 = np.sqrt(W)
B = np.eye(K.shape[0]) + W_12*K*W_12.T
L = jitchol(B)
LiW12, _ = dtrtrs(L, np.diagflat(W_12), lower=1, trans=0)
K_Wi_i = np.dot(LiW12.T, LiW12) # R = W12BiW12, in R&W p 126, eq 5.25
#here's a better way to compute the required matrix.
# you could do the mode finding with a backsub, instead of a dot...
#L2 = L/W_12
#K_Wi_i_2 , _= dpotri(L2)
#symmetrify(K_Wi_i_2)
return K_Wi_i, L, LiW12
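# A minimal numpy sketch of the identity behind _compute_B_statistics: with
# B = I + W^{1/2} K W^{1/2}, the returned K_Wi_i = W^{1/2} B^{-1} W^{1/2} equals (K + W^{-1})^{-1}:
#
#   import numpy as np
#   rng = np.random.RandomState(0)
#   A = rng.randn(5, 5); K = A.dot(A.T) + 5.*np.eye(5)    # a positive definite "covariance"
#   W = rng.rand(5, 1) + 0.1                               # positive diagonal of -d2logpdf_df2
#   W_12 = np.sqrt(W)
#   B = np.eye(5) + W_12*K*W_12.T
#   K_Wi_i = W_12*np.linalg.solve(B, np.diagflat(W_12))    # W^{1/2} B^{-1} W^{1/2}
#   assert np.allclose(K_Wi_i, np.linalg.inv(K + np.diagflat(1./W)))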

View file

@ -0,0 +1,186 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util.linalg import pdinv, dpotrs, dpotri, symmetrify, jitchol
class Posterior(object):
"""
An object to represent a Gaussian posterior over latent function values, p(f|D).
This may be computed exactly for Gaussian likelihoods, or approximated for
non-Gaussian likelihoods.
The purpose of this class is to serve as an interface between the inference
schemes and the model classes. The model class can make predictions for
the function at any new point x_* by integrating over this posterior.
"""
def __init__(self, woodbury_chol=None, woodbury_vector=None, K=None, mean=None, cov=None, K_chol=None, woodbury_inv=None):
"""
woodbury_chol : a lower triangular matrix L that satisfies posterior_covariance = K - K L^{-T} L^{-1} K
woodbury_vector : a matrix (or vector, as Nx1 matrix) M which satisfies posterior_mean = K M
K : the prior covariance (required for lazy computation of various quantities)
mean : the posterior mean
cov : the posterior covariance
Not all of the above need to be supplied! You *must* supply:
K (for lazy computation)
or
K_chol (for lazy computation)
You may supply either:
woodbury_chol
woodbury_vector
Or:
mean
cov
Of course, you can supply more than that, but this class will lazily
compute all other quantities on demand.
"""
#obligatory
self._K = K
if ((woodbury_chol is not None) and (woodbury_vector is not None))\
or ((woodbury_inv is not None) and (woodbury_vector is not None))\
or ((woodbury_inv is not None) and (mean is not None))\
or ((mean is not None) and (cov is not None)):
pass # we have sufficient to compute the posterior
else:
raise ValueError, "insufficient information to compute the posterior"
self._K_chol = K_chol
self._K = K
#option 1:
self._woodbury_chol = woodbury_chol
self._woodbury_vector = woodbury_vector
#option 2.
self._woodbury_inv = woodbury_inv
#and woodbury vector
#option 3:
self._mean = mean
self._covariance = cov
#compute this lazily
self._precision = None
@property
def mean(self):
"""
Posterior mean
$$
K_{xx}v
v := \texttt{Woodbury vector}
$$
"""
if self._mean is None:
self._mean = np.dot(self._K, self.woodbury_vector)
return self._mean
@property
def covariance(self):
"""
Posterior covariance
$$
K_{xx} - K_{xx}W_{xx}^{-1}K_{xx}
W_{xx}^{-1} := \texttt{Woodbury inv}
$$
"""
if self._covariance is None:
#LiK, _ = dtrtrs(self.woodbury_chol, self._K, lower=1)
self._covariance = (np.atleast_3d(self._K) - np.tensordot(np.dot(np.atleast_3d(self.woodbury_inv).T, self._K), self._K, [1,0]).T).squeeze()
#self._covariance = self._K - self._K.dot(self.woodbury_inv).dot(self._K)
return self._covariance
@property
def precision(self):
"""
Inverse of posterior covariance
"""
if self._precision is None:
cov = np.atleast_3d(self.covariance)
self._precision = np.zeros(cov.shape) # if one covariance per dimension
for p in xrange(cov.shape[-1]):
self._precision[:,:,p] = pdinv(cov[:,:,p])[0]
return self._precision
@property
def woodbury_chol(self):
"""
return $L_{W}$ where L is the lower triangular Cholesky decomposition of the Woodbury matrix
$$
L_{W}L_{W}^{\top} = W_{xx}
W_{xx}^{-1} := \texttt{Woodbury inv}
$$
"""
if self._woodbury_chol is None:
#compute woodbury chol from
if self._woodbury_inv is not None:
winv = np.atleast_3d(self._woodbury_inv)
self._woodbury_chol = np.zeros(winv.shape)
for p in xrange(winv.shape[-1]):
self._woodbury_chol[:,:,p] = pdinv(winv[:,:,p])[2]
#Li = jitchol(self._woodbury_inv)
#self._woodbury_chol, _ = dtrtri(Li)
#W, _, _, _, = pdinv(self._woodbury_inv)
#symmetrify(W)
#self._woodbury_chol = jitchol(W)
#try computing woodbury chol from cov
elif self._covariance is not None:
raise NotImplementedError, "TODO: check code here"
B = self._K - self._covariance
tmp, _ = dpotrs(self.K_chol, B)
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
_, _, self._woodbury_chol, _ = pdinv(self._woodbury_inv)
else:
raise ValueError, "insufficient information to compute posterior"
return self._woodbury_chol
@property
def woodbury_inv(self):
"""
The inverse of the Woodbury matrix. In the Gaussian likelihood case it is defined as
$$
(K_{xx} + \Sigma_{xx})^{-1}
\Sigma_{xx} := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_inv is None:
if self._woodbury_chol is not None:
self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
#self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
symmetrify(self._woodbury_inv)
elif self._covariance is not None:
B = self._K - self._covariance
tmp, _ = dpotrs(self.K_chol, B)
self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
return self._woodbury_inv
@property
def woodbury_vector(self):
"""
In the Gaussian likelihood case the Woodbury vector is defined as
$$
(K_{xx} + \Sigma)^{-1}Y
\Sigma := \texttt{Likelihood.variance / Approximate likelihood covariance}
$$
"""
if self._woodbury_vector is None:
self._woodbury_vector, _ = dpotrs(self.K_chol, self.mean)
return self._woodbury_vector
@property
def K_chol(self):
"""
Cholesky of the prior covariance K
"""
if self._K_chol is None:
self._K_chol = jitchol(self._K)
return self._K_chol
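# A usage sketch, assuming plain numpy arrays K (NxN), Y (Nx1) and a Gaussian noise
# variance s2; the exact posterior then has woodbury_inv = (K + s2*I)^{-1} and
# woodbury_vector = (K + s2*I)^{-1} Y:
#
#   import numpy as np
#   Ky = K + s2*np.eye(K.shape[0])
#   post = Posterior(woodbury_inv=np.linalg.inv(Ky),
#                    woodbury_vector=np.linalg.solve(Ky, Y), K=K)
#   post.mean         # lazily computed as K (K + s2 I)^{-1} Y
#   post.covariance   # lazily computed as K - K (K + s2 I)^{-1} K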

View file

@ -0,0 +1,249 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
import logging, itertools
logger = logging.getLogger('vardtc')
class VarDTC(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
const_jitter = 1e-6
def __init__(self, limit=1):
#self._YYTfactor_cache = caching.cache()
from ...util.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
return np.einsum("ij,ij->", Y, Y)
# faster than, but same as:
# return np.sum(np.square(Y))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.limit
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO: cache this, and make it efficient
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None):
_, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior)
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = self.get_YYTfactor(Y)
#VVT_factor = self.get_VVTfactor(self.YYTfactor, beta)
het_noise = beta.size > 1
if beta.ndim == 1:
beta = beta[:, None]
VVT_factor = beta*Y
#VVT_factor = beta*Y
trYYT = self.get_trYYT(Y)
# do the inference:
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
if Lm is None:
Lm = jitchol(Kmm)
# The rather complex computations of A, and the psi stats
if uncertain_inputs:
psi0 = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X)
if het_noise:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
else:
psi2_beta = kern.psi2(Z,X) * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
psi0 = kern.Kdiag(X)
psi1 = kern.K(X, Z)
if het_noise:
tmp = psi1 * (np.sqrt(beta))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substitute C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
if dL_dKmm is None:
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit, Y)
#noise derivatives
dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, Y, VVT_factor)
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)
#put the gradients in the right places
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V
else:
print 'foobar'
import ipdb; ipdb.set_trace()
psi1V = np.dot(Y.T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
#construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta* np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise:
if uncertain_inputs:
dL_dpsi2 = beta[:, None] * dL_dpsi2_beta[None, :, :]
else:
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta).T).T
dL_dpsi2 = None
else:
dL_dpsi2 = beta * dL_dpsi2_beta
if not uncertain_inputs:
# subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y, VVT_factor=None):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
# save computation here.
dL_dR = None
elif het_noise:
if uncertain_inputs:
raise NotImplementedError, "heteroscedatic derivates with uncertain inputs not implemented"
else:
#from ...util.linalg import chol_inv
#LBi = chol_inv(LB)
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
dL_dR = -0.5 * beta + 0.5 * VVT_factor**2
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else:
# likelihood is not heteroscedastic
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
return dL_dR
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit, Y):
#compute log marginal likelihood
if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * output_dim * np.sum(np.log(beta)) - 0.5 * np.sum(beta.ravel() * np.square(Y).sum(axis=-1))
lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal
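# A usage sketch, assuming an RBF kernel and Gaussian likelihood from GPy's public API
# (normally this class is driven by the SparseGP model classes rather than called directly):
#
#   import numpy as np, GPy
#   X = np.random.randn(100, 1); Y = np.sin(X) + 0.1*np.random.randn(100, 1)
#   Z = np.random.randn(10, 1)                       # inducing inputs
#   post, log_marginal, grad_dict = VarDTC(limit=1).inference(
#       GPy.kern.RBF(1), X, Z, GPy.likelihoods.Gaussian(), Y)
#   grad_dict.keys()   # dL_dKmm, dL_dKdiag, dL_dKnm, dL_dthetaL for certain inputs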

View file

@ -0,0 +1,479 @@
# Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
from . import LatentFunctionInference
log_2_pi = np.log(2*np.pi)
try:
from mpi4py import MPI
except:
pass
class VarDTC_minibatch(LatentFunctionInference):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
const_jitter = 1e-6
def __init__(self, batchsize=None, limit=1, mpi_comm=None):
self.batchsize = batchsize
self.mpi_comm = mpi_comm
self.limit = limit
# Cache functions
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.midRes = {}
self.batch_pos = 0 # the starting position of the current mini-batch
self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the computation of posterior object will be skipped.
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.batchsize, self.limit, self.Y_speedup
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.batchsize, self.limit, self.Y_speedup = state
self.mpi_comm = None
self.midRes = {}
self.batch_pos = 0
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def _get_trYYT(self, Y):
return np.sum(np.square(Y))
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return Y.view(np.ndarray)
else:
return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
het_noise = beta.size > 1
assert beta.size == 1
trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0]
num_data, output_dim = Y.shape
batchsize = num_data if self.batchsize is None else self.batchsize
psi2_full = np.zeros((num_inducing,num_inducing)) # MxM
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in xrange(0,num_data,batchsize):
n_end = min(batchsize+n_start, num_data)
if batchsize==num_data:
Y_slice = Y
X_slice = X
else:
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2_full += kern.psi2(Z, X_slice)*b
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi2_full += np.dot(psi1.T,psi1)*b
psi0_full += psi0.sum()*b
psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm != None:
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
psi2_all = psi2_full.copy()
YRY_all = np.array(YRY_full)
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full
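# A reduction sketch, assuming mpi4py and that each rank holds a slice of the data: the
# Allreduce calls above simply sum the local psi statistics over all ranks in place, e.g.
#
#   local_psi2 = np.zeros((num_inducing, num_inducing))   # this rank's contribution
#   total_psi2 = np.zeros_like(local_psi2)
#   mpi_comm.Allreduce([local_psi2, MPI.DOUBLE], [total_psi2, MPI.DOUBLE])
#   # total_psi2 now holds the sum over all ranks, on every rank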
def inference_likelihood(self, kern, X, Z, likelihood, Y):
"""
The first phase of inference:
Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv,
"""
num_data, output_dim = Y.shape
input_dim = Z.shape[0]
if self.mpi_comm != None:
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
num_data = num_data_all
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
else:
uncertain_inputs = False
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
if het_noise:
self.batchsize = 1
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
#======================================================================
# Compute Common Components
#======================================================================
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm, maxtries=100)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Lambda = np.eye(Kmm.shape[0])+LmInvPsi2LmInvT
LL = jitchol(Lambda, maxtries=100)
logdet_L = 2.*np.sum(np.log(np.diag(LL)))
b = dtrtrs(LL,dtrtrs(Lm,psi1Y_full.T)[0])[0]
bbt = np.square(b).sum()
v = dtrtrs(Lm,dtrtrs(LL,b,trans=1)[0],trans=1)[0]
tmp = -backsub_both_sides(LL, tdot(b)+output_dim*np.eye(input_dim), transpose='left')
dL_dpsi2R = backsub_both_sides(Lm, tmp+output_dim*np.eye(input_dim), transpose='left')/2.
# Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v
#======================================================================
# Compute log-likelihood
#======================================================================
if het_noise:
logL_R = -np.log(beta).sum()
else:
logL_R = -num_data*np.log(beta)
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*logdet_L/2.
#======================================================================
# Compute dL_dKmm
#======================================================================
dL_dKmm = dL_dpsi2R - output_dim*backsub_both_sides(Lm, LmInvPsi2LmInvT, transpose='left')/2.
#======================================================================
# Compute the Posterior distribution of inducing points p(u|Y)
#======================================================================
if not self.Y_speedup or het_noise:
wd_inv = backsub_both_sides(Lm, np.eye(input_dim)- backsub_both_sides(LL, np.identity(input_dim), transpose='left'), transpose='left')
post = Posterior(woodbury_inv=wd_inv, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
else:
post = None
#======================================================================
# Compute dL_dthetaL for uncertain inputs and non-heteroscedastic noise
#======================================================================
if not het_noise:
dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y):
"""
The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd)
"""
num_data, output_dim = Y.shape
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
else:
uncertain_inputs = False
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
if self.Y_speedup and not het_noise:
YYT_factor = self.get_YYTfactor(Y)
else:
YYT_factor = Y
n_start = self.batch_pos
batchsize = num_data if self.batchsize is None else self.batchsize
n_end = min(batchsize+n_start, num_data)
if n_end==num_data:
isEnd = True
self.batch_pos = 0
else:
isEnd = False
self.batch_pos = n_end
if batchsize==num_data:
Y_slice = YYT_factor
X_slice =X
else:
Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end]
if not uncertain_inputs:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi2 = None
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
elif het_noise:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
if het_noise:
beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice
#======================================================================
# Load Intermediate Results
#======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v']
#======================================================================
# Compute dL_dpsi
#======================================================================
dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2.
dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs:
dL_dpsi2 = beta* dL_dpsi2R
else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None
#======================================================================
# Compute dL_dthetaL
#======================================================================
if het_noise:
if uncertain_inputs:
psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else:
if isEnd:
dL_dthetaL = self.midRes['dL_dthetaL']
else:
dL_dthetaL = 0.
if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict
def update_gradients(model, mpi_comm=None):
if mpi_comm == None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
model.Z.gradient = 0.
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y)
if isinstance(model.X, VariationalPosterior):
if (n_range[1]-n_range[0])==X.shape[0]:
X_slice = X
elif mpi_comm ==None:
X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.set_X_gradients(X_slice, X_grad)
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm != None:
if het_noise:
raise "het_noise not implemented!"
kern_grad_all = kern_grad.copy()
Z_grad_all = model.Z.gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
#gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(X)
if mpi_comm != None:
KL_div_all = np.array(KL_div)
mpi_comm.Allreduce([np.float64(KL_div), MPI.DOUBLE], [KL_div_all, MPI.DOUBLE])
KL_div = KL_div_all
[mpi_comm.Allgatherv([pp.copy(), MPI.DOUBLE], [pa, (model.N_list*pa.shape[-1], None), MPI.DOUBLE]) for pp,pa in zip(model.get_X_gradients(X),model.get_X_gradients(model.X))]
# from ...models import SSGPLVM
# if isinstance(model, SSGPLVM):
# grad_pi = np.array(model.variational_prior.pi.gradient)
# mpi_comm.Allreduce([grad_pi.copy(), MPI.DOUBLE], [model.variational_prior.pi.gradient, MPI.DOUBLE])
model._log_marginal_likelihood -= KL_div
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
def update_gradients_sparsegp(model, mpi_comm=None):
if mpi_comm == None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = np.float64(0.)
kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
model.Z.gradient = 0.
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y)
if (n_range[1]-n_range[0])==X.shape[0]:
X_slice = X
elif mpi_comm ==None:
X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
model.kern.update_gradients_diag(grad_dict['dL_dKdiag'], X_slice)
kern_grad += model.kern.gradient
model.kern.update_gradients_full(grad_dict['dL_dKnm'], X_slice, model.Z)
kern_grad += model.kern.gradient
model.Z.gradient += model.kern.gradients_X(grad_dict['dL_dKnm'].T, model.Z, X_slice)
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Gather the gradients from multiple MPI nodes
if mpi_comm != None:
if het_noise:
raise "het_noise not implemented!"
kern_grad_all = kern_grad.copy()
Z_grad_all = model.Z.gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
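# A driver sketch of the two-phase scheme above (this mirrors update_gradients): phase one
# computes the bound and dL_dKmm once, phase two streams over minibatches of the data:
#
#   logL, dL_dKmm, post = inference_method.inference_likelihood(kern, X, Z, likelihood, Y)
#   isEnd = False
#   while not isEnd:
#       isEnd, (n_start, n_end), grad_dict = inference_method.inference_minibatch(
#           kern, X, Z, likelihood, Y)
#       # accumulate grad_dict into the kernel, Z and likelihood gradients for this slice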

View file

@ -0,0 +1 @@
from hmc import HMC

174
GPy/inference/mcmc/hmc.py Normal file
View file

@ -0,0 +1,174 @@
# ## Copyright (c) 2014, Zhenwen Dai
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
class HMC:
"""
An implementation of Hybrid Monte Carlo (HMC) for GPy models
Initialize an object for HMC sampling. Note that the status of the model (model parameters) will be changed during sampling.
:param model: the GPy model that will be sampled
:type model: GPy.core.Model
:param M: the mass matrix (an identity matrix by default)
:type M: numpy.ndarray
:param stepsize: the step size for HMC sampling
:type stepsize: float
"""
def __init__(self, model, M=None,stepsize=1e-1):
self.model = model
self.stepsize = stepsize
self.p = np.empty_like(model.optimizer_array.copy())
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, num_samples=1000, hmc_iters=20):
"""
Sample the (unfixed) model parameters.
:param num_samples: the number of samples to draw (1000 by default)
:type num_samples: int
:param hmc_iters: the number of leap-frog iterations (20 by default)
:type hmc_iters: int
:return: the array of parameter samples with shape N x P (N - the number of samples, P - the number of parameters to sample)
:rtype: numpy.ndarray
"""
params = np.empty((num_samples,self.p.size))
for i in xrange(num_samples):
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
theta_old = self.model.optimizer_array.copy()
params[i] = self.model.unfixed_param_array
#Metropolis
self._update(hmc_iters)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters):
for i in xrange(hmc_iters):
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + self.stepsize*np.dot(self.Minv, self.p)
self.p[:] += -self.stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.
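# A usage sketch, assuming `m` is any GPy model (e.g. GPy.models.GPRegression(X, Y)) with
# priors set on the parameters to be sampled; _computeH above is the Hamiltonian
# H = -log p(theta|D) + 0.5*log((2*pi)^P |M|) + 0.5*p' M^{-1} p used in the accept step:
#
#   hmc = HMC(m, stepsize=2e-2)
#   samples = hmc.sample(num_samples=500, hmc_iters=20)   # (500, number of unfixed parameters)
#   param_means = samples.mean(axis=0)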
class HMC_shortcut:
def __init__(self,model,M=None,stepsize_range=[1e-6, 1e-1],groupsize=5, Hstd_th=[1e-5, 3.]):
self.model = model
self.stepsize_range = np.log(stepsize_range)
self.p = np.empty_like(model.optimizer_array.copy())
self.groupsize = groupsize
self.Hstd_th = Hstd_th
if M is None:
self.M = np.eye(self.p.size)
else:
self.M = M
self.Minv = np.linalg.inv(self.M)
def sample(self, m_iters=1000, hmc_iters=20):
params = np.empty((m_iters,self.p.size))
for i in xrange(m_iters):
# sample a stepsize from the uniform distribution
stepsize = np.exp(np.random.rand()*(self.stepsize_range[1]-self.stepsize_range[0])+self.stepsize_range[0])
self.p[:] = np.random.multivariate_normal(np.zeros(self.p.size),self.M)
H_old = self._computeH()
params[i] = self.model.unfixed_param_array
theta_old = self.model.optimizer_array.copy()
#Metropolis
self._update(hmc_iters, stepsize)
H_new = self._computeH()
if H_old>H_new:
k = 1.
else:
k = np.exp(H_old-H_new)
if np.random.rand()<k:
params[i] = self.model.unfixed_param_array
else:
self.model.optimizer_array = theta_old
return params
def _update(self, hmc_iters, stepsize):
theta_buf = np.empty((2*hmc_iters+1,self.model.optimizer_array.size))
p_buf = np.empty((2*hmc_iters+1,self.p.size))
H_buf = np.empty((2*hmc_iters+1,))
# Set initial position
theta_buf[hmc_iters] = self.model.optimizer_array
p_buf[hmc_iters] = self.p
H_buf[hmc_iters] = self._computeH()
reversal = []
pos = 1
i=0
while i<hmc_iters:
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
self.model.optimizer_array = self.model.optimizer_array + stepsize*np.dot(self.Minv, self.p)
self.p[:] += -stepsize/2.*self.model._transform_gradients(self.model.objective_function_gradients())
theta_buf[hmc_iters+pos] = self.model.optimizer_array
p_buf[hmc_iters+pos] = self.p
H_buf[hmc_iters+pos] = self._computeH()
i+=1
if i<self.groupsize:
pos += 1
continue
else:
if len(reversal)==0:
Hlist = range(hmc_iters+pos,hmc_iters+pos-self.groupsize,-1)
if self._testH(H_buf[Hlist]):
pos += 1
else:
# Reverse the trajectory for the 1st time
reversal.append(pos)
if hmc_iters-i>pos:
pos = -1
i += pos
self.model.optimizer_array = theta_buf[hmc_iters]
self.p[:] = -p_buf[hmc_iters]
else:
pos_new = pos-hmc_iters+i
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = -p_buf[hmc_iters+pos_new]
break
else:
Hlist = range(hmc_iters+pos,hmc_iters+pos+self.groupsize)
if self._testH(H_buf[Hlist]):
pos += -1
else:
# Reverse the trajectory for the 2nd time
r = (hmc_iters - i)%((reversal[0]-pos)*2)
if r>(reversal[0]-pos):
pos_new = 2*reversal[0] - r - pos
else:
pos_new = pos + r
self.model.optimizer_array = theta_buf[hmc_iters+pos_new]
self.p[:] = p_buf[hmc_iters+pos_new] # the sign of momentum might be wrong!
break
def _testH(self, Hlist):
Hstd = np.std(Hlist)
if Hstd<self.Hstd_th[0] or Hstd>self.Hstd_th[1]:
return False
else:
return True
def _computeH(self,):
return self.model.objective_function()+self.p.size*np.log(2*np.pi)/2.+np.log(np.linalg.det(self.M))/2.+np.dot(self.p, np.dot(self.Minv,self.p[:,None]))/2.

View file

@ -1,10 +1,9 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# ## Copyright (c) 2014, Zhenwen Dai
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy as np
 from scipy import linalg, optimize
-import pylab as pb
 import Tango
 import sys
 import re
@ -80,6 +79,3 @@ class Metropolis_Hastings:
 fs.append(function(*args))
 self.model._set_params(param)# reset model to starting state
 return fs

View file

@ -0,0 +1,2 @@
from scg import SCG
from optimization import *

View file

@ -1,9 +1,7 @@
-'''
-Created on 24 Apr 2013
-@author: maxz
-'''
-from GPy.inference.gradient_descent_update_rules import FletcherReeves, \
+# Copyright (c) 2012-2014, Max Zwiessele
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+from gradient_descent_update_rules import FletcherReeves, \
 PolakRibiere
 from Queue import Empty
 from multiprocessing import Value

View file

@ -1,8 +1,6 @@
-'''
-Created on 24 Apr 2013
-@author: maxz
-'''
+# Copyright (c) 2012-2014, Max Zwiessele
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
 import numpy
 class GDUpdateRule():

View file

@ -1,7 +1,6 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-import pylab as pb
 import datetime as dt
 from scipy import optimize
 from warnings import warn
@ -57,13 +56,14 @@ class Optimizer():
raise NotImplementedError, "this needs to be implemented to use the optimizer class" raise NotImplementedError, "this needs to be implemented to use the optimizer class"
def plot(self): def plot(self):
if self.trace == None: """
print "No trace present so I can't plot it. Please check that the optimizer actually supplies a trace." See GPy.plotting.matplot_dep.inference_plots
else: """
pb.figure() import sys
pb.plot(self.trace) assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
pb.xlabel('Iteration') from ...plotting.matplot_dep import inference_plots
pb.ylabel('f(x)') inference_plots.plot_optimizer(self)
def __str__(self): def __str__(self):
diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name diagnostics = "Optimizer: \t\t\t\t %s\n" % self.opt_name
@ -118,7 +118,7 @@ class opt_lbfgsb(Optimizer):
 assert f_fp != None, "BFGS requires f_fp"
 if self.messages:
-iprint = 0
+iprint = 1
 else:
 iprint = -1
@ -126,29 +126,18 @@ class opt_lbfgsb(Optimizer):
 if self.xtol is not None:
 print "WARNING: l-bfgs-b doesn't have an xtol arg, so I'm going to ignore it"
 if self.ftol is not None:
-opt_dict['ftol'] = self.ftol
-# print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
+print "WARNING: l-bfgs-b doesn't have an ftol arg, so I'm going to ignore it"
 if self.gtol is not None:
-opt_dict['gtol'] = self.gtol
+opt_dict['pgtol'] = self.gtol
 if self.bfgs_factor is not None:
 opt_dict['factr'] = self.bfgs_factor
-opt_dict['iprint'] = iprint
-opt_dict['maxiter'] = self.max_iters
-opt_dict['disp'] = self.messages
-#dict(maxiter=self.max_iters, disp=self.messages, iprint=iprint, ftol=self.ftol, gtol=self.gtol)
-opt_result = optimize.minimize(f_fp, self.x_init, method='L-BFGS-B', jac=True, options=opt_dict)
-#opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
-# maxfun=self.max_iters, **opt_dict)
-#self.x_opt = opt_result[0]
-#self.f_opt = f_fp(self.x_opt)[0]
-#self.funct_eval = opt_result[2]['funcalls']
-#self.status = rcstrings[opt_result[2]['warnflag']]
-self.x_opt = opt_result.x
-self.status = opt_result.success
-self.funct_eval = opt_result.nfev
-self.f_opt = opt_result.fun
-self.opt_result = opt_result
+opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint=iprint,
+maxfun=self.max_iters, **opt_dict)
+self.x_opt = opt_result[0]
+self.f_opt = f_fp(self.x_opt)[0]
+self.funct_eval = opt_result[2]['funcalls']
+self.status = rcstrings[opt_result[2]['warnflag']]
 class opt_simplex(Optimizer):
 def __init__(self, *args, **kwargs):
@ -236,13 +225,11 @@ class opt_SCG(Optimizer):
 self.status = opt_result[3]
 def get_optimizer(f_min):
-from sgd import opt_SGD
 optimizers = {'fmin_tnc': opt_tnc,
 'simplex': opt_simplex,
 'lbfgsb': opt_lbfgsb,
-'scg': opt_SCG,
-'sgd': opt_SGD}
+'scg': opt_SCG}
 if rasm_available:
 optimizers['rasmussen'] = opt_rasm

View file

@ -28,11 +28,11 @@ import sys
 def print_out(len_maxiters, fnow, current_grad, beta, iteration):
 print '\r',
-print '{0:>0{mi}g} {1:> 12e} {2:> 12e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
+print '{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
 sys.stdout.flush()
 def exponents(fnow, current_grad):
-exps = [np.abs(fnow), current_grad]
+exps = [np.abs(np.float(fnow)), current_grad]
 return np.sign(exps) * np.log10(exps).astype(int)
 def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
@ -56,13 +56,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
 if gtol is None:
 gtol = 1e-5
-sigma0 = 1.0e-8
+sigma0 = 1.0e-7
 fold = f(x, *optargs) # Initial function value.
 function_eval = 1
 fnow = fold
 gradnew = gradf(x, *optargs) # Initial gradient.
-if any(np.isnan(gradnew)):
-raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
+#if any(np.isnan(gradnew)):
+# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
 current_grad = np.dot(gradnew, gradnew)
 gradold = gradnew.copy()
 d = -gradnew # Initial search direction.
@ -168,13 +168,13 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True,
 if Delta < 0.25:
 beta = min(4.0 * beta, betamax)
 if Delta > 0.75:
-beta = max(0.5 * beta, betamin)
+beta = max(0.25 * beta, betamin)
 # Update search direction using Polak-Ribiere formula, or re-start
 # in direction of negative gradient after nparams steps.
 if nsuccess == x.size:
 d = -gradnew
-# beta = 1. # TODO: betareset!!
+beta = 1. # This is not in the original paper
 nsuccess = 0
 elif success:
 Gamma = np.dot(gradold - gradnew, gradnew) / (mu)

View file

@ -0,0 +1,56 @@
# Copyright (c) 2012-2014, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)
class StochasticStorage(object):
'''
This is a container for holding the stochastic parameters,
such as subset indices or step length and so on.
'''
def __init__(self, model):
"""
Initialize this stochastic container using the given model
"""
def do_stochastics(self):
"""
Update the internal state to the next batch of the stochastic
descent algorithm.
"""
pass
def reset(self):
"""
Reset the state of this stochastics generator.
"""
class SparseGPMissing(StochasticStorage):
def __init__(self, model, batchsize=1):
"""
Here we want to loop over all dimensions every time.
Thus, we can just make sure the loop goes over self.d every
time.
"""
self.d = xrange(model.Y_normalized.shape[1])
class SparseGPStochastics(StochasticStorage):
"""
For the sparse gp we need to store the dimension we are in,
and the indices corresponding to those
"""
def __init__(self, model, batchsize=1):
self.batchsize = batchsize
self.output_dim = model.Y.shape[1]
self.reset()
self.do_stochastics()
def do_stochastics(self):
if self.batchsize == 1:
self.current_dim = (self.current_dim+1)%self.output_dim
self.d = [self.current_dim]
else:
import numpy as np
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
def reset(self):
self.current_dim = -1
self.d = None
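# A behaviour sketch, assuming a model whose Y has 4 output dimensions:
#
#   s = SparseGPStochastics(model, batchsize=1)
#   s.d                  # [0] right after construction
#   s.do_stochastics()   # then cycles through [1], [2], [3], [0], ...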

View file

@ -1,355 +0,0 @@
import numpy as np
import scipy as sp
import scipy.sparse
from optimization import Optimizer
from scipy import linalg, optimize
import pylab as plt
import copy, sys, pickle
class opt_SGD(Optimizer):
"""
Optimize using stochastic gradient descent.
:param Model: reference to the Model object
:param iterations: number of iterations
:param learning_rate: learning rate
:param momentum: momentum
"""
def __init__(self, start, iterations = 10, learning_rate = 1e-4, momentum = 0.9, model = None, messages = False, batch_size = 1, self_paced = False, center = True, iteration_file = None, learning_rate_adaptation=None, actual_iter=None, schedule=None, **kwargs):
self.opt_name = "Stochastic Gradient Descent"
self.Model = model
self.iterations = iterations
self.momentum = momentum
self.learning_rate = learning_rate
self.x_opt = None
self.f_opt = None
self.messages = messages
self.batch_size = batch_size
self.self_paced = self_paced
self.center = center
self.param_traces = [('noise',[])]
self.iteration_file = iteration_file
self.learning_rate_adaptation = learning_rate_adaptation
self.actual_iter = actual_iter
if self.learning_rate_adaptation != None:
if self.learning_rate_adaptation == 'annealing':
self.learning_rate_0 = self.learning_rate
else:
self.learning_rate_0 = self.learning_rate.mean()
self.schedule = schedule
# if len([p for p in self.model.kern.parts if p.name == 'bias']) == 1:
# self.param_traces.append(('bias',[]))
# if len([p for p in self.model.kern.parts if p.name == 'linear']) == 1:
# self.param_traces.append(('linear',[]))
# if len([p for p in self.model.kern.parts if p.name == 'rbf']) == 1:
# self.param_traces.append(('rbf_var',[]))
self.param_traces = dict(self.param_traces)
self.fopt_trace = []
num_params = len(self.Model._get_params())
if isinstance(self.learning_rate, float):
self.learning_rate = np.ones((num_params,)) * self.learning_rate
assert (len(self.learning_rate) == num_params), "there must be one learning rate per parameter"
def __str__(self):
status = "\nOptimizer: \t\t\t %s\n" % self.opt_name
status += "f(x_opt): \t\t\t %.4f\n" % self.f_opt
status += "Number of iterations: \t\t %d\n" % self.iterations
status += "Learning rate: \t\t\t max %.3f, min %.3f\n" % (self.learning_rate.max(), self.learning_rate.min())
status += "Momentum: \t\t\t %.3f\n" % self.momentum
status += "Batch size: \t\t\t %d\n" % self.batch_size
status += "Time elapsed: \t\t\t %s\n" % self.time
return status
def plot_traces(self):
plt.figure()
plt.subplot(211)
plt.title('Parameters')
for k in self.param_traces.keys():
plt.plot(self.param_traces[k], label=k)
plt.legend(loc=0)
plt.subplot(212)
plt.title('Objective function')
plt.plot(self.fopt_trace)
def non_null_samples(self, data):
return (np.isnan(data).sum(axis=1) == 0)
def check_for_missing(self, data):
if sp.sparse.issparse(self.Model.likelihood.Y):
return True
else:
return np.isnan(data).sum() > 0
def subset_parameter_vector(self, x, samples, param_shapes):
subset = np.array([], dtype = int)
x = np.arange(0, len(x))
i = 0
for s in param_shapes:
N, input_dim = s
X = x[i:i+N*input_dim].reshape(N, input_dim)
X = X[samples]
subset = np.append(subset, X.flatten())
i += N*input_dim
subset = np.append(subset, x[i:])
return subset
def shift_constraints(self, j):
constrained_indices = copy.deepcopy(self.Model.constrained_indices)
for c, constraint in enumerate(constrained_indices):
mask = (np.ones_like(constrained_indices[c]) == 1)
for i in range(len(constrained_indices[c])):
pos = np.where(j == constrained_indices[c][i])[0]
if len(pos) == 1:
self.Model.constrained_indices[c][i] = pos
else:
mask[i] = False
self.Model.constrained_indices[c] = self.Model.constrained_indices[c][mask]
return constrained_indices
# back them up
# bounded_i = copy.deepcopy(self.Model.constrained_bounded_indices)
# bounded_l = copy.deepcopy(self.Model.constrained_bounded_lowers)
# bounded_u = copy.deepcopy(self.Model.constrained_bounded_uppers)
# for b in range(len(bounded_i)): # for each group of constraints
# for bc in range(len(bounded_i[b])):
# pos = np.where(j == bounded_i[b][bc])[0]
# if len(pos) == 1:
# pos2 = np.where(self.Model.constrained_bounded_indices[b] == bounded_i[b][bc])[0][0]
# self.Model.constrained_bounded_indices[b][pos2] = pos[0]
# else:
# if len(self.Model.constrained_bounded_indices[b]) == 1:
# # if it's the last index to be removed
# # the logic here is just a mess. If we remove the last one, then all the
# # b-indices change and we have to iterate through everything to find our
# # current index. Can't deal with this right now.
# raise NotImplementedError
# else: # just remove it from the indices
# mask = self.Model.constrained_bounded_indices[b] != bc
# self.Model.constrained_bounded_indices[b] = self.Model.constrained_bounded_indices[b][mask]
# # here we shif the positive constraints. We cycle through each positive
# # constraint
# positive = self.Model.constrained_positive_indices.copy()
# mask = (np.ones_like(positive) == 1)
# for p in range(len(positive)):
# # we now check whether the constrained index appears in the j vector
# # (the vector of the "active" indices)
# pos = np.where(j == self.Model.constrained_positive_indices[p])[0]
# if len(pos) == 1:
# self.Model.constrained_positive_indices[p] = pos
# else:
# mask[p] = False
# self.Model.constrained_positive_indices = self.Model.constrained_positive_indices[mask]
# return (bounded_i, bounded_l, bounded_u), positive
def restore_constraints(self, c):#b, p):
# self.Model.constrained_bounded_indices = b[0]
# self.Model.constrained_bounded_lowers = b[1]
# self.Model.constrained_bounded_uppers = b[2]
# self.Model.constrained_positive_indices = p
self.Model.constrained_indices = c
def get_param_shapes(self, N = None, input_dim = None):
model_name = self.Model.__class__.__name__
if model_name == 'GPLVM':
return [(N, input_dim)]
if model_name == 'Bayesian_GPLVM':
return [(N, input_dim), (N, input_dim)]
else:
raise NotImplementedError
def step_with_missing_data(self, f_fp, X, step, shapes):
N, input_dim = X.shape
if not sp.sparse.issparse(self.Model.likelihood.Y):
Y = self.Model.likelihood.Y
samples = self.non_null_samples(self.Model.likelihood.Y)
self.Model.N = samples.sum()
Y = Y[samples]
else:
samples = self.Model.likelihood.Y.nonzero()[0]
self.Model.N = len(samples)
Y = np.asarray(self.Model.likelihood.Y[samples].todense(), dtype = np.float64)
if self.Model.N == 0 or Y.std() == 0.0:
return 0, step, self.Model.N
self.Model.likelihood._offset = Y.mean()
self.Model.likelihood._scale = Y.std()
self.Model.likelihood.set_data(Y)
# self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
j = self.subset_parameter_vector(self.x_opt, samples, shapes)
self.Model.X = X[samples]
model_name = self.Model.__class__.__name__
if model_name == 'Bayesian_GPLVM':
self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
ci = self.shift_constraints(j)
f, fp = f_fp(self.x_opt[j])
step[j] = self.momentum * step[j] + self.learning_rate[j] * fp
self.x_opt[j] -= step[j]
self.restore_constraints(ci)
self.Model.grads[j] = fp
# restore likelihood _offset and _scale, otherwise when we call set_data(y) on
# the next feature, it will get normalized with the mean and std of this one.
self.Model.likelihood._offset = 0
self.Model.likelihood._scale = 1
return f, step, self.Model.N
def adapt_learning_rate(self, t, D):
if self.learning_rate_adaptation == 'adagrad':
if t > 0:
g_k = self.Model.grads
self.s_k += np.square(g_k)
t0 = 100.0
self.learning_rate = 0.1/(t0 + np.sqrt(self.s_k))
import pdb; pdb.set_trace()
else:
self.learning_rate = np.zeros_like(self.learning_rate)
self.s_k = np.zeros_like(self.x_opt)
elif self.learning_rate_adaptation == 'annealing':
#self.learning_rate = self.learning_rate_0/(1+float(t+1)/10)
self.learning_rate = np.ones_like(self.learning_rate) * self.schedule[t]
elif self.learning_rate_adaptation == 'semi_pesky':
if self.Model.__class__.__name__ == 'Bayesian_GPLVM':
g_t = self.Model.grads
if t == 0:
self.hbar_t = 0.0
self.tau_t = 100.0
self.gbar_t = 0.0
self.gbar_t = (1-1/self.tau_t)*self.gbar_t + 1/self.tau_t * g_t
self.hbar_t = (1-1/self.tau_t)*self.hbar_t + 1/self.tau_t * np.dot(g_t.T, g_t)
self.learning_rate = np.ones_like(self.learning_rate)*(np.dot(self.gbar_t.T, self.gbar_t) / self.hbar_t)
tau_t = self.tau_t*(1-self.learning_rate) + 1
def opt(self, f_fp=None, f=None, fp=None):
self.x_opt = self.Model._get_params_transformed()
self.grads = []
X, Y = self.Model.X.copy(), self.Model.likelihood.Y.copy()
self.Model.likelihood.YYT = 0
self.Model.likelihood.trYYT = 0
self.Model.likelihood._offset = 0.0
self.Model.likelihood._scale = 1.0
N, input_dim = self.Model.X.shape
D = self.Model.likelihood.Y.shape[1]
num_params = self.Model._get_params()
self.trace = []
missing_data = self.check_for_missing(self.Model.likelihood.Y)
step = np.zeros_like(num_params)
for it in range(self.iterations):
if self.actual_iter != None:
it = self.actual_iter
self.Model.grads = np.zeros_like(self.x_opt) # TODO this is ugly
if it == 0 or self.self_paced is False:
features = np.random.permutation(Y.shape[1])
else:
features = np.argsort(NLL)
b = len(features)/self.batch_size
features = [features[i::b] for i in range(b)]
NLL = []
import pylab as plt
for count, j in enumerate(features):
self.Model.input_dim = len(j)
self.Model.likelihood.input_dim = len(j)
self.Model.likelihood.set_data(Y[:, j])
# self.Model.likelihood.V = self.Model.likelihood.Y*self.Model.likelihood.precision
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
if missing_data:
shapes = self.get_param_shapes(N, input_dim)
f, step, Nj = self.step_with_missing_data(f_fp, X, step, shapes)
else:
self.Model.likelihood.YYT = np.dot(self.Model.likelihood.Y, self.Model.likelihood.Y.T)
self.Model.likelihood.trYYT = np.trace(self.Model.likelihood.YYT)
Nj = N
f, fp = f_fp(self.x_opt)
self.Model.grads = fp.copy()
step = self.momentum * step + self.learning_rate * fp
self.x_opt -= step
if self.messages == 2:
noise = self.Model.likelihood._variance
status = "evaluating {feature: 5d}/{tot: 5d} \t f: {f: 2.3f} \t non-missing: {nm: 4d}\t noise: {noise: 2.4f}\r".format(feature = count, tot = len(features), f = f, nm = Nj, noise = noise)
sys.stdout.write(status)
sys.stdout.flush()
self.param_traces['noise'].append(noise)
self.adapt_learning_rate(it+count, D)
NLL.append(f)
self.fopt_trace.append(NLL[-1])
# fig = plt.figure('traces')
# plt.clf()
# plt.plot(self.param_traces['noise'])
# for k in self.param_traces.keys():
# self.param_traces[k].append(self.Model.get(k)[0])
self.grads.append(self.Model.grads.tolist())
# should really be a sum(), but earlier samples in the iteration will have a very crappy ll
self.f_opt = np.mean(NLL)
self.Model.N = N
self.Model.X = X
self.Model.input_dim = D
self.Model.likelihood.N = N
self.Model.likelihood.input_dim = D
self.Model.likelihood.Y = Y
sigma = self.Model.likelihood._variance
self.Model.likelihood._variance = None # invalidate cache
self.Model.likelihood._set_params(sigma)
self.trace.append(self.f_opt)
if self.iteration_file is not None:
f = open(self.iteration_file + "iteration%d.pickle" % it, 'w')
data = [self.x_opt, self.fopt_trace, self.param_traces]
pickle.dump(data, f)
f.close()
if self.messages != 0:
sys.stdout.write('\r' + ' '*len(status)*2 + ' \r')
status = "SGD Iteration: {0: 3d}/{1: 3d} f: {2: 2.3f} max eta: {3: 1.5f}\n".format(it+1, self.iterations, self.f_opt, self.learning_rate.max())
sys.stdout.write(status)
sys.stdout.flush()
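# For reference, the per-batch update applied in opt() is plain momentum SGD on the
# transformed parameter vector:
#     step <- momentum * step + eta * f'(x)
#     x    <- x - step
# where f, f'(x) come from f_fp and eta is adapted by adapt_learning_rate().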

2
GPy/installation.cfg Normal file

@@ -0,0 +1,2 @@
# This is the local installation configuration file for GPy


@@ -1,9 +1,19 @@
 # Copyright (c) 2012, 2013 GPy authors (see AUTHORS.txt).
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
-from constructors import *
-try:
-    from constructors import rbf_sympy, sympykern # these depend on sympy
-except:
-    pass
-from kern import *
+from _src.kern import Kern
+from _src.rbf import RBF
+from _src.linear import Linear, LinearFull
+from _src.static import Bias, White
+from _src.brownian import Brownian
+from _src.stationary import Exponential, OU, Matern32, Matern52, ExpQuad, RatQuad, Cosine
+from _src.mlp import MLP
+from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
+from _src.independent_outputs import IndependentOutputs, Hierarchical
+from _src.coregionalize import Coregionalize
+from _src.ODE_UY import ODE_UY
+from _src.ODE_UYC import ODE_UYC
+from _src.ODE_st import ODE_st
+from _src.ODE_t import ODE_t
+from _src.poly import Poly
+from _src.trunclinear import TruncLinear,TruncLinear_inf
+from _src.splitKern import SplitKern,DiffGenomeKern

282
GPy/kern/_src/ODE_UY.py Normal file

@@ -0,0 +1,282 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UY(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., active_dims=None, name='ode_uy'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_UY, self).__init__(input_dim, active_dims, name)
self.variance_Y = Param('variance_Y', variance_Y, Logexp())
self.variance_U = Param('variance_U', variance_U, Logexp())
self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())
self.link_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U)
def K(self, X, X2=None):
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
#iu=self.input_lengthU #dimension of U
Vu=self.variance_U
Vy=self.variance_Y
#Vy=ly/2
#stop
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
#kyu3 = lambda dist: 0
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
#k1cros = lambda dist:0
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
#k2cros = lambda dist:0
Vyu=np.sqrt(Vy*ly*2)
# cross covariance kuy
kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#K[ss1,ss2]= 0
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
Kdiag = np.zeros(X.shape[0])
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu = self.variance_U
Vy=self.variance_Y
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= self.variance_U
elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu=self.variance_U
Vy=self.variance_Y
Vyu = np.sqrt(Vy*ly*2)
dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)
rd=rdist.shape
dktheta1 = np.zeros(rd)
dktheta2 = np.zeros(rd)
dkUdvar = np.zeros(rd)
dkYdvar = np.zeros(rd)
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
dk2theta1 = lambda dist: (1.0)*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
# cross covariance kuy
kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:(kyu3(-dist)) #t<0 kyu
# dk dtheta for UY
dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
(lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )
dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )
dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )
dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu
dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
elif i==0 and j==1:
########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
else:
#######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))
#stop
self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly
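# Usage sketch (hedged: assumes ODE_UY is exposed on GPy.kern, as the import list earlier in
# this commit suggests). The kernel expects a two-column X whose last column is the output
# index handled by index_to_slices: 0 for the latent force U, 1 for the driven output Y
# (see Kdiag above).
#
#   import numpy as np
#   import GPy
#   t = np.linspace(0., 10., 20)[:, None]
#   X = np.vstack([np.hstack([t, np.zeros_like(t)]),   # U inputs, index 0
#                  np.hstack([t, np.ones_like(t)])])   # Y inputs, index 1
#   k = GPy.kern.ODE_UY(input_dim=2)
#   K = k.K(X)                                          # joint covariance over [U; Y]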

290
GPy/kern/_src/ODE_UYC.py Normal file

@@ -0,0 +1,290 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_UYC(Kern):
def __init__(self, input_dim, variance_U=3., variance_Y=1., lengthscale_U=1., lengthscale_Y=1., ubias =1. ,active_dims=None, name='ode_uyc'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_UYC, self).__init__(input_dim, active_dims, name)
self.variance_Y = Param('variance_Y', variance_Y, Logexp())
self.variance_U = Param('variance_U', variance_U, Logexp())
self.lengthscale_Y = Param('lengthscale_Y', lengthscale_Y, Logexp())
self.lengthscale_U = Param('lengthscale_U', lengthscale_U, Logexp())
self.ubias = Param('ubias', ubias, Logexp())
self.add_parameters(self.variance_Y, self.variance_U, self.lengthscale_Y, self.lengthscale_U, self.ubias)
def K(self, X, X2=None):
# model : a * dy/dt + b * y = U
#lu=sqrt(3)/theta1 ly=1/theta2 theta2= a/b :thetay sigma2=1/(2ab) :sigmay
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
#stop
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
#iu=self.input_lengthU #dimension of U
Vu=self.variance_U
Vy=self.variance_Y
#Vy=ly/2
#stop
# kernel for kuu matern3/2
kuu = lambda dist:Vu * (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist)) +self.ubias
# kernel for kyy
k1 = lambda dist:np.exp(-ly*np.abs(dist))*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist:(np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist:np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
kyy = lambda dist:Vu*Vy*(k1(dist) + k2(dist) + k3(dist))
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
#kyu3 = lambda dist: 0
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
#k1cros = lambda dist:0
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
#k2cros = lambda dist:0
Vyu=np.sqrt(Vy*ly*2)
# cross covariance kuy
kuyp = lambda dist:Vu*Vyu*(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:Vu*Vyu*(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:Vu*Vyu*(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:Vu*Vyu*(kyu3(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
elif i==0 and j==1:
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
else:
#K[ss1,ss2]= 0
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
Kdiag = np.zeros(X.shape[0])
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu = self.variance_U
Vy=self.variance_Y
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= self.variance_U + self.ubias
elif i==1:
Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
#Kdiag[slices[0][0]]+= self.variance_U #matern32 diag
#Kdiag[slices[1][0]]+= self.variance_U*self.variance_Y*(k1+k2+k3) # diag
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
#rdist = X[:,0][:,None] - X2[:,0][:,None].T
rdist = X - X2.T
ly=1/self.lengthscale_Y
lu=np.sqrt(3)/self.lengthscale_U
Vu=self.variance_U
Vy=self.variance_Y
Vyu = np.sqrt(Vy*ly*2)
dVdly = 0.5/np.sqrt(ly)*np.sqrt(2*Vy)
dVdVy = 0.5/np.sqrt(Vy)*np.sqrt(2*ly)
rd=rdist.shape[0]
dktheta1 = np.zeros([rd,rd])
dktheta2 = np.zeros([rd,rd])
dkUdvar = np.zeros([rd,rd])
dkYdvar = np.zeros([rd,rd])
dkdubias = np.zeros([rd,rd])
# dk dtheta for UU
UUdtheta1 = lambda dist: np.exp(-lu* dist)*dist + (-dist)*np.exp(-lu* dist)*(1+lu*dist)
UUdtheta2 = lambda dist: 0
#UUdvar = lambda dist: (1 + lu*dist)*np.exp(-lu*dist)
UUdvar = lambda dist: (1 + lu* np.abs(dist)) * np.exp(-lu * np.abs(dist))
# dk dtheta for YY
dk1theta1 = lambda dist: np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
dk2theta1 = lambda dist: (1.0)*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = lambda dist: np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
#dktheta1 = lambda dist: self.variance_U*self.variance_Y*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = lambda dist: np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 =lambda dist: 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = lambda dist: np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
#dktheta2 = lambda dist: self.variance_U*self.variance_Y*(dk1theta2 + dk2theta2 +dk3theta2)
# kyy kernel
k1 = lambda dist: np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = lambda dist: (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = lambda dist: np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
#dkdvar = k1+k2+k3
# cross covariance function
kyu3 = lambda dist:np.exp(-lu*dist)/(lu+ly)*(1+lu*(dist+1/(lu+ly)))
k1cros = lambda dist:np.exp(ly*dist)/(lu-ly) * ( 1- np.exp( (lu-ly)*dist) + lu* ( dist*np.exp( (lu-ly)*dist ) + (1- np.exp( (lu-ly)*dist ) ) /(lu-ly) ) )
k2cros = lambda dist:np.exp(ly*dist)*( 1/(lu+ly) + lu/(lu+ly)**2 )
# cross covariance kuy
kuyp = lambda dist:(kyu3(dist)) #t>0 kuy
kuyn = lambda dist:(k1cros(dist)+k2cros(dist)) #t<0 kuy
# cross covariance kyu
kyup = lambda dist:(k1cros(-dist)+k2cros(-dist)) #t>0 kyu
kyun = lambda dist:(kyu3(-dist)) #t<0 kyu
# dk dtheta for UY
dkyu3dtheta2 = lambda dist: np.exp(-lu*dist) * ( (-1)*(lu+ly)**(-2)*(1+lu*dist+lu*(lu+ly)**(-1)) + (lu+ly)**(-1)*(-lu)*(lu+ly)**(-2) )
dkyu3dtheta1 = lambda dist: np.exp(-lu*dist)*(lu+ly)**(-1)* ( (-dist)*(1+dist*lu+lu*(lu+ly)**(-1)) -\
(lu+ly)**(-1)*(1+dist*lu+lu*(lu+ly)**(-1)) +dist+(lu+ly)**(-1)-lu*(lu+ly)**(-2) )
dkcros2dtheta1 = lambda dist: np.exp(ly*dist)* ( -(ly+lu)**(-2) + (ly+lu)**(-2) + (-2)*lu*(lu+ly)**(-3) )
dkcros2dtheta2 = lambda dist: np.exp(ly*dist)*dist* ( (ly+lu)**(-1) + lu*(lu+ly)**(-2) ) + \
np.exp(ly*dist)*( -(lu+ly)**(-2) + lu*(-2)*(lu+ly)**(-3) )
dkcros1dtheta1 = lambda dist: np.exp(ly*dist)*( -(lu-ly)**(-2)*( 1-np.exp((lu-ly)*dist) + lu*dist*np.exp((lu-ly)*dist)+ \
lu*(1-np.exp((lu-ly)*dist))/(lu-ly) ) + (lu-ly)**(-1)*( -np.exp( (lu-ly)*dist )*dist + dist*np.exp( (lu-ly)*dist)+\
lu*dist**2*np.exp((lu-ly)*dist)+(1-np.exp((lu-ly)*dist))/(lu-ly) - lu*np.exp((lu-ly)*dist)*dist/(lu-ly) -\
lu*(1-np.exp((lu-ly)*dist))/(lu-ly)**2 ) )
dkcros1dtheta2 = lambda t: np.exp(ly*t)*t/(lu-ly)*( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)+\
lu*(1-np.exp((lu-ly)*t))/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)**2* ( 1-np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t) + lu*( 1-np.exp((lu-ly)*t) )/(lu-ly) )+\
np.exp(ly*t)/(lu-ly)*( np.exp((lu-ly)*t)*t -lu*t*t*np.exp((lu-ly)*t) +lu*t*np.exp((lu-ly)*t)/(lu-ly)+\
lu*( 1-np.exp((lu-ly)*t) )/(lu-ly)**2 )
dkuypdtheta1 = lambda dist:(dkyu3dtheta1(dist)) #t>0 kuy
dkuyndtheta1 = lambda dist:(dkcros1dtheta1(dist)+dkcros2dtheta1(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta1 = lambda dist:(dkcros1dtheta1(-dist)+dkcros2dtheta1(-dist)) #t>0 kyu
dkyundtheta1 = lambda dist:(dkyu3dtheta1(-dist)) #t<0 kyu
dkuypdtheta2 = lambda dist:(dkyu3dtheta2(dist)) #t>0 kuy
dkuyndtheta2 = lambda dist:(dkcros1dtheta2(dist)+dkcros2dtheta2(dist)) #t<0 kuy
# cross covariance kyu
dkyupdtheta2 = lambda dist:(dkcros1dtheta2(-dist)+dkcros2dtheta2(-dist)) #t>0 kyu
dkyundtheta2 = lambda dist:(dkyu3dtheta2(-dist)) #t<0 kyu
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
#target[ss1,ss2] = kuu(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = Vu*UUdtheta1(np.abs(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = 0
dkUdvar[ss1,ss2] = UUdvar(np.abs(rdist[ss1,ss2]))
dkYdvar[ss1,ss2] = 0
dkdubias[ss1,ss2] = 1
elif i==0 and j==1:
########target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kuyp(rdist[ss1,ss2]), Vyu* kuyn(rdist[ss1,ss2]) )
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkuypdtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyp(rdist[ss1,ss2]),Vu*Vyu*dkuyndtheta2(rdist[ss1,ss2])+Vu*dVdly*kuyn(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kuyp(rdist[ss1,ss2]), Vu*dVdVy* kuyn(rdist[ss1,ss2]) )
dkdubias[ss1,ss2] = 0
elif i==1 and j==1:
#target[ss1,ss2] = kyy(np.abs(rdist[ss1,ss2]))
dktheta1[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))+dk3theta1(np.abs(rdist[ss1,ss2])))
dktheta2[ss1,ss2] = self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2])) + dk2theta2(np.abs(rdist[ss1,ss2])) +dk3theta2(np.abs(rdist[ss1,ss2])))
dkUdvar[ss1,ss2] = self.variance_Y*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkYdvar[ss1,ss2] = self.variance_U*(k1(np.abs(rdist[ss1,ss2]))+k2(np.abs(rdist[ss1,ss2]))+k3(np.abs(rdist[ss1,ss2])) )
dkdubias[ss1,ss2] = 0
else:
#######target[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[s1[0],s2[0]]) ) )
#dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta1(np.abs(rdist[ss1,ss2]))+dk2theta1(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta1(np.abs(rdist[ss1,ss2])) )
#dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 ,self.variance_U*self.variance_Y*(dk1theta2(np.abs(rdist[ss1,ss2]))+dk2theta2(np.abs(rdist[ss1,ss2]))) , self.variance_U*self.variance_Y*dkcrtheta2(np.abs(rdist[ss1,ss2])) )
dktheta1[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta1(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta1(rdist[ss1,ss2]) )
dkUdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vyu*kyup(rdist[ss1,ss2]),Vyu*kyun(rdist[ss1,ss2]))
dktheta2[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*Vyu*dkyupdtheta2(rdist[ss1,ss2])+Vu*dVdly*kyup(rdist[ss1,ss2]),Vu*Vyu*dkyundtheta2(rdist[ss1,ss2])+Vu*dVdly*kyun(rdist[ss1,ss2]) )
dkYdvar[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , Vu*dVdVy*kyup(rdist[ss1,ss2]), Vu*dVdVy*kyun(rdist[ss1,ss2]))
dkdubias[ss1,ss2] = 0
#stop
self.variance_U.gradient = np.sum(dkUdvar * dL_dK) # Vu
self.variance_Y.gradient = np.sum(dkYdvar * dL_dK) # Vy
self.lengthscale_U.gradient = np.sum(dktheta1*(-np.sqrt(3)*self.lengthscale_U**(-2))* dL_dK) #lu
self.lengthscale_Y.gradient = np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK) #ly
self.ubias.gradient = np.sum(dkdubias * dL_dK)

267
GPy/kern/_src/ODE_st.py Normal file

@@ -0,0 +1,267 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_st(Kern):
"""
kernel resulting from the first-order-in-time, diffusive spatio-temporal model
-a * d^2y/dx^2 + b * dy/dt + c * y = U(x, t),
where y is given a separable RBF (squared-exponential) prior over space and time.
:param input_dim: the number of input dimensions, has to be equal to three (time, space, output index)
:type input_dim: int
:param a: coefficient of the spatial diffusion term d^2y/dx^2
:type a: float
:param b: coefficient of the temporal derivative dy/dt
:type b: float
:param c: coefficient of y
:type c: float
:param variance_Yt: temporal variance of y
:type variance_Yt: float
:param variance_Yx: spatial variance of y
:type variance_Yx: float
:param lengthscale_Yt: temporal lengthscale of y (lyt = 1/(2*lengthscale_Yt))
:type lengthscale_Yt: float
:param lengthscale_Yx: spatial lengthscale of y (lyx = 1/(2*lengthscale_Yx))
:type lengthscale_Yx: float
:rtype: kernel object
"""
def __init__(self, input_dim, a=1.,b=1., c=1.,variance_Yx=3.,variance_Yt=1.5, lengthscale_Yx=1.5, lengthscale_Yt=1.5, active_dims=None, name='ode_st'):
assert input_dim ==3, "only defined for 3 input dims"
super(ODE_st, self).__init__(input_dim, active_dims, name)
self.variance_Yt = Param('variance_Yt', variance_Yt, Logexp())
self.variance_Yx = Param('variance_Yx', variance_Yx, Logexp())
self.lengthscale_Yt = Param('lengthscale_Yt', lengthscale_Yt, Logexp())
self.lengthscale_Yx = Param('lengthscale_Yx', lengthscale_Yx, Logexp())
self.a= Param('a', a, Logexp())
self.b = Param('b', b, Logexp())
self.c = Param('c', c, Logexp())
self.add_parameters(self.a, self.b, self.c, self.variance_Yt, self.variance_Yx, self.lengthscale_Yt,self.lengthscale_Yx)
def K(self, X, X2=None):
# model : -a d^2y/dx^2 + b dy/dt + c * y = U
# kernel Kyy: spatio-temporal RBF
# vyt: Y temporal variance, vyx: Y spatial variance, lyt: Y temporal lengthscale, lyx: Y spatial lengthscale
# kernel Kuu: doper( doper(Kyy) )
# parameters: a, b, c, lyt, lyx, vyx*vyt
"""Compute the covariance matrix between X and X2."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
xdist = (X[:,1][:,None] - X2[:,1][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
#rdist = [tdist,xdist]
#dist = np.abs(X - X2.T)
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt=1/(2*self.lengthscale_Yt)
lyx=1/(2*self.lengthscale_Yx)
a = self.a ## -a is used in the model, negative diffusion
b = self.b
c = self.c
kyy = lambda tdist,xdist: np.exp(-lyt*(tdist) -lyx*(xdist))
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k2 = lambda xdist: ( 4*lyx**2 * (xdist) - 2*lyx )
k3 = lambda xdist: ( 3*4*lyx**2 - 6*8*xdist*lyx**3 + 16*xdist**2*lyx**4 )
k4 = lambda ttdist: 2*lyt*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
elif i==0 and j==1:
K[ss1,ss2] = (-a*k2(xdist[ss1,ss2]) + b*k4(ttdist[ss1,ss2]) + c)*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(np.abs(rdist[ss1,ss2])), kuyn(np.abs(rdist[ss1,ss2]) ) )
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kuyp(rdist[ss1,ss2]), kuyn(rdist[ss1,ss2] ) )
elif i==1 and j==1:
K[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )* vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
else:
K[ss1,ss2] = (-a*k2(xdist[ss1,ss2]) - b*k4(ttdist[ss1,ss2]) + c)*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#K[ss1,ss2]= np.where( rdist[ss1,ss2]>0 , kyup(np.abs(rdist[ss1,ss2])), kyun(np.abs(rdist[ss1,ss2]) ) )
#K[ss1,ss2] = np.where( rdist[ss1,ss2]>0 , kyup(rdist[ss1,ss2]), kyun(rdist[ss1,ss2] ) )
#stop
return K
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix associated to X."""
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt = 1./(2*self.lengthscale_Yt)
lyx = 1./(2*self.lengthscale_Yx)
a = self.a
b = self.b
c = self.c
## dk^2/dtdt'
k1 = (2*lyt )*vyt*vyx
## dk^2/dx^2
k2 = ( - 2*lyx )*vyt*vyx
## dk^4/dx^2dx'^2
k3 = ( 4*3*lyx**2 )*vyt*vyx
Kdiag = np.zeros(X.shape[0])
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= vyt*vyx
elif i==1:
#i=1
Kdiag[s1]+= b**2*k1 - 2*a*c*k2 + a**2*k3 + c**2*vyt*vyx
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
#def dK_dtheta(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
vyt = self.variance_Yt
vyx = self.variance_Yx
lyt = 1./(2*self.lengthscale_Yt)
lyx = 1./(2*self.lengthscale_Yx)
a = self.a
b = self.b
c = self.c
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
xdist = (X[:,1][:,None] - X2[:,1][None,:])**2
#rdist = [tdist,xdist]
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
rd=tdist.shape[0]
dka = np.zeros([rd,rd])
dkb = np.zeros([rd,rd])
dkc = np.zeros([rd,rd])
dkYdvart = np.zeros([rd,rd])
dkYdvarx = np.zeros([rd,rd])
dkYdlent = np.zeros([rd,rd])
dkYdlenx = np.zeros([rd,rd])
kyy = lambda tdist,xdist: np.exp(-lyt*(tdist) -lyx*(xdist))
#k1 = lambda tdist: (lyt - lyt**2 * (tdist) )
#k2 = lambda xdist: ( lyx**2 * (xdist) - lyx )
#k3 = lambda xdist: ( 3*lyx**2 - 6*xdist*lyx**3 + xdist**2*lyx**4 )
#k4 = lambda tdist: -lyt*np.sqrt(tdist)
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k2 = lambda xdist: ( 4*lyx**2 * (xdist) - 2*lyx )
k3 = lambda xdist: ( 3*4*lyx**2 - 6*8*xdist*lyx**3 + 16*xdist**2*lyx**4 )
k4 = lambda ttdist: 2*lyt*(ttdist)
dkyydlyx = lambda tdist,xdist: kyy(tdist,xdist)*(-xdist)
dkyydlyt = lambda tdist,xdist: kyy(tdist,xdist)*(-tdist)
dk1dlyt = lambda tdist: 2. - 4*2.*lyt*tdist
dk2dlyx = lambda xdist: (4.*2.*lyx*xdist -2.)
dk3dlyx = lambda xdist: (6.*4.*lyx - 18.*8*xdist*lyx**2 + 4*16*xdist**2*lyx**3)
dk4dlyt = lambda ttdist: 2*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
dka[ss1,ss2] = 0
dkb[ss1,ss2] = 0
dkc[ss1,ss2] = 0
dkYdvart[ss1,ss2] = vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])
elif i==0 and j==1:
dka[ss1,ss2] = -k2(xdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = k4(ttdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#dkYdvart[ss1,ss2] = 0
#dkYdvarx[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = 0
#dkYdlenx[ss1,ss2] = 0
dkYdvart[ss1,ss2] = (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])* (-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*b*dk4dlyt(ttdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*k2(xdist[ss1,ss2])+b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*dk2dlyx(xdist[ss1,ss2]))
elif i==1 and j==1:
dka[ss1,ss2] = (2*a*k3(xdist[ss1,ss2]) - 2*c*k2(xdist[ss1,ss2]))*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = 2*b*k1(tdist[ss1,ss2])*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = (-2*a*k2(xdist[ss1,ss2]) + 2*c )*vyt*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvart[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )*vyx* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = ( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 )*vyt* kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])*( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 ) +\
vyx*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*b**2*dk1dlyt(tdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*( b**2*k1(tdist[ss1,ss2]) - 2*a*c*k2(xdist[ss1,ss2]) + a**2*k3(xdist[ss1,ss2]) + c**2 ) +\
vyx*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])* (-2*a*c*dk2dlyx(xdist[ss1,ss2]) + a**2*dk3dlyx(xdist[ss1,ss2]) )
else:
dka[ss1,ss2] = -k2(xdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkb[ss1,ss2] = -k4(ttdist[ss1,ss2])*vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkc[ss1,ss2] = vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
#dkYdvart[ss1,ss2] = 0
#dkYdvarx[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = 0
#dkYdlenx[ss1,ss2] = 0
dkYdvart[ss1,ss2] = (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdvarx[ss1,ss2] = (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)*vyt*kyy(tdist[ss1,ss2],xdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*vyx*dkyydlyt(tdist[ss1,ss2],xdist[ss1,ss2])* (-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-1)*b*dk4dlyt(ttdist[ss1,ss2])
dkYdlenx[ss1,ss2] = vyt*vyx*dkyydlyx(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*k2(xdist[ss1,ss2])-b*k4(ttdist[ss1,ss2])+c)+\
vyt*vyx*kyy(tdist[ss1,ss2],xdist[ss1,ss2])*(-a*dk2dlyx(xdist[ss1,ss2]))
self.a.gradient = np.sum(dka * dL_dK)
self.b.gradient = np.sum(dkb * dL_dK)
self.c.gradient = np.sum(dkc * dL_dK)
self.variance_Yt.gradient = np.sum(dkYdvart * dL_dK) # Vy
self.variance_Yx.gradient = np.sum(dkYdvarx * dL_dK)
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK) #ly np.sum(dktheta2*(-self.lengthscale_Y**(-2)) * dL_dK)
self.lengthscale_Yx.gradient = np.sum(dkYdlenx*(-0.5*self.lengthscale_Yx**(-2)) * dL_dK)
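# Usage sketch (hedged: assumes ODE_st is exposed on GPy.kern). X has three columns: time,
# space and the output index in the last column; judging from Kdiag above, index 0 selects
# the plain spatio-temporal RBF block (the output y) and index 1 the operator-applied block
# (the forcing term U).
#
#   import numpy as np
#   import GPy
#   t = np.linspace(0., 5., 10)
#   x = np.linspace(0., 1., 10)
#   base = np.column_stack([t, x])
#   X = np.vstack([np.column_stack([base, np.zeros(10)]),   # y rows, index 0
#                  np.column_stack([base, np.ones(10)])])   # U rows, index 1
#   k = GPy.kern.ODE_st(input_dim=3)
#   K = k.K(X)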

165
GPy/kern/_src/ODE_t.py Normal file

@@ -0,0 +1,165 @@
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
from independent_outputs import index_to_slices
class ODE_t(Kern):
def __init__(self, input_dim, a=1., c=1., variance_Yt=3., lengthscale_Yt=1.5, ubias=1., active_dims=None, name='ode_t'):
assert input_dim ==2, "only defined for 2 input dims"
super(ODE_t, self).__init__(input_dim, active_dims, name)
self.variance_Yt = Param('variance_Yt', variance_Yt, Logexp())
self.lengthscale_Yt = Param('lengthscale_Yt', lengthscale_Yt, Logexp())
self.a= Param('a', a, Logexp())
self.c = Param('c', c, Logexp())
self.ubias = Param('ubias', ubias, Logexp())
self.add_parameters(self.a, self.c, self.variance_Yt, self.lengthscale_Yt,self.ubias)
def K(self, X, X2=None):
"""Compute the covariance matrix between X and X2."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
K = np.zeros((X.shape[0], X2.shape[0]))
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
vyt = self.variance_Yt
lyt=1/(2*self.lengthscale_Yt)
a = -self.a
c = self.c
kyy = lambda tdist: np.exp(-lyt*(tdist))
k1 = lambda tdist: (2*lyt - 4*lyt**2 *(tdist) )
k4 = lambda tdist: 2*lyt*(tdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
K[ss1,ss2] = vyt*kyy(tdist[ss1,ss2])
elif i==0 and j==1:
K[ss1,ss2] = (k4(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#K[ss1,ss2] = (2*lyt*(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
elif i==1 and j==1:
K[ss1,ss2] = ( k1(tdist[ss1,ss2]) + 1. )*vyt* kyy(tdist[ss1,ss2])+self.ubias
else:
K[ss1,ss2] = (-k4(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#K[ss1,ss2] = (-2*lyt*(ttdist[ss1,ss2])+1)*vyt*kyy(tdist[ss1,ss2])
#stop
return K
def Kdiag(self, X):
vyt = self.variance_Yt
lyt = 1./(2*self.lengthscale_Yt)
a = -self.a
c = self.c
k1 = (2*lyt )*vyt
Kdiag = np.zeros(X.shape[0])
slices = index_to_slices(X[:,-1])
for i, ss1 in enumerate(slices):
for s1 in ss1:
if i==0:
Kdiag[s1]+= vyt
elif i==1:
#i=1
Kdiag[s1]+= k1 + vyt+self.ubias
#Kdiag[s1]+= Vu*Vy*(k1+k2+k3)
else:
raise ValueError, "invalid input/output index"
return Kdiag
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters."""
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
K = np.zeros((X.shape[0], X.shape[0]))
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
vyt = self.variance_Yt
lyt = 1./(2*self.lengthscale_Yt)
tdist = (X[:,0][:,None] - X2[:,0][None,:])**2
ttdist = (X[:,0][:,None] - X2[:,0][None,:])
#rdist = [tdist,xdist]
rd=tdist.shape[0]
dka = np.zeros([rd,rd])
dkc = np.zeros([rd,rd])
dkYdvart = np.zeros([rd,rd])
dkYdlent = np.zeros([rd,rd])
dkdubias = np.zeros([rd,rd])
kyy = lambda tdist: np.exp(-lyt*(tdist))
dkyydlyt = lambda tdist: kyy(tdist)*(-tdist)
k1 = lambda tdist: (2*lyt - 4*lyt**2 * (tdist) )
k4 = lambda ttdist: 2*lyt*(ttdist)
dk1dlyt = lambda tdist: 2. - 4*2.*lyt*tdist
dk4dlyt = lambda ttdist: 2*(ttdist)
for i, s1 in enumerate(slices):
for j, s2 in enumerate(slices2):
for ss1 in s1:
for ss2 in s2:
if i==0 and j==0:
dkYdvart[ss1,ss2] = kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])
dkdubias[ss1,ss2] = 0
elif i==0 and j==1:
dkYdvart[ss1,ss2] = (k4(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
#dkYdvart[ss1,ss2] = ((2*lyt*ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (k4(ttdist[ss1,ss2])+1.)+\
vyt*kyy(tdist[ss1,ss2])*(dk4dlyt(ttdist[ss1,ss2]))
#dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (2*lyt*(ttdist[ss1,ss2])+1.)+\
#vyt*kyy(tdist[ss1,ss2])*(2*ttdist[ss1,ss2])
dkdubias[ss1,ss2] = 0
elif i==1 and j==1:
dkYdvart[ss1,ss2] = (k1(tdist[ss1,ss2]) + 1. )* kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])*( k1(tdist[ss1,ss2]) + 1. ) +\
vyt*kyy(tdist[ss1,ss2])*dk1dlyt(tdist[ss1,ss2])
dkdubias[ss1,ss2] = 1
else:
dkYdvart[ss1,ss2] = (-k4(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
#dkYdvart[ss1,ss2] = (-2*lyt*(ttdist[ss1,ss2])+1)*kyy(tdist[ss1,ss2])
dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (-k4(ttdist[ss1,ss2])+1.)+\
vyt*kyy(tdist[ss1,ss2])*(-dk4dlyt(ttdist[ss1,ss2]) )
dkdubias[ss1,ss2] = 0
#dkYdlent[ss1,ss2] = vyt*dkyydlyt(tdist[ss1,ss2])* (-2*lyt*(ttdist[ss1,ss2])+1.)+\
#vyt*kyy(tdist[ss1,ss2])*(-2)*(ttdist[ss1,ss2])
self.variance_Yt.gradient = np.sum(dkYdvart * dL_dK)
self.lengthscale_Yt.gradient = np.sum(dkYdlent*(-0.5*self.lengthscale_Yt**(-2)) * dL_dK)
self.ubias.gradient = np.sum(dkdubias * dL_dK)


188
GPy/kern/_src/add.py Normal file

@@ -0,0 +1,188 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import itertools
from ...util.caching import Cache_this
from kern import CombinationKernel
class Add(CombinationKernel):
"""
Add given list of kernels together.
propagates gradients through.
This kernel will take over the active dims of the subkernels passed in.
"""
def __init__(self, subkerns, name='add'):
for i, kern in enumerate(subkerns[:]):
if isinstance(kern, Add):
del subkerns[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
subkerns.insert(i, part)
super(Add, self).__init__(subkerns, name)
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Add all kernels together.
If a list of parts (of this kernel!) `which_parts` is given, only
the parts of the list are taken to compute the covariance.
"""
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts if not p.is_fixed]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn't combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p1.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p2.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
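# Why the cross terms above are needed: for a sum kernel k = k_a + k_b,
#   psi2_{mo} = E_q(X)[ sum_n k(x_n, z_m) k(x_n, z_o) ]
# contains, besides psi2 of each part, the mixed expectations
#   E[ sum_n k_a(x_n, z_m) k_b(x_n, z_o) + k_b(x_n, z_m) k_a(x_n, z_o) ].
# These only reduce to products of psi1 statistics when one part is constant (Bias) or when
# the two parts act on non-overlapping input dimensions (so the expectation factorises),
# which is exactly what the isinstance checks and the active_dims assertion enforce; White
# adds no cross term.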
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_psi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(dL_psi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_grads = [np.zeros(v.shape) for v in variational_posterior.parameters]
for p1 in self.parameters:
#compute the effective dL_dpsi1. Extra terms appear because of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parameters:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(0) * p2.psi1(Z, variational_posterior) * 2.
grads = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
[np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))]
return target_grads
def add(self, other):
if isinstance(other, Add):
other_params = other.parameters[:]
for p in other_params:
other.unlink_parameter(p)
self.link_parameters(*other_params)
else:
self.link_parameter(other)
self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
return self
def input_sensitivity(self, summarize=True):
if summarize:
return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
else:
i_s = np.zeros((len(self.parts), self.input_dim))
from operator import setitem
[setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
return i_s
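# Usage sketch (hedged: relies on the standard GPy behaviour that summing kernels builds an
# Add):
#
#   import numpy as np
#   import GPy
#   k = GPy.kern.RBF(1) + GPy.kern.Bias(1)       # k is an Add holding both parts
#   X = np.random.randn(5, 1)
#   K = k.K(X)                                   # RBF covariance plus the Bias variance
#   K1 = k.K(X, which_parts=k.parts[0])          # covariance of the RBF part alone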

50
GPy/kern/_src/brownian.py Normal file

@@ -0,0 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
class Brownian(Kern):
"""
Brownian motion in 1D only.
Negative times are treated as a separate (backwards!) Brownian motion.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance:
:type variance: float
"""
def __init__(self, input_dim=1, variance=1., active_dims=None, name='Brownian'):
assert input_dim==1, "Brownian motion in 1D only"
super(Brownian, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameters(self.variance)
def K(self,X,X2=None):
if X2 is None:
X2 = X
return self.variance*np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.)
def Kdiag(self,X):
return self.variance*np.abs(X.flatten())
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
self.variance.gradient = np.sum(dL_dK * np.where(np.sign(X)==np.sign(X2.T),np.fmin(np.abs(X),np.abs(X2.T)), 0.))
#def update_gradients_diag(self, dL_dKdiag, X):
#self.variance.gradient = np.dot(np.abs(X.flatten()), dL_dKdiag)
#def gradients_X(self, dL_dK, X, X2=None):
#if X2 is None:
#return np.sum(self.variance*dL_dK*np.abs(X),1)[:,None]
#else:
#return np.sum(np.where(np.logical_and(np.abs(X)<np.abs(X2.T), np.sign(X)==np.sign(X2)), self.variance*dL_dK,0.),1)[:,None]
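# Covariance implemented above, written out (directly from K()):
#   k(s, t) = variance * min(|s|, |t|)   if sign(s) == sign(t)
#           = 0                          otherwise,
# i.e. standard Brownian motion for non-negative times, with an independent (time-reversed)
# Brownian motion on the negative half-line.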


@@ -0,0 +1,174 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from scipy import weave
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.config import config # for assesing whether to use weave
class Coregionalize(Kern):
"""
Covariance function for intrinsic/linear coregionalization models
This covariance has the form:
.. math::
\mathbf{B} = \mathbf{W}\mathbf{W}^\top + \text{diag}(kappa)
An intrinsic/linear coregionalization covariance function of the form:
.. math::
k_2(x, y)=\mathbf{B} k(x, y)
it is obtained as the tensor product between a covariance function
k(x, y) and B.
:param output_dim: number of outputs to coregionalize
:type output_dim: int
:param rank: number of columns of the W matrix (this parameter is ignored if parameter W is not None)
:type rank: int
:param W: a low rank matrix that determines the correlations between the different outputs, together with kappa it forms the coregionalization matrix B
:type W: numpy array of dimensionality (num_outpus, W_columns)
:param kappa: a vector which allows the outputs to behave independently
:type kappa: numpy array of dimensionality (output_dim, )
.. note: see coregionalization examples in GPy.examples.regression for some usage.
"""
def __init__(self, input_dim, output_dim, rank=1, W=None, kappa=None, active_dims=None, name='coregion'):
super(Coregionalize, self).__init__(input_dim, active_dims, name=name)
self.output_dim = output_dim
self.rank = rank
if self.rank>output_dim:
print("Warning: Unusual choice of rank, it should normally be less than the output_dim.")
if W is None:
W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim, self.rank)
self.W = Param('W', W)
if kappa is None:
kappa = 0.5*np.ones(self.output_dim)
else:
assert kappa.shape==(self.output_dim, )
self.kappa = Param('kappa', kappa, Logexp())
self.link_parameters(self.W, self.kappa)
def parameters_changed(self):
self.B = np.dot(self.W, self.W.T) + np.diag(self.kappa)
def K(self, X, X2=None):
if config.getboolean('weave', 'working'):
try:
return self._K_weave(X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self._K_numpy(X, X2)
else:
return self._K_numpy(X, X2)
def _K_numpy(self, X, X2=None):
index = np.asarray(X, dtype=np.int)
if X2 is None:
return self.B[index,index.T]
else:
index2 = np.asarray(X2, dtype=np.int)
return self.B[index,index2.T]
def _K_weave(self, X, X2=None):
"""compute the kernel function using scipy.weave"""
index = np.asarray(X, dtype=np.int)
if X2 is None:
target = np.empty((X.shape[0], X.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<N; i++){
target[i+i*N] = B[index[i]+output_dim*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] = B[index[i]+output_dim*index[j]];
target[i+j*N] = target[j+i*N];
}
}
"""
N, B, output_dim = index.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'N', 'B', 'output_dim'])
else:
index2 = np.asarray(X2, dtype=np.int)
target = np.empty((X.shape[0], X2.shape[0]), dtype=np.float64)
code="""
for(int i=0;i<num_inducing; i++){
for(int j=0; j<N; j++){
target[i+j*num_inducing] = B[output_dim*index[j]+index2[i]];
}
}
"""
N, num_inducing, B, output_dim = index.size, index2.size, self.B, self.output_dim
weave.inline(code, ['target', 'index', 'index2', 'N', 'num_inducing', 'B', 'output_dim'])
return target
def Kdiag(self, X):
return np.diag(self.B)[np.asarray(X, dtype=np.int).flatten()]
def update_gradients_full(self, dL_dK, X, X2=None):
index = np.asarray(X, dtype=np.int)
if X2 is None:
index2 = index
else:
index2 = np.asarray(X2, dtype=np.int)
#attempt to use weave for a nasty double indexing loop: fall back to numpy
if config.getboolean('weave', 'working'):
try:
dL_dK_small = self._gradient_reduce_weave(dL_dK, index, index2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
else:
dL_dK_small = self._gradient_reduce_numpy(dL_dK, index, index2)
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:, None, :]*dL_dK_small[:, :, None]).sum(0)
self.W.gradient = dW
self.kappa.gradient = dkappa
def _gradient_reduce_weave(self, dL_dK, index, index2):
dL_dK_small = np.zeros_like(self.B)
code="""
for(int i=0; i<num_inducing; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + output_dim*index2[i]] += dL_dK[i+j*num_inducing];
}
}
"""
N, num_inducing, output_dim = index.size, index2.size, self.output_dim
weave.inline(code, ['N', 'num_inducing', 'output_dim', 'dL_dK', 'dL_dK_small', 'index', 'index2'])
return dL_dK_small
def _gradient_reduce_numpy(self, dL_dK, index, index2):
index, index2 = index[:,0], index2[:,0]
dL_dK_small = np.zeros_like(self.B)
for i in range(self.output_dim):
tmp1 = dL_dK[index==i]
for j in range(self.output_dim):
dL_dK_small[j,i] = tmp1[:,index2==j].sum()
return dL_dK_small
def update_gradients_diag(self, dL_dKdiag, X):
index = np.asarray(X, dtype=np.int).flatten()
dL_dKdiag_small = np.array([dL_dKdiag[index==i].sum() for i in xrange(self.output_dim)])
self.W.gradient = 2.*self.W*dL_dKdiag_small[:, None]
self.kappa.gradient = dL_dKdiag_small
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
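# Usage sketch (hedged: the exact composition syntax varies between GPy versions; active_dims
# must route the index column to this kernel). An intrinsic coregionalization model over two
# outputs can be built as a product with a data kernel:
#
#   import GPy
#   k = GPy.kern.RBF(1, active_dims=[0]) \
#       * GPy.kern.Coregionalize(1, output_dim=2, rank=1, active_dims=[1])
#   # X carries the output index in column 1; the effective covariance is
#   # K_rbf(x, x') * B[i, i'] with B = W W^T + diag(kappa).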


@@ -0,0 +1,202 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern, CombinationKernel
import numpy as np
import itertools
def index_to_slices(index):
"""
Take a numpy array of integers (index) and return a nested list of slices, such that the slices describe the start and stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
if len(index)==0:
return []
#construct the return structure
ind = np.asarray(index,dtype=np.int)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
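# A quick sanity check of index_to_slices, reproducing the first docstring example:
_slices = index_to_slices(np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2]))
assert _slices == [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]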
class IndependentOutputs(CombinationKernel):
"""
A kernel which can represent several independent functions. This kernel
'switches off' parts of the matrix where the output indexes are different.
The index of the functions is given by the last column of the input X; the
rest of the columns of X are passed to the underlying kernel for
computation (in blocks).
:param kernels: either a single kernel, or a list of kernels to work with. If a
list of kernels is given, the values in the index_dim column select which kernel in the list applies.
"""
def __init__(self, kernels, index_dim=-1, name='independ'):
assert isinstance(index_dim, int), "IndependentOutputs kernel is only defined with one input dimension being the index"
if not isinstance(kernels, list):
self.single_kern = True
self.kern = kernels
kernels = [kernels]
else:
self.single_kern = False
self.kern = kernels
super(IndependentOutputs, self).__init__(kernels=kernels, extra_dims=[index_dim], name=name)
self.index_dim = index_dim
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
[[target.__setitem__((s,ss), kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for kern, slices_i in zip(kerns, slices)]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
target = np.zeros((X.shape[0], X2.shape[0]))
[[target.__setitem__((s,s2), kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target
def Kdiag(self,X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape[0])
[[np.copyto(target[s], kern.Kdiag(X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim])
if self.single_kern:
target = np.zeros(self.kern.size)
kerns = itertools.repeat(self.kern)
else:
kerns = self.kern
target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X, X2):
kern.update_gradients_full(dL,X,X2)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
if X2 is None:
[[collate_grads(kern, i, dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for i,(kern,slices_i) in enumerate(zip(kerns,slices))]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
[[[collate_grads(kern, i, dL_dK[s,s2],X[s],X2[s2]) for s in slices_i] for s2 in slices_j] for i,(kern,slices_i,slices_j) in enumerate(zip(kerns,slices,slices2))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
def gradients_X(self,dL_dK, X, X2=None):
target = np.zeros(X.shape)
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if X2 is None:
# TODO: make use of index_to_slices
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s,s],X[s],None))
for kern, s in zip(kerns, slices)]
#slices = index_to_slices(X[:,self.index_dim])
#[[np.add(target[s], kern.gradients_X(dL_dK[s,s], X[s]), out=target[s])
# for s in slices_i] for kern, slices_i in zip(kerns, slices)]
#import ipdb;ipdb.set_trace()
#[[(np.add(target[s ], kern.gradients_X(dL_dK[s ,ss],X[s ], X[ss]), out=target[s ]),
# np.add(target[ss], kern.gradients_X(dL_dK[ss,s ],X[ss], X[s ]), out=target[ss]))
# for s, ss in itertools.combinations(slices_i, 2)] for kern, slices_i in zip(kerns, slices)]
else:
values = np.unique(X[:,self.index_dim])
slices = [X[:,self.index_dim]==i for i in values]
slices2 = [X2[:,self.index_dim]==i for i in values]
[target.__setitem__(s, kern.gradients_X(dL_dK[s, :][:, s2],X[s],X2[s2]))
for kern, s, s2 in zip(kerns, slices, slices2)]
# TODO: make work with index_to_slices
#slices = index_to_slices(X[:,self.index_dim])
#slices2 = index_to_slices(X2[:,self.index_dim])
#[[target.__setitem__(s, target[s] + kern.gradients_X(dL_dK[s,s2], X[s], X2[s2])) for s, s2 in itertools.product(slices_i, slices_j)] for kern, slices_i,slices_j in zip(kerns, slices,slices2)]
return target
def gradients_X_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
target = np.zeros(X.shape)
[[target.__setitem__(s, kern.gradients_X_diag(dL_dKdiag[s],X[s])) for s in slices_i] for kern, slices_i in zip(kerns, slices)]
return target
def update_gradients_diag(self, dL_dKdiag, X):
slices = index_to_slices(X[:,self.index_dim])
kerns = itertools.repeat(self.kern) if self.single_kern else self.kern
if self.single_kern: target = np.zeros(self.kern.size)
else: target = [np.zeros(kern.size) for kern, _ in zip(kerns, slices)]
def collate_grads(kern, i, dL, X):
kern.update_gradients_diag(dL,X)
if self.single_kern: target[:] += kern.gradient
else: target[i][:] += kern.gradient
[[collate_grads(kern, i, dL_dKdiag[s], X[s,:]) for s in slices_i] for i, (kern, slices_i) in enumerate(zip(kerns, slices))]
if self.single_kern: kern.gradient = target
else:[kern.gradient.__setitem__(Ellipsis, target[i]) for i, [kern, _] in enumerate(zip(kerns, slices))]
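# A minimal sketch of how the index column drives the block structure of K (illustrative
# only; the Gaussian function below is just a stand-in for a real kernel's K):
def _toy_K(Xa, Xb):
    return np.exp(-0.5*np.square(Xa[:, :1] - Xb[:, :1].T))
_X = np.hstack([np.random.rand(6, 1), np.repeat([0, 1], 3)[:, None]])  # last column = output index
_Kblocks = np.zeros((6, 6))
for _slices_i in index_to_slices(_X[:, -1]):
    for _s in _slices_i:
        _Kblocks[_s, _s] = _toy_K(_X[_s], _X[_s])   # blocks for differing outputs stay zero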
class Hierarchical(CombinationKernel):
"""
A kernel which can represent a simple hierarchical model.
See Hensman et al 2013, "Hierarchical Bayesian modelling of gene expression time
series across irregularly sampled replicates and clusters"
http://www.biomedcentral.com/1471-2105/14/252
To construct this kernel, you must pass a list of kernels. The first kernel
will be assumed to be the 'base' kernel, and will be computed everywhere.
For every additional kernel, we assume another layer in the hierarchy, with
a corresponding column of the input matrix which indexes which function the
data are in at that level.
For more, see the ipython notebook documentation on Hierarchical
covariances.
"""
def __init__(self, kernels, name='hierarchy'):
assert all([k.input_dim==kernels[0].input_dim for k in kernels])
assert len(kernels) > 1
self.levels = len(kernels) -1
input_max = max([k.input_dim for k in kernels])
super(Hierarchical, self).__init__(kernels=kernels, extra_dims = range(input_max, input_max + len(kernels)-1), name=name)
def K(self,X ,X2=None):
K = self.parts[0].K(X, X2) # compute 'base' kern everywhere
slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None:
[[[np.add(K[s,s], k.K(X[s], None), K[s, s]) for s in slices_i] for slices_i in slices_k] for k, slices_k in zip(self.parts[1:], slices)]
else:
slices2 = [index_to_slices(X2[:,i]) for i in self.extra_dims]
[[[np.add(K[s,ss], k.K(X[s], X2[ss]), K[s, ss]) for s,ss in zip(slices_i, slices_j)] for slices_i, slices_j in zip(slices_k1, slices_k2)] for k, slices_k1, slices_k2 in zip(self.parts[1:], slices, slices2)]
return K
def Kdiag(self,X):
return np.diag(self.K(X))
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def update_gradients_full(self,dL_dK,X,X2=None):
slices = [index_to_slices(X[:,i]) for i in self.extra_dims]
if X2 is None:
self.parts[0].update_gradients_full(dL_dK, X, None)
for k, slices_k in zip(self.parts[1:], slices):
target = np.zeros(k.size)
def collate_grads(dL, X, X2, target):
k.update_gradients_full(dL,X,X2)
target += k.gradient
[[collate_grads(dL_dK[s,s], X[s], None, target) for s in slices_i] for slices_i in slices_k]
k.gradient[:] = target
else:
raise NotImplementedError
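# A minimal usage sketch (assumes GPy.kern.RBF and that this class is exposed as
# GPy.kern.Hierarchical, like the other kernels in this package; interactive-session
# style): one base kernel plus one replicate-level kernel, with an extra input column
# giving the replicate index.
import numpy as np
import GPy
t = np.random.rand(12, 1)
replicate = np.repeat([0, 1, 2], 4)[:, None]      # three replicates, contiguous blocks
X = np.hstack([t, replicate])
k = GPy.kern.Hierarchical([GPy.kern.RBF(1), GPy.kern.RBF(1)])
K = k.K(X)   # base RBF everywhere, replicate RBF added only within matching replicates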

GPy/kern/_src/kern.py (new file)
@@ -0,0 +1,280 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import sys
import numpy as np
from ...core.parameterization.parameterized import Parameterized
from kernel_slice_operations import KernCallsViaSlicerMeta
from ...util.caching import Cache_this
from GPy.core.parameterization.observable_array import ObsAr
class Kern(Parameterized):
#===========================================================================
# This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations
__metaclass__ = KernCallsViaSlicerMeta
#===========================================================================
_support_GPU=False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
"""
The base class for a kernel: a positive definite function
which forms a covariance function (kernel).
input_dim:
is the number of dimensions to work on. Make sure to give the
tight dimensionality of inputs.
You most likely want this to be the integer telling the number of
input dimensions of the kernel.
If this is not an integer (!) we will work on the whole input matrix X,
and not check whether dimensions match or not (!).
active_dims:
are the active dimensions of inputs X we will work on.
All kernels will get sliced Xes as inputs, if active_dims is not None.
Only positive integers are allowed in active_dims!
If active_dims is None, slicing is switched off and all X will be passed through as given.
:param int input_dim: the number of input dimensions to the function
:param array-like|None active_dims: list of indices of the dimensions this kernel works on, or None if no slicing
Do not instantiate.
"""
super(Kern, self).__init__(name=name, *a, **kw)
self.input_dim = int(input_dim)
if active_dims is None:
active_dims = np.arange(input_dim)
self.active_dims = np.atleast_1d(active_dims).astype(int)
assert self.active_dims.size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, self.active_dims.size, self.active_dims)
self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
self._return_psi2_n_flag = ObsAr(np.zeros(1)).astype(bool)
@property
def return_psi2_n(self):
"""
Flag whether to pass back psi2 as NxMxM or MxM, by summing out N.
"""
return self._return_psi2_n_flag[0]
@return_psi2_n.setter
def return_psi2_n(self, val):
def visit(self):
if isinstance(self, Kern):
self._return_psi2_n_flag[0]=val
self.traverse(visit)
@Cache_this(limit=20)
def _slice_X(self, X):
return X[:, self.active_dims]
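# A pure-numpy illustration of the slicing convention used by _slice_X above
# (illustrative only): the kernel only ever sees the active_dims columns of X.
import numpy as np
_Xfull = np.random.rand(20, 5)
_active_dims = np.array([0, 3])
assert _Xfull[:, _active_dims].shape == (20, 2)   # what K, Kdiag, etc. actually receive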
def K(self, X, X2):
"""
Compute the kernel function.
:param X: the first set of inputs to the kernel
:param X2: (optional) the second set of arguments to the kernel. If X2
is None, this is passed through to the 'part' object, which
handles this as X2 == X.
"""
raise NotImplementedError
def Kdiag(self, X):
raise NotImplementedError
def psi0(self, Z, variational_posterior):
raise NotImplementedError
def psi1(self, Z, variational_posterior):
raise NotImplementedError
def psi2(self, Z, variational_posterior):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError
def update_gradients_diag(self, dL_dKdiag, X):
""" update the gradients of all parameters when using only the diagonal elements of the covariance matrix"""
raise NotImplementedError
def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The essential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
raise NotImplementedError
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Returns the derivative of the objective wrt Z, using the chain rule
through the expectation variables.
"""
raise NotImplementedError
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distribution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError
def plot(self, x=None, fignum=None, ax=None, title=None, plot_limits=None, resolution=None, **mpl_kwargs):
"""
plot this kernel.
:param x: the value to use for the other kernel argument (kernels are a function of two variables!)
:param fignum: figure number of the plot
:param ax: matplotlib axis to plot on
:param title: the matplotlib title
:param plot_limits: the range over which to plot the kernel
:param resolution: the resolution of the lines used in plotting
:param mpl_kwargs: valid keyword arguments to pass through to matplotlib (e.g. lw=7)
"""
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
kernel_plots.plot(self, x, fignum, ax, title, plot_limits, resolution, **mpl_kwargs)
def plot_ARD(self, *args, **kw):
"""
See :class:`~GPy.plotting.matplot_dep.kernel_plots`
"""
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args,**kw)
def input_sensitivity(self, summarize=True):
"""
Returns the sensitivity for each dimension of this kernel.
"""
return np.zeros(self.input_dim)
def __add__(self, other):
""" Overloading of the '+' operator. for more control, see self.add """
return self.add(other)
def __iadd__(self, other):
return self.add(other)
def add(self, other, name='add'):
"""
Add another kernel to this one.
:param other: the other kernel to be added
:type other: GPy.kern
"""
assert isinstance(other, Kern), "only kernels can be added to kernels..."
from add import Add
return Add([self, other], name=name)
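# Usage sketch for the '+' overload (assumes GPy.kern.RBF and GPy.kern.Bias, defined
# elsewhere in this package; interactive-session style, not part of this module):
import GPy
k_sum = GPy.kern.RBF(1) + GPy.kern.Bias(1)   # builds an Add kernel; its K is the sum of the parts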
def __mul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __imul__(self, other):
""" Here we overload the '*' operator. See self.prod for more information"""
return self.prod(other)
def __pow__(self, other):
"""
Shortcut for tensor `prod`.
"""
assert np.all(self.active_dims == range(self.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
assert np.all(other.active_dims == range(other.input_dim)), "Can only use kernels, which have their input_dims defined from 0"
other.active_dims += self.input_dim
return self.prod(other)
def prod(self, other, name='mul'):
"""
Multiply two kernels (either on the same space, or on the tensor
product of the input space).
:param other: the other kernel to be added
:type other: GPy.kern
:param tensor: whether or not to use the tensor space (default is false).
:type tensor: bool
"""
assert isinstance(other, Kern), "only kernels can be multiplied to kernels..."
from prod import Prod
#kernels = []
#if isinstance(self, Prod): kernels.extend(self.parameters)
#else: kernels.append(self)
#if isinstance(other, Prod): kernels.extend(other.parameters)
#else: kernels.append(other)
return Prod([self, other], name)
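# Usage sketch for '*' and '**' (same assumptions as the '+' sketch above): '*' multiplies
# kernels on the same input columns, while '**' first shifts the second kernel's
# active_dims so the product acts on the tensor (concatenated) input space.
import GPy
k_prod = GPy.kern.RBF(1) * GPy.kern.RBF(1)     # both factors act on column 0
k_tensor = GPy.kern.RBF(1) ** GPy.kern.RBF(1)  # second factor shifted to column 1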
def _check_input_dim(self, X):
assert X.shape[1] == self.input_dim, "{} did not specify active_dims and X has wrong shape: X_dim={}, whereas input_dim={}".format(self.name, X.shape[1], self.input_dim)
def _check_active_dims(self, X):
assert X.shape[1] >= len(self.active_dims), "At least {} dimensional X needed, X.shape={!s}".format(len(self.active_dims), X.shape)
class CombinationKernel(Kern):
"""
Abstract super class for combination kernels.
A combination kernel combines (a list of) kernels and works on those.
Examples are the HierarchicalKernel or Add and Prod kernels.
"""
def __init__(self, kernels, name, extra_dims=[]):
"""
Abstract super class for combination kernels.
A combination kernel combines (a list of) kernels and works on those.
Examples are the HierarchicalKernel or Add and Prod kernels.
:param list kernels: List of kernels to combine (can be only one element)
:param str name: name of the combination kernel
:param array-like extra_dims: if needed extra dimensions for the combination kernel to work on
"""
assert all([isinstance(k, Kern) for k in kernels])
extra_dims = np.array(extra_dims, dtype=int)
input_dim, active_dims = self.get_input_dim_active_dims(kernels, extra_dims)
# initialize the kernel with the full input_dim
super(CombinationKernel, self).__init__(input_dim, active_dims, name)
self.extra_dims = extra_dims
self.link_parameters(*kernels)
@property
def parts(self):
return self.parameters
def get_input_dim_active_dims(self, kernels, extra_dims = None):
#active_dims = reduce(np.union1d, (np.r_[x.active_dims] for x in kernels), np.array([], dtype=int))
#active_dims = np.array(np.concatenate((active_dims, extra_dims if extra_dims is not None else [])), dtype=int)
input_dim = reduce(max, (k.active_dims.max() for k in kernels)) + 1
if extra_dims is not None:
input_dim += extra_dims.size
active_dims = np.arange(input_dim)
return input_dim, active_dims
def input_sensitivity(self, summarize=True):
"""
If summarize is true, we want to get the summarized view of the sensitivities,
otherwise put everything into an array with shape (#kernels, input_dim)
in the order of appearance of the kernels in the parameterized object.
"""
raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
def _check_active_dims(self, X):
return
def _check_input_dim(self, X):
# As combination kernels cannot always know what their inner kernels have as input dims, the check will be done inside them, respectively
return

@@ -0,0 +1,143 @@
'''
Created on 11 Mar 2014
@author: maxz
'''
from ...core.parameterization.parameterized import ParametersChangedMeta
import numpy as np
from functools import wraps
def put_clean(dct, name, func):
if name in dct:
dct['_clean_{}'.format(name)] = dct[name]
dct[name] = func(dct[name])
class KernCallsViaSlicerMeta(ParametersChangedMeta):
def __new__(cls, name, bases, dct):
put_clean(dct, 'K', _slice_K)
put_clean(dct, 'Kdiag', _slice_Kdiag)
put_clean(dct, 'update_gradients_full', _slice_update_gradients_full)
put_clean(dct, 'update_gradients_diag', _slice_update_gradients_diag)
put_clean(dct, 'gradients_X', _slice_gradients_X)
put_clean(dct, 'gradients_X_diag', _slice_gradients_X_diag)
put_clean(dct, 'psi0', _slice_psi)
put_clean(dct, 'psi1', _slice_psi)
put_clean(dct, 'psi2', _slice_psi)
put_clean(dct, 'update_gradients_expectations', _slice_update_gradients_expectations)
put_clean(dct, 'gradients_Z_expectations', _slice_gradients_Z_expectations)
put_clean(dct, 'gradients_qX_expectations', _slice_gradients_qX_expectations)
return super(KernCallsViaSlicerMeta, cls).__new__(cls, name, bases, dct)
class _Slice_wrap(object):
def __init__(self, k, X, X2=None):
self.k = k
self.shape = X.shape
assert X.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X.shape={!s}".format(X.shape)
if X2 is not None:
assert X2.ndim == 2, "only matrices are allowed as inputs to kernels for now, given X2.shape={!s}".format(X2.shape)
if (self.k.active_dims is not None) and (self.k._sliced_X == 0):
self.k._check_active_dims(X)
self.X = self.k._slice_X(X)
self.X2 = self.k._slice_X(X2) if X2 is not None else X2
self.ret = True
else:
self.k._check_input_dim(X)
self.X = X
self.X2 = X2
self.ret = False
def __enter__(self):
self.k._sliced_X += 1
return self
def __exit__(self, *a):
self.k._sliced_X -= 1
def handle_return_array(self, return_val):
if self.ret:
ret = np.zeros(self.shape)
ret[:, self.k.active_dims] = return_val
return ret
return return_val
def _slice_K(f):
@wraps(f)
def wrap(self, X, X2 = None, *a, **kw):
with _Slice_wrap(self, X, X2) as s:
ret = f(self, s.X, s.X2, *a, **kw)
return ret
return wrap
def _slice_Kdiag(f):
@wraps(f)
def wrap(self, X, *a, **kw):
with _Slice_wrap(self, X, None) as s:
ret = f(self, s.X, *a, **kw)
return ret
return wrap
def _slice_update_gradients_full(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = f(self, dL_dK, s.X, s.X2)
return ret
return wrap
def _slice_update_gradients_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = f(self, dL_dKdiag, s.X)
return ret
return wrap
def _slice_gradients_X(f):
@wraps(f)
def wrap(self, dL_dK, X, X2=None):
with _Slice_wrap(self, X, X2) as s:
ret = s.handle_return_array(f(self, dL_dK, s.X, s.X2))
return ret
return wrap
def _slice_gradients_X_diag(f):
@wraps(f)
def wrap(self, dL_dKdiag, X):
with _Slice_wrap(self, X, None) as s:
ret = s.handle_return_array(f(self, dL_dKdiag, s.X))
return ret
return wrap
def _slice_psi(f):
@wraps(f)
def wrap(self, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, s.X, s.X2)
return ret
return wrap
def _slice_update_gradients_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2)
return ret
return wrap
def _slice_gradients_Z_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, Z, variational_posterior) as s:
ret = s.handle_return_array(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X, s.X2))
return ret
return wrap
def _slice_gradients_qX_expectations(f):
@wraps(f)
def wrap(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
with _Slice_wrap(self, variational_posterior, Z) as s:
ret = list(f(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, s.X2, s.X))
r2 = ret[:2]
ret[0] = s.handle_return_array(r2[0])
ret[1] = s.handle_return_array(r2[1])
del r2
return ret
return wrap
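# A stripped-down sketch of what these wrappers do (illustrative only): reduce the
# inputs to the kernel's active_dims before calling through, and, for gradients with
# respect to X, scatter the result back into a full-width array via handle_return_array.
def _sketch_slice_K(K_func, active_dims):
    def wrapped(X, X2=None):
        Xs = X[:, active_dims]
        X2s = X2[:, active_dims] if X2 is not None else None
        return K_func(Xs, X2s)
    return wrapped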

GPy/kern/_src/linear.py (new file)
@@ -0,0 +1,177 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.linalg import tdot
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this
from ...util.config import *
from .psi_comp import PSICOMP_Linear
class Linear(Kern):
"""
Linear kernel
.. math::
k(x,y) = \sum_{i=1}^{\text{input\_dim}} \sigma^2_i x_iy_i
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, ARD=False, active_dims=None, name='linear'):
super(Linear, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.link_parameter(self.variances)
self.psicomp = PSICOMP_Linear()
@Cache_this(limit=2)
def K(self, X, X2=None):
if self.ARD:
if X2 is None:
return tdot(X*np.sqrt(self.variances))
else:
rv = np.sqrt(self.variances)
return np.dot(X*rv, (X2*rv).T)
else:
return self._dot_product(X, X2) * self.variances
@Cache_this(limit=1, ignore_args=(0,))
def _dot_product(self, X, X2=None):
if X2 is None:
return tdot(X)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return np.sum(self.variances * np.square(X), -1)
def update_gradients_full(self, dL_dK, X, X2=None):
if self.ARD:
if X2 is None:
#self.variances.gradient = np.array([np.sum(dL_dK * tdot(X[:, i:i + 1])) for i in range(self.input_dim)])
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X)
else:
#product = X[:, None, :] * X2[None, :, :]
#self.variances.gradient = (dL_dK[:, :, None] * product).sum(0).sum(0)
self.variances.gradient = np.einsum('ij,iq,jq->q', dL_dK, X, X2)
else:
self.variances.gradient = np.sum(self._dot_product(X, X2) * dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = dL_dKdiag[:, None] * X ** 2
if self.ARD:
self.variances.gradient = tmp.sum(0)
else:
self.variances.gradient = np.atleast_1d(tmp.sum())
def gradients_X(self, dL_dK, X, X2=None):
if X2 is None:
return np.einsum('jq,q,ij->iq', X, 2*self.variances, dL_dK)
else:
#return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
return np.einsum('jq,q,ij->iq', X2, self.variances, dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim) * self.variances
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0]
if self.ARD:
self.variances.gradient = dL_dvar
else:
self.variances.gradient = dL_dvar.sum()
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
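# A pure-numpy check of the formula in the class docstring, k(x,y) = sum_i sigma_i^2 x_i y_i
# (illustrative only; mirrors the ARD branch of K above):
_X, _X2 = np.random.randn(5, 3), np.random.randn(4, 3)
_variances = np.array([0.5, 1.0, 2.0])
_Klin = np.dot(_X * _variances, _X2.T)
assert np.allclose(_Klin, np.einsum('iq,q,jq->ij', _X, _variances, _X2))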
class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
super(LinearFull, self).__init__(input_dim, active_dims, name)
if W is None:
W = np.ones((input_dim, rank))
if kappa is None:
kappa = np.ones(input_dim)
assert W.shape == (input_dim, rank)
assert kappa.shape == (input_dim,)
self.W = Param('W', W)
self.kappa = Param('kappa', kappa, Logexp())
self.link_parameters(self.W, self.kappa)
def K(self, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,lk->il', X, P, X if X2 is None else X2)
def update_gradients_full(self, dL_dK, X, X2=None):
self.kappa.gradient = np.einsum('ij,ik,kj->j', X, dL_dK, X if X2 is None else X2)
self.W.gradient = np.einsum('ij,kl,ik,lm->jm', X, X if X2 is None else X2, dL_dK, self.W)
self.W.gradient += np.einsum('ij,kl,ik,jm->lm', X, X if X2 is None else X2, dL_dK, self.W)
def Kdiag(self, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return np.einsum('ij,jk,ik->i', X, P, X)
def update_gradients_diag(self, dL_dKdiag, X):
self.kappa.gradient = np.einsum('ij,i->j', np.square(X), dL_dKdiag)
self.W.gradient = 2.*np.einsum('ij,ik,jl,i->kl', X, X, self.W, dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
if X2 is None:
return 2.*np.einsum('ij,jk,kl->il', dL_dK, X, P)
else:
return np.einsum('ij,jk,kl->il', dL_dK, X2, P)
def gradients_X_diag(self, dL_dKdiag, X):
P = np.dot(self.W, self.W.T) + np.diag(self.kappa)
return 2.*np.einsum('jk,i,ij->ik', P, dL_dKdiag, X)
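# A pure-numpy check of the LinearFull covariance K = X (W W^T + diag(kappa)) X2^T and its
# diagonal (illustrative only; mirrors K and Kdiag above):
_Xf = np.random.randn(6, 3)
_Wf, _kappaf = np.random.randn(3, 2), np.ones(3)
_Pf = np.dot(_Wf, _Wf.T) + np.diag(_kappaf)
_Kf = np.dot(np.dot(_Xf, _Pf), _Xf.T)
assert np.allclose(np.diag(_Kf), np.einsum('ij,jk,ik->i', _Xf, _Pf, _Xf))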

GPy/kern/_src/mlp.py (new file)
@@ -0,0 +1,129 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
import numpy as np
four_over_tau = 2./np.pi
class MLP(Kern):
"""
Multi layer perceptron kernel (also known as arc sine kernel or neural network kernel)
.. math::
k(x,y) = \\sigma^{2}\\frac{2}{\\pi } \\text{asin} \\left ( \\frac{ \\sigma_w^2 x^\\top y+\\sigma_b^2}{\\sqrt{\\sigma_w^2x^\\top x + \\sigma_b^2 + 1}\\sqrt{\\sigma_w^2 y^\\top y + \\sigma_b^2 +1}} \\right )
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (i.e. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=1., bias_variance=100., active_dims=None, name='mlp'):
super(MLP, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.weight_variance = Param('weight_variance', weight_variance, Logexp())
self.bias_variance = Param('bias_variance', bias_variance, Logexp())
self.link_parameters(self.variance, self.weight_variance, self.bias_variance)
def K(self, X, X2=None):
self._K_computations(X, X2)
return self.variance*self._K_dvar
def Kdiag(self, X):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
return self.variance*self._K_diag_dvar
def update_gradients_full(self, dL_dK, X, X2=None):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self.variance.gradient = np.sum(self._K_dvar*dL_dK)
denom3 = self._K_denom**3
base = four_over_tau*self.variance/np.sqrt(1-self._K_asin_arg*self._K_asin_arg)
base_cov_grad = base*dL_dK
if X2 is None:
vec = np.diag(self._K_inner_prod)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec+self.bias_variance+1.), vec)
+np.outer(vec,(self.weight_variance*vec+self.bias_variance+1.))))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec[None, :]+vec[:, None])*self.weight_variance
+2.*self.bias_variance + 2.))*base_cov_grad).sum()
else:
vec1 = (X*X).sum(1)
vec2 = (X2*X2).sum(1)
self.weight_variance.gradient = ((self._K_inner_prod/self._K_denom
-.5*self._K_numer/denom3
*(np.outer((self.weight_variance*vec1+self.bias_variance+1.), vec2) + np.outer(vec1, self.weight_variance*vec2 + self.bias_variance+1.)))*base_cov_grad).sum()
self.bias_variance.gradient = ((1./self._K_denom
-.5*self._K_numer/denom3
*((vec1[:, None]+vec2[None, :])*self.weight_variance
+ 2*self.bias_variance + 2.))*base_cov_grad).sum()
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError, "TODO"
def gradients_X(self, dL_dK, X, X2):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_asin_arg
numer = self._K_numer
denom = self._K_denom
denom3 = denom*denom*denom
if X2 is not None:
vec2 = (X2*X2).sum(1)*self.weight_variance+self.bias_variance + 1.
return four_over_tau*self.weight_variance*self.variance*((X2[None, :, :]/denom[:, :, None] - vec2[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
else:
vec = (X*X).sum(1)*self.weight_variance+self.bias_variance + 1.
return 2*four_over_tau*self.weight_variance*self.variance*((X[None, :, :]/denom[:, :, None] - vec[None, :, None]*X[:, None, :]*(numer/denom3)[:, :, None])*(dL_dK/np.sqrt(1-arg*arg))[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_asin_arg
denom = self._K_diag_denom
#numer = self._K_diag_numer
return four_over_tau*2.*self.weight_variance*self.variance*X*(1./denom*(1. - arg)*dL_dKdiag/(np.sqrt(1-arg*arg)))[:, None]
def _K_computations(self, X, X2):
"""Pre-computations for the covariance matrix (used for computing the covariance and its gradients."""
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec = np.diag(self._K_numer) + 1.
self._K_denom = np.sqrt(np.outer(vec,vec))
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_numer = self._K_inner_prod*self.weight_variance + self.bias_variance
vec1 = (X*X).sum(1)*self.weight_variance + self.bias_variance + 1.
vec2 = (X2*X2).sum(1)*self.weight_variance + self.bias_variance + 1.
self._K_denom = np.sqrt(np.outer(vec1,vec2))
self._K_asin_arg = self._K_numer/self._K_denom
self._K_dvar = four_over_tau*np.arcsin(self._K_asin_arg)
def _K_diag_computations(self, X):
"""Pre-computations concerning the diagonal terms (used for computation of diagonal and its gradients)."""
self._K_diag_numer = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_denom = self._K_diag_numer+1.
self._K_diag_asin_arg = self._K_diag_numer/self._K_diag_denom
self._K_diag_dvar = four_over_tau*np.arcsin(self._K_diag_asin_arg)
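# A pure-numpy sketch of the arc-sine formula in the class docstring, for a single pair
# of points (illustrative only; mirrors _K_computations above):
def _mlp_k(x, y, variance=1., weight_variance=1., bias_variance=100.):
    num = weight_variance*np.dot(x, y) + bias_variance
    den = np.sqrt((weight_variance*np.dot(x, x) + bias_variance + 1.)
                  * (weight_variance*np.dot(y, y) + bias_variance + 1.))
    return variance*four_over_tau*np.arcsin(num/den)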

@@ -2,12 +2,288 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...util.linalg import mdot
from ...util.decorators import silence_errors
from ...core.parameterization.param import Param
from ...core.parameterization.transformations import Logexp
class Periodic(Kern):
def __init__(self, input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
assert input_dim==1, "Periodic kernels are only defined for input_dim=1"
super(Periodic, self).__init__(input_dim, active_dims, name)
self.input_dim = input_dim
self.lower,self.upper = lower, upper
self.n_freq = n_freq
self.n_basis = 2*n_freq
self.variance = Param('variance', np.float64(variance), Logexp())
self.lengthscale = Param('lengthscale', np.float64(lengthscale), Logexp())
self.period = Param('period', np.float64(period), Logexp())
self.link_parameters(self.variance, self.lengthscale, self.period)
def _cos(self, alpha, omega, phase):
def f(x):
return alpha*np.cos(omega*x + phase)
return f
@silence_errors
def _cos_factorization(self, alpha, omega, phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def K(self, X, X2=None):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
return mdot(FX,self.Gi,FX2.T)
def Kdiag(self,X):
return np.diag(self.K(X))
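# A shape-level sketch of the decomposition used by K above (illustrative only): the
# covariance is a finite cosine-basis expansion, K(X, X2) = F(X) G^{-1} F(X2)^T, with F
# the (n_data x n_basis) basis matrix and G the Gram matrix built in the subclasses.
_FX = np.cos(np.random.rand(8, 1)*np.arange(1, 7)[None, :])   # stand-in basis matrix, 6 basis functions
_G = np.eye(6)                                                # stand-in Gram matrix
_Ksketch = mdot(_FX, np.linalg.inv(_G), _FX.T)
assert _Ksketch.shape == (8, 8)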
class PeriodicExponential(Periodic):
"""
Kernel of the periodic subspace (up to a given frequency) of a exponential
(Matern 1/2) RKHS.
Only defined for input_dim=1.
"""
def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_exponential'):
super(PeriodicExponential, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
def parameters_changed(self):
self.a = [1./self.lengthscale, 1.]
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters (shape is N x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
# SIMPLIFY!!! IPPprim1 = (self.upper - self.lower)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.logical_or(np.isnan(IPPprim1), np.isinf(IPPprim1)), IPPprim2, IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)
class PeriodicMatern32(Periodic):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for input_dim=1.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (input_dim,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern32'):
super(PeriodicMatern32, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
def parameters_changed(self):
self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
self.b = [1,self.lengthscale**2/3]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
@silence_errors
def update_gradients_full(self,dL_dK,X,X2):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)
class PeriodicMatern52(Periodic):
""" """
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1. Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for input_dim=1.
@ -25,67 +301,21 @@ class PeriodicMatern52(Kernpart):
""" """
def __init__(self,input_dim=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi): def __init__(self, input_dim=1, variance=1., lengthscale=1., period=2.*np.pi, n_freq=10, lower=0., upper=4*np.pi, active_dims=None, name='periodic_Matern52'):
assert input_dim==1, "Periodic kernels are only defined for input_dim=1" super(PeriodicMatern52, self).__init__(input_dim, variance, lengthscale, period, n_freq, lower, upper, active_dims, name)
self.name = 'periodic_Mat52'
self.input_dim = input_dim
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Wrong size: only one lengthscale needed"
else:
lengthscale = np.ones(1)
self.lower,self.upper = lower, upper
self.num_params = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
@silence_errors
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
@silence_errors
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
def parameters_changed(self):
self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
self.basis_alpha = np.ones((2*self.n_freq,))
self.basis_omega = (2*np.pi*np.arange(1,self.n_freq+1)/self.period).repeat(2)
self.basis_phi = np.zeros(self.n_freq * 2)
self.basis_phi[::2] = -np.pi/2
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
@@ -99,23 +329,8 @@ class PeriodicMatern52(Kernpart):
lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
@silence_errors
def update_gradients_full(self, dL_dK, X, X2=None):
"""derivative of the covariance matrix with respect to the parameters (shape is num_data x num_inducing x num_params)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
@ -156,14 +371,12 @@ class PeriodicMatern52(Kernpart):
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2)) IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T)) IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T)) IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
@ -186,81 +399,7 @@ class PeriodicMatern52(Kernpart):
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
#np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*dL_dK)
self.variance.gradient = np.sum(dK_dvar*dL_dK)
self.lengthscale.gradient = np.sum(dK_dlen*dL_dK)
self.period.gradient = np.sum(dK_dper*dL_dK)
@silence_errors
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1. / self.variance * mdot(FX, self.Gi, FX.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + .5*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + .5*self.lower**2*np.cos(phi-phi1.T)
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower.T,dFlower_dper))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)
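The hunks above swap the old flat gradient interface, where dK_dtheta and dKdiag_dtheta accumulated into target[0..2], for the Param-based one in which update_gradients_full writes each parameter's summed gradient to its own .gradient attribute. A minimal sketch of that convention, assuming a toy kernel with a single variance parameter (not GPy code; the Param machinery is stubbed out with a plain attribute):

import numpy as np

class ToyKern(object):
    """Hypothetical fragment illustrating the per-parameter .gradient convention."""
    def __init__(self, variance=1.):
        self.variance = variance            # stands in for a GPy Param object
        self.variance_gradient = 0.         # a real Param exposes .gradient

    def K(self, X, X2=None):
        X2 = X if X2 is None else X2
        return self.variance * np.dot(X, X2.T)

    def update_gradients_full(self, dL_dK, X, X2=None):
        dK_dvar = self.K(X, X2) / self.variance
        self.variance_gradient = np.sum(dK_dvar * dL_dK)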

41
GPy/kern/_src/poly.py Normal file
View file

@ -0,0 +1,41 @@
# Copyright (c) 2014, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Poly(Kern):
"""
Polynomial kernel
"""
def __init__(self, input_dim, variance=1., order=3., active_dims=None, name='poly'):
super(Poly, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameter(self.variance)
self.order=order
def K(self, X, X2=None):
return (self._dot_product(X, X2) + 1.)**self.order * self.variance
def _dot_product(self, X, X2=None):
if X2 is None:
return np.dot(X, X.T)
else:
return np.dot(X, X2.T)
def Kdiag(self, X):
return self.variance*(np.square(X).sum(1) + 1.)**self.order
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.sum(dL_dK * (self._dot_product(X, X2) + 1.)**self.order)
def update_gradients_diag(self, dL_dKdiag, X):
raise NotImplementedError
def gradients_X(self, dL_dK, X, X2=None):
raise NotImplementedError
def gradients_X_diag(self, dL_dKdiag, X):
raise NotImplementedError
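A hedged usage sketch for the Poly kernel above, assuming it is re-exported through GPy.kern like the other kernel parts (the data X is made up):

import numpy as np
import GPy

X = np.random.randn(20, 2)
k = GPy.kern.Poly(input_dim=2, variance=1., order=3.)
K = k.K(X)                      # variance * (X X^T + 1)**order, shape (20, 20)
Kdiag = k.Kdiag(X)              # diagonal only, without forming the full matrix
assert np.allclose(np.diag(K), Kdiag)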

66
GPy/kern/_src/prod.py Normal file
View file

@ -0,0 +1,66 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from kern import CombinationKernel
from ...util.caching import Cache_this
import itertools
class Prod(CombinationKernel):
"""
Computes the product of a list of kernels
:param kernels: the kernels to multiply together
:type kernels: list of Kern
:param name: name of the resulting kernel
:type name: str
:rtype: kernel object
"""
def __init__(self, kernels, name='mul'):
for i, kern in enumerate(kernels[:]):
if isinstance(kern, Prod):
del kernels[i]
for part in kern.parts[::-1]:
kern.unlink_parameter(part)
kernels.insert(i, part)
super(Prod, self).__init__(kernels, name)
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.multiply, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
if which_parts is None:
which_parts = self.parts
return reduce(np.multiply, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
k = self.K(X,X2)*dL_dK
for p in self.parts:
p.update_gradients_full(k/p.K(X,X2),X,X2)
def update_gradients_diag(self, dL_dKdiag, X):
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
p.update_gradients_diag(k/p.Kdiag(X),X)
def gradients_X(self, dL_dK, X, X2=None):
target = np.zeros(X.shape)
k = self.K(X,X2)*dL_dK
for p in self.parts:
target += p.gradients_X(k/p.K(X,X2),X,X2)
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
k = self.Kdiag(X)*dL_dKdiag
for p in self.parts:
target += p.gradients_X_diag(k/p.Kdiag(X),X)
return target
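A hedged usage sketch, assuming the usual GPy convention that multiplying two kernel objects with '*' builds a Prod over them (RBF and Bias are existing GPy kernels; the data is made up):

import numpy as np
import GPy

X = np.random.randn(10, 1)
k = GPy.kern.RBF(1) * GPy.kern.Bias(1)       # a Prod([RBF, Bias], name='mul')
K = k.K(X)
assert np.allclose(K, GPy.kern.RBF(1).K(X) * GPy.kern.Bias(1).K(X))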

View file

@ -0,0 +1,55 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from ....core.parameterization.parameter_core import Pickleable
from GPy.util.caching import Cache_this
from ....core.parameterization import variational
import rbf_psi_comp
import ssrbf_psi_comp
import sslinear_psi_comp
import linear_psi_comp
class PSICOMP_RBF(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
def _setup_observers(self):
pass
class PSICOMP_Linear(Pickleable):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
def _setup_observers(self):
pass
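A minimal illustration (not GPy API) of the isinstance-based dispatch used by PSICOMP_RBF and PSICOMP_Linear above: one front-end call is routed to the dense-Gaussian or spike-and-slab implementation depending on the type of the variational posterior.

class NormalPosterior(object): pass
class SpikeAndSlabPosterior(object): pass

def psicomputations(variance, lengthscale, Z, posterior):
    if isinstance(posterior, NormalPosterior):
        return "rbf_psi_comp path"            # q(X) is Gaussian
    elif isinstance(posterior, SpikeAndSlabPosterior):
        return "ssrbf_psi_comp path"          # q(X) carries a Bernoulli spike
    raise ValueError("unknown distribution received for psi-statistics")

assert psicomputations(1., 1., None, NormalPosterior()) == "rbf_psi_comp path"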

View file

@ -0,0 +1,77 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for Bayesian GPLVM
"""
import numpy as np
from ....util.linalg import tdot
def psicomputations(variance, Z, variational_posterior):
"""
Compute psi-statistics for the linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = (variance*(np.square(mu)+S)).sum(axis=1)
psi1 = np.dot(mu,(variance*Z).T)
psi2 = np.dot(S.sum(axis=0)*np.square(variance)*Z,Z.T)+ tdot(psi1.T)
return psi0, psi1, psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
dL_dvar, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dpsi0_var = dL_dpsi0[:,None]*variance[None,:]
dL_dpsi1_mu = np.dot(dL_dpsi1.T,mu)
dL_dvar += (dL_dpsi0[:,None]*mu2S).sum(axis=0)+ (dL_dpsi1_mu*Z).sum(axis=0)
dL_dmu += 2.*dL_dpsi0_var*mu+np.dot(dL_dpsi1,Z)*variance
dL_dS += dL_dpsi0_var
dL_dZ += dL_dpsi1_mu*variance
return dL_dvar, dL_dZ, dL_dmu, dL_dS
def _psi2computations(dL_dpsi2, variance, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2_dvariance Q
# _psi2_dZ MxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ
variance2 = np.square(variance)
common_sum = np.dot(mu,(variance*Z).T)
Z_expect = (np.dot(dL_dpsi2,Z)*Z).sum(axis=0)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
common_expect = np.dot(common_sum,np.dot(dL_dpsi2T,Z))
Z2_expect = np.inner(common_sum,dL_dpsi2T)
Z1_expect = np.dot(dL_dpsi2T,Z)
dL_dvar = 2.*S.sum(axis=0)*variance*Z_expect+(common_expect*mu).sum(axis=0)
dL_dmu = common_expect*variance
dL_dS = np.empty(S.shape)
dL_dS[:] = Z_expect*variance2
dL_dZ = variance2*S.sum(axis=0)*Z1_expect+np.dot(Z2_expect.T,variance*mu)
return dL_dvar, dL_dmu, dL_dS, dL_dZ
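The closed forms above are expectations of the linear kernel under q(x_n) = N(mu_n, diag(S_n)): psi0_n = E[k(x_n,x_n)], psi1_nm = E[k(x_n,Z_m)] and psi2 = sum_n E[k(Z,x_n)k(x_n,Z)]. A hedged Monte-Carlo check of psi0 and psi1 with made-up shapes (standalone NumPy, not GPy code):

import numpy as np

rng = np.random.RandomState(0)
Q, M = 2, 3
variance = rng.rand(Q) + 0.5                 # ARD variances of the linear kernel
Z = rng.randn(M, Q)
mu, S = rng.randn(1, Q), rng.rand(1, Q)      # a single latent point, N = 1

x = mu + np.sqrt(S) * rng.randn(100000, Q)   # samples from q(x)
psi0_mc = np.mean(np.sum(variance * x * x, axis=1))
psi1_mc = np.mean(np.dot(x, (variance * Z).T), axis=0)
# psi0_mc ~ (variance*(mu**2 + S)).sum();  psi1_mc ~ np.dot(mu, (variance*Z).T)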

View file

@ -0,0 +1,161 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from GPy.util.caching import Cacher
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
return psi0, psi1, psi2
def __psi1computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log)
return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
denom = 1./(S+lengthscale2)
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
_dL_dvar = Lpsi1.sum()/variance
_dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2*_psi2 # dL_dpsi2 is MxM, using broadcast to multiply N out
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
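For reference, __psi1computations above evaluates, in log space, psi1[n,m] = variance * prod_q (S_nq/l_q^2 + 1)^(-1/2) * exp(-(mu_nq - Z_mq)^2 / (2*(S_nq + l_q^2))). A hedged scalar check of that factorised form for a single (n, m) pair, with made-up inputs:

import numpy as np

rng = np.random.RandomState(1)
Q = 3
variance = 2.0
lengthscale = rng.rand(Q) + 0.5
mu_n, S_n, Z_m = rng.randn(Q), rng.rand(Q), rng.randn(Q)

l2 = lengthscale**2
psi1_nm = variance
for q in range(Q):
    psi1_nm *= np.exp(-(mu_n[q] - Z_m[q])**2 / (2.*(S_n[q] + l2[q]))) \
               / np.sqrt(S_n[q]/l2[q] + 1.)
# psi1_nm should equal variance*np.exp(_psi1_log) for this row and column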

View file

@ -0,0 +1,411 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
__device__ void reduce_sum(double* array, int array_size) {
int s;
if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi1 += (muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(-2.);
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
psi2_local[threadIdx.x] = 0;
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu[IDX_NQ(n,q)]- (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi2_n += dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) + log_denom2[IDX_NQ(n,q)]/(-2.);
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi1, double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmu = Z[IDX_MQ(m,q)] - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
dmu_local += lpsi1*Zmu/denom;
dS_local += lpsi1*(Zmu2_denom-1.)/denom;
dl_local += lpsi1*(Zmu2_denom+Snq/lq)/denom;
g_local[threadIdx.x] = -lpsi1*Zmu/denom;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi2, double *psi2n, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu_nq - (Z[IDX_MQ(m1,q)] + Z[IDX_MQ(m2,q)])/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
dmu_local += lpsi2*muZhat/denom;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom;
dl_local += lpsi2*((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq));
g_local[threadIdx.x] += 2.*lpsi2*(muZhat/denom-dZ/(2*lq));
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_RBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPdPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPdPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_RBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
# derivatives
'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
# grad
'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
}
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S):
if len(lengthscale)==1:
self.gpuCache['l_gpu'].fill(lengthscale)
else:
self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
N,Q = self.gpuCache['S_gpu'].shape
# t=self.g_compDenom(self.gpuCache['log_denom1_gpu'],self.gpuCache['log_denom2_gpu'],self.gpuCache['l_gpu'],self.gpuCache['S_gpu'], np.int32(N), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_compDenom '+str(t)
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
# t = self.g_psi1computations(psi1_gpu, log_denom1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1computations '+str(t)
# t = self.g_psi2computations(psi2_gpu, psi2n_gpu, log_denom2_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2computations '+str(t)
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
dvar_gpu = self.gpuCache['dvar_gpu']
dl_gpu = self.gpuCache['dl_gpu']
dZ_gpu = self.gpuCache['dZ_gpu']
dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu']
if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = dL_dpsi0.get().sum() #gpuarray.sum(dL_dpsi0).get()
else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative()
# t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1compDer '+str(t)
# t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2compDer '+str(t)
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + dvar_gpu.get().sum()#gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = dl_gpu.get().sum() #gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
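The psi1computations CUDA kernel above accumulates, per (n, m) entry, the same log-space terms as the CPU path before exponentiating and scaling by var. A hedged NumPy reference of that computation (assumed shapes: Z is MxQ, mu and S are NxQ; not part of the GPU module):

import numpy as np

def psi1_reference(var, l, Z, mu, S):
    l2 = l**2                                            # (Q,)
    log_denom1 = np.log(S/l2 + 1.)                       # (N, Q)
    d2 = np.square(mu[:, None, :] - Z[None, :, :])       # (N, M, Q)
    log_psi1 = -0.5*(d2/(S[:, None, :] + l2) + log_denom1[:, None, :]).sum(-1)
    return var*np.exp(log_psi1)                          # (N, M)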

View file

@ -0,0 +1,92 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for SSGPLVM
"""
from ....util.linalg import tdot
import numpy as np
def psicomputations(variance, Z, variational_posterior):
"""
Compute psi-statistics for ss-linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = (gamma*(np.square(mu)+S)*variance).sum(axis=-1)
psi1 = np.inner(variance*gamma*mu,Z)
psi2 = np.inner(np.square(variance)*(gamma*((1-gamma)*np.square(mu)+S)).sum(axis=0)*Z,Z)+tdot(psi1.T)
return psi0, psi1, psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2_dvariance Q
# _psi2_dZ MxQ
# _psi2_dgamma NxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ
mu2 = np.square(mu)
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
gvm = np.einsum('nq,nq,q->nq',gamma,mu,variance)
common_sum = np.einsum('nq,mq->nm',gvm,Z)
# common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
Z_expect = np.einsum('mo,mq,oq->q',dL_dpsi2,Z,Z)
dL_dpsi2T = dL_dpsi2+dL_dpsi2.T
tmp = np.einsum('mo,oq->mq',dL_dpsi2T,Z)
common_expect = np.einsum('mq,nm->nq',tmp,common_sum)
# common_expect = np.einsum('mo,mq,no->nq',dL_dpsi2+dL_dpsi2.T,Z,common_sum)
Z2_expect = np.einsum('om,nm->no',dL_dpsi2T,common_sum)
Z1_expect = np.einsum('om,mq->oq',dL_dpsi2T,Z)
dL_dvar = np.einsum('nq,q,q->q',2.*(gamma*mu2S-gamma2*mu2),variance,Z_expect)+\
np.einsum('nq,nq,nq->q',common_expect,gamma,mu)
dL_dgamma = np.einsum('q,q,nq->nq',Z_expect,variance2,(mu2S-2.*gamma*mu2))+\
np.einsum('nq,q,nq->nq',common_expect,variance,mu)
dL_dmu = np.einsum('q,q,nq,nq->nq',Z_expect,variance2,mu,2.*(gamma-gamma2))+\
np.einsum('nq,nq,q->nq',common_expect,gamma,variance)
dL_dS = np.einsum('q,nq,q->nq',Z_expect,gamma,variance2)
# dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,(mu2S-gamma*mu2))+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum))
dL_dZ = Z1_expect*np.einsum('nq,q,nq->q',gamma,variance2,(mu2S-gamma*mu2))+np.einsum('nq,q,nq,nm->mq',gamma,variance,mu,Z2_expect)
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
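Under the spike-and-slab posterior each latent coordinate is x_q = b_q * w_q with b_q ~ Bernoulli(gamma_q) and w_q ~ N(mu_q, S_q), so E[x_q] = gamma*mu and E[x_q^2] = gamma*(mu^2 + S); the psi0, psi1 and psi2 expressions above follow from these moments. A hedged Monte-Carlo check of psi0 with made-up inputs (standalone NumPy, not GPy code):

import numpy as np

rng = np.random.RandomState(2)
Q = 3
variance = rng.rand(Q) + 0.5
mu, S, gamma = rng.randn(1, Q), rng.rand(1, Q), rng.rand(1, Q)

b = rng.rand(100000, Q) < gamma                       # Bernoulli spike
x = b * (mu + np.sqrt(S) * rng.randn(100000, Q))      # spike-and-slab samples
psi0_mc = np.mean(np.sum(variance * x * x, axis=1))
# psi0_mc ~ (gamma*(mu**2 + S)*variance).sum(), matching psicomputations above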

View file

@ -0,0 +1,394 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the psi statistics computation
"""
import numpy as np
try:
from scipy import weave
def _psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
variance = float(variance)
psi0 = np.empty(N)
psi0[:] = variance
psi1 = np.empty((N,M))
psi2n = np.empty((N,M,M))
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<=m1;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
if(m2==0) {
// Compute Psi_1
double muZ = mu(n,q)-Z(m1,q);
double psi1_exp1 = log_gamma(n,q) - (muZ*muZ/(Snq+lq) +log_denom1(n,q))/2.;
double psi1_exp2 = log_gamma1(n,q) -Zm1q*Zm1q/(2.*lq);
log_psi1 += (psi1_exp1>psi1_exp2)?psi1_exp1+log1p(exp(psi1_exp2-psi1_exp1)):psi1_exp2+log1p(exp(psi1_exp1-psi1_exp2));
}
// Compute Psi_2
double muZhat = mu(n,q) - (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+ Zm2q*Zm2q;
double dZ = Zm1q - Zm2q;
double psi2_exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double psi2_exp2 = log_gamma1(n,q) - Z2/(2.*lq);
log_psi2_n += (psi2_exp1>psi2_exp2)?psi2_exp1+log1p(exp(psi2_exp2-psi2_exp1)):psi2_exp2+log1p(exp(psi2_exp1-psi2_exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n(n,m1,m2) = variance*variance*exp_psi2_n;
if(m1!=m2) { psi2n(n,m2,m1) = variance*variance*exp_psi2_n;}
}
psi1(n,m1) = variance*exp(log_psi1);
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','log_denom1','log_denom2','log_gamma','log_gamma1'], type_converters=weave.converters.blitz)
psi2 = psi2n.sum(axis=0)
return psi0,psi1,psi2,psi2n
from GPy.util.caching import Cacher
psicomputations = Cacher(_psicomputations, limit=1)
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
_,psi1,_,psi2n = psicomputations(variance, lengthscale, Z, variational_posterior)
mu = variational_posterior.mean
S = variational_posterior.variance
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
l2 = np.square(lengthscale)
log_denom1 = np.log(S/l2+1)
log_denom2 = np.log(2*S/l2+1)
log_gamma,log_gamma1 = variational_posterior.gamma_log_prob()
gamma, gamma1 = variational_posterior.gamma_probabilities()
variance = float(variance)
dvar = np.zeros(1)
dmu = np.zeros((N,Q))
dS = np.zeros((N,Q))
dgamma = np.zeros((N,Q))
dl = np.zeros(Q)
dZ = np.zeros((M,Q))
dvar += np.sum(dL_dpsi0)
from ....util.misc import param_to_array
S = param_to_array(S)
mu = param_to_array(mu)
Z = param_to_array(Z)
support_code = """
#include <math.h>
"""
code = """
for(int n=0; n<N; n++) {
for(int m1=0;m1<M;m1++) {
double log_psi1=0;
for(int m2=0;m2<M;m2++) {
double log_psi2_n=0;
for(int q=0;q<Q;q++) {
double Snq = S(n,q);
double lq = l2(q);
double Zm1q = Z(m1,q);
double Zm2q = Z(m2,q);
double gnq = gamma(n,q);
double g1nq = gamma1(n,q);
double mu_nq = mu(n,q);
if(m2==0) {
// Compute Psi_1
double lpsi1 = psi1(n,m1)*dL_dpsi1(n,m1);
if(q==0) {dvar(0) += lpsi1/variance;}
double Zmu = Zm1q - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gamma(n,q)-(Zmu*Zmu/(Snq+lq)+log_denom1(n,q))/(2.);
double exp2 = log_gamma1(n,q)-Zm1q*Zm1q/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS(n,q) += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum)/2.;
dgamma(n,q) += lpsi1*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zm1q*Zm1q/(lq*lq)*d_exp2)/(2.*exp_sum);
dZ(m1,q) += lpsi1*(-Zmu/denom*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
// Compute Psi_2
double lpsi2 = psi2n(n,m1,m2)*dL_dpsi2(m1,m2);
if(q==0) {dvar(0) += lpsi2*2/variance;}
double dZm1m2 = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZm1m2*dZm1m2/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2(n,q)/2. + log_gamma(n,q);
double exp2 = log_gamma1(n,q) - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu(n,q) += -2.*lpsi2*muZhat/denom*d_exp1/exp_sum;
dS(n,q) += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma(n,q) += lpsi2*(d_exp1*g1nq-d_exp2*gnq)/exp_sum;
dl(q) += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZm1m2*dZm1m2/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
dZ(m1,q) += 2.*lpsi2*((muZhat/denom-dZm1m2/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
}
}
"""
weave.inline(code, support_code=support_code, arg_names=['dL_dpsi1','dL_dpsi2','psi1','psi2n','N','M','Q','variance','l2','Z','mu','S','gamma','gamma1','log_denom1','log_denom2','log_gamma','log_gamma1','dvar','dl','dmu','dS','dgamma','dZ'], type_converters=weave.converters.blitz)
dl *= 2.*lengthscale
if not ARD:
dl = dl.sum()
return dvar, dl, dZ, dmu, dS, dgamma
except:
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S, gamma)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S, gamma)
return psi0, psi1, psi2
def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S[:, None, :] / lengthscale2 + 1. # Nx1xQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #Nx1xQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom) # NxMxQ
_psi1_common = gamma[:,None,:] / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #Nx1xQ
_psi1_exponent1 = np.log(gamma[:,None,:]) - (_psi1_dist_sq + np.log(_psi1_denom))/2. # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) - (np.square(Z[None,:,:])/lengthscale2)/2. # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
return _psi1
def _psi2computations(variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S[:, None, None, :] / lengthscale2 + 1. # Nx1x1xQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom)
_psi2_common = gamma[:,None,None,:]/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # Nx1x1xQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom)+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1, dgamma_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2, dgamma_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dgamma = dgamma_psi1 + dgamma_psi2
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_denom = S / lengthscale2 + 1. # NxQ
_psi1_denom_sqrt = np.sqrt(_psi1_denom) #NxQ
_psi1_dist = Z[None, :, :] - mu[:, None, :] # NxMxQ
_psi1_dist_sq = np.square(_psi1_dist) / (lengthscale2 * _psi1_denom[:,None,:]) # NxMxQ
_psi1_common = gamma / (lengthscale2*_psi1_denom*_psi1_denom_sqrt) #NxQ
_psi1_exponent1 = np.log(gamma[:,None,:]) -0.5 * (_psi1_dist_sq + np.log(_psi1_denom[:, None,:])) # NxMxQ
_psi1_exponent2 = np.log(1.-gamma[:,None,:]) -0.5 * (np.square(Z[None,:,:])/lengthscale2) # NxMxQ
_psi1_exponent_max = np.maximum(_psi1_exponent1,_psi1_exponent2)
_psi1_exponent = _psi1_exponent_max+np.log(np.exp(_psi1_exponent1-_psi1_exponent_max) + np.exp(_psi1_exponent2-_psi1_exponent_max)) #NxMxQ
_psi1_exp_sum = _psi1_exponent.sum(axis=-1) #NxM
_psi1_exp_dist_sq = np.exp(-0.5*_psi1_dist_sq) # NxMxQ
_psi1_exp_Z = np.exp(-0.5*np.square(Z[None,:,:])/lengthscale2) # 1xMxQ
_psi1_q = variance * np.exp(_psi1_exp_sum[:,:,None] - _psi1_exponent) # NxMxQ
_psi1 = variance * np.exp(_psi1_exp_sum) # NxM
_dL_dvariance = np.einsum('nm,nm->',dL_dpsi1, _psi1)/variance # 1
_dL_dgamma = np.einsum('nm,nmq,nmq->nq',dL_dpsi1, _psi1_q, (_psi1_exp_dist_sq/_psi1_denom_sqrt[:,None,:]-_psi1_exp_Z)) # NxQ
_dL_dmu = np.einsum('nm, nmq, nmq, nmq, nq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_dist,_psi1_common) # NxQ
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
_psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
_psi2_Zdist_sq = np.square(_psi2_Zdist / lengthscale) # M,M,Q
_psi2_Z_sq_sum = (np.square(Z[:,None,:])+np.square(Z[None,:,:]))/lengthscale2 # MxMxQ
# psi2
_psi2_denom = 2.*S / lengthscale2 + 1. # NxQ
_psi2_denom_sqrt = np.sqrt(_psi2_denom)
_psi2_mudist = mu[:,None,None,:]-_psi2_Zhat #N,M,M,Q
_psi2_mudist_sq = np.square(_psi2_mudist)/(lengthscale2*_psi2_denom[:,None,None,:])
_psi2_common = gamma/(lengthscale2 * _psi2_denom * _psi2_denom_sqrt) # NxQ
_psi2_exponent1 = -_psi2_Zdist_sq -_psi2_mudist_sq -0.5*np.log(_psi2_denom[:,None,None,:])+np.log(gamma[:,None,None,:]) #N,M,M,Q
_psi2_exponent2 = np.log(1.-gamma[:,None,None,:]) - 0.5*(_psi2_Z_sq_sum) # NxMxMxQ
_psi2_exponent_max = np.maximum(_psi2_exponent1, _psi2_exponent2)
_psi2_exponent = _psi2_exponent_max+np.log(np.exp(_psi2_exponent1-_psi2_exponent_max) + np.exp(_psi2_exponent2-_psi2_exponent_max))
_psi2_exp_sum = _psi2_exponent.sum(axis=-1) #NxM
_psi2_q = variance*variance * np.exp(_psi2_exp_sum[:,:,:,None]-_psi2_exponent) # NxMxMxQ
_psi2_exp_dist_sq = np.exp(-_psi2_Zdist_sq -_psi2_mudist_sq) # NxMxMxQ
_psi2_exp_Z = np.exp(-0.5*_psi2_Z_sq_sum) # MxMxQ
_psi2 = variance*variance * (np.exp(_psi2_exp_sum).sum(axis=0)) # MxM
_dL_dvariance = np.einsum('mo,mo->',dL_dpsi2,_psi2)*2./variance
_dL_dgamma = np.einsum('mo,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,(_psi2_exp_dist_sq/_psi2_denom_sqrt[:,None,None,:] - _psi2_exp_Z))
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
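Both the weave path and the NumPy fallback above combine the slab term (exp1) and the spike term (exp2) in log space with the max trick, i.e. a numerically stable log-sum-exp, before summing over q and exponentiating. A minimal standalone sketch of that step (the names exp1 and exp2 follow the code above):

import numpy as np

def logaddexp_pair(exp1, exp2):
    m = np.maximum(exp1, exp2)
    return m + np.log(np.exp(exp1 - m) + np.exp(exp2 - m))   # == np.logaddexp

# e.g. psi1 = variance * np.exp(logaddexp_pair(exp1, exp2).sum(axis=-1))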

View file

@ -0,0 +1,474 @@
"""
The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
"""
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
__device__ void reduce_sum(double* array, int array_size) {
int s;
if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *log_gamma, double*log_gamma1, double *gamma, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double gnq = gamma[IDX_NQ(n,q)];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
log_gamma[IDX_NQ(n,q)] = log(gnq);
log_gamma1[IDX_NQ(n,q)] = log(1.-gnq);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double Zmq = Z[IDX_MQ(m,q)];
double muZ = mu[IDX_NQ(n,q)]-Zmq;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = log_gamma[IDX_NQ(n,q)]-(muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(2.);
double exp2 = log_gamma1[IDX_NQ(n,q)]-Zmq*Zmq/(2.*lq);
log_psi1 += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
psi2_local[threadIdx.x] = 0;
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double muZhat = mu[IDX_NQ(n,q)]- (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2[IDX_NQ(n,q)]/2. + log_gamma[IDX_NQ(n,q)];
double exp2 = log_gamma1[IDX_NQ(n,q)] - Z2/(2.*lq);
log_psi2_n += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi1, double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom1[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmq = Z[IDX_MQ(m,q)];
double Zmu = Zmq - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gnq-(Zmu*Zmu/(Snq+lq)+log_de)/(2.);
double exp2 = log_gnq1-Zmq*Zmq/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS_local += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum);
dgamma_local += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zmq*Zmq/(lq*lq)*d_exp2)/(2.*exp_sum);
g_local[threadIdx.x] = lpsi1*(-Zmu/denom*d_exp1-Zmq/lq*d_exp2)/exp_sum;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom2[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_de/2. + log_gnq;
double exp2 = log_gnq1 - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi2*muZhat/denom*d_exp1/exp_sum;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma_local += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
g_local[threadIdx.x] += 2.*lpsi2*((muZhat/denom-dZ/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPPPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_SSRBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_gamma1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
# derivatives
'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dgamma_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
# grad
'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_gamma_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
}
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S, gamma):
if len(lengthscale)==1:
self.gpuCache['l_gpu'].fill(lengthscale)
else:
self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
self.gpuCache['gamma_gpu'].set(np.asfortranarray(gamma))
N,Q = self.gpuCache['S_gpu'].shape
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['log_gamma_gpu'].gpudata,self.gpuCache['log_gamma1_gpu'].gpudata,self.gpuCache['gamma_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['dgamma_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
self.gpuCache['grad_gamma_gpu'].fill(0.)
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu']
dvar_gpu = self.gpuCache['dvar_gpu']
dl_gpu = self.gpuCache['dl_gpu']
dZ_gpu = self.gpuCache['dZ_gpu']
dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
dgamma_gpu = self.gpuCache['dgamma_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu']
grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative()
# t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1compDer '+str(t)
# t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2compDer '+str(t)
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
sum_axis(grad_gamma_gpu,dgamma_gpu,N*Q,self.blocknum)
dL_dgamma = grad_gamma_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
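As a rough reference for what psicomputations returns, the sketch below estimates psi1 by Monte Carlo under the spike-and-slab posterior (mean mu, variance S, inclusion probability gamma). It is only an illustrative cross-check and is far slower than the GPU kernels above.
# --- illustrative Monte Carlo cross-check, not part of the diff ---
import numpy as np

def rbf_k(X, Z, variance, lengthscale):
    # Plain RBF kernel between the rows of X and Z.
    r2 = ((X[:, None, :] - Z[None, :, :])**2 / lengthscale**2).sum(-1)
    return variance * np.exp(-0.5 * r2)

def psi1_monte_carlo(variance, lengthscale, Z, mu, S, gamma, n_samples=20000):
    # psi1[n, m] = E_q[k(x_n, z_m)] with x_nq = b_nq * N(mu_nq, S_nq),
    # b_nq ~ Bernoulli(gamma_nq)  (spike-and-slab posterior).
    N, Q = mu.shape
    acc = np.zeros((N, Z.shape[0]))
    for _ in range(n_samples):
        b = np.random.rand(N, Q) < gamma
        x = b * (mu + np.sqrt(S) * np.random.randn(N, Q))
        acc += rbf_k(x, Z, variance, lengthscale)
    return acc / n_samples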

GPy/kern/_src/rbf.py

@ -0,0 +1,71 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from stationary import Stationary
from psi_comp import PSICOMP_RBF
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
from ...util.config import *
class RBF(Stationary):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
"""
_support_GPU = True
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
if self.useGPU:
self.psicomp = PSICOMP_RBF_GPU()
else:
self.psicomp = PSICOMP_RBF()
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, r):
return -r*self.K_of_r(r)
def __getstate__(self):
dc = super(RBF, self).__getstate__()
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
return dc
def __setstate__(self, state):
return super(RBF, self).__setstate__(state)
def spectrum(self, omega):
assert self.input_dim == 1 #TODO: higher dim spectra?
return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale**2*omega**2/2)
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
def psi1(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
def psi2(self, Z, variational_posterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
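A minimal usage sketch for the kernel below, assuming GPy is importable and exposes it as GPy.kern.RBF (useGPU=True additionally requires a working pycuda setup):
# --- illustrative usage, not part of the diff ---
import numpy as np
import GPy

X = np.random.randn(100, 2)
k = GPy.kern.RBF(input_dim=2, variance=1.0, lengthscale=0.5, ARD=False)
K = k.K(X)                                   # 100 x 100 covariance matrix
assert np.allclose(np.diag(K), k.Kdiag(X))   # diagonal equals the variance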

GPy/kern/_src/splitKern.py

@ -0,0 +1,204 @@
"""
Kernels for functions sharing a common value at a split point Xp: within each index group the base kernel is used directly, while cross-covariances between the two groups are routed through Xp (see SplitKern_cross).
"""
import numpy as np
from kern import Kern,CombinationKernel
from .independent_outputs import index_to_slices
import itertools
class DiffGenomeKern(Kern):
def __init__(self, kernel, idx_p, Xp, index_dim=-1, name='DiffGenomeKern'):
self.idx_p = idx_p
self.index_dim=index_dim
self.kern = SplitKern(kernel,Xp, index_dim=index_dim)
super(DiffGenomeKern, self).__init__(input_dim=kernel.input_dim+1, active_dims=None, name=name)
self.add_parameter(self.kern)
def K(self, X, X2=None):
assert X2==None
K = self.kern.K(X,X2)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return K
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
K_c = K[idx_start:idx_end,idx_start:idx_end].copy()
K[idx_start:idx_end,:] = K[:self.idx_p,:]
K[:,idx_start:idx_end] = K[:,:self.idx_p]
K[idx_start:idx_end,idx_start:idx_end] = K_c
return K
def Kdiag(self,X):
Kdiag = self.kern.Kdiag(X)
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
return Kdiag
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
Kdiag[idx_start:idx_end] = Kdiag[:self.idx_p]
return Kdiag
def update_gradients_full(self,dL_dK,X,X2=None):
assert X2==None
if self.idx_p<=0 or self.idx_p>X.shape[0]/2:
self.kern.update_gradients_full(dL_dK, X)
return
slices = index_to_slices(X[:,self.index_dim])
idx_start = slices[1][0].start
idx_end = idx_start+self.idx_p
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[:self.idx_p],X)
grad_p1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[:self.idx_p])
grad_p2 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[:self.idx_p],X[idx_start:idx_end])
grad_p3 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[:self.idx_p])
grad_p4 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,:], X[idx_start:idx_end],X)
grad_n1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[:,idx_start:idx_end], X, X[idx_start:idx_end])
grad_n2 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK[idx_start:idx_end,idx_start:idx_end], X[idx_start:idx_end], X[idx_start:idx_end])
grad_n3 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dK, X)
self.kern.gradient += grad_p1+grad_p2-grad_p3-grad_p4-grad_n1-grad_n2+2*grad_n3
def update_gradients_diag(self, dL_dKdiag, X):
pass
class SplitKern(CombinationKernel):
def __init__(self, kernel, Xp, index_dim=-1, name='SplitKern'):
assert isinstance(index_dim, int), "The index dimension must be an integer!"
self.kern = kernel
self.kern_cross = SplitKern_cross(kernel,Xp)
super(SplitKern, self).__init__(kernels=[self.kern, self.kern_cross], extra_dims=[index_dim], name=name)
self.index_dim = index_dim
def K(self,X ,X2=None):
slices = index_to_slices(X[:,self.index_dim])
assert len(slices)<=2, 'The Split kernel only supports two different indices'
if X2 is None:
target = np.zeros((X.shape[0], X.shape[0]))
# diagonal blocks
[[target.__setitem__((s,ss), self.kern.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
if len(slices)>1:
# cross blocks
[target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[0], slices[1])]
# cross blocks
[target.__setitem__((s,ss), self.kern_cross.K(X[s,:], X[ss,:])) for s,ss in itertools.product(slices[1], slices[0])]
else:
slices2 = index_to_slices(X2[:,self.index_dim])
assert len(slices2)<=2, 'The Split kernel only supports two different indices'
target = np.zeros((X.shape[0], X2.shape[0]))
# diagonal blocks
[[target.__setitem__((s,s2), self.kern.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
if len(slices)>1:
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1:
[target.__setitem__((s,s2), self.kern_cross.K(X[s,:],X2[s2,:])) for s,s2 in itertools.product(slices[0], slices2[1])]
return target
def Kdiag(self,X):
return self.kern.Kdiag(X)
def update_gradients_full(self,dL_dK,X,X2=None):
slices = index_to_slices(X[:,self.index_dim])
target = np.zeros(self.kern.size)
def collate_grads(dL, X, X2, cross=False):
if cross:
self.kern_cross.update_gradients_full(dL,X,X2)
target[:] += self.kern_cross.kern.gradient
else:
self.kern.update_gradients_full(dL,X,X2)
target[:] += self.kern.gradient
if X2 is None:
assert dL_dK.shape==(X.shape[0],X.shape[0])
[[collate_grads(dL_dK[s,ss], X[s], X[ss]) for s,ss in itertools.product(slices_i, slices_i)] for slices_i in slices]
if len(slices)>1:
[collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[0], slices[1])]
[collate_grads(dL_dK[s,ss], X[s], X[ss], True) for s,ss in itertools.product(slices[1], slices[0])]
else:
assert dL_dK.shape==(X.shape[0],X2.shape[0])
slices2 = index_to_slices(X2[:,self.index_dim])
[[collate_grads(dL_dK[s,s2],X[s],X2[s2]) for s,s2 in itertools.product(slices[i], slices2[i])] for i in xrange(min(len(slices),len(slices2)))]
if len(slices)>1:
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[1], slices2[0])]
if len(slices2)>1:
[collate_grads(dL_dK[s,s2], X[s], X2[s2], True) for s,s2 in itertools.product(slices[0], slices2[1])]
self.kern.gradient = target
def update_gradients_diag(self, dL_dKdiag, X):
self.kern.update_gradients_diag(dL_dKdiag, X)
class SplitKern_cross(Kern):
def __init__(self, kernel, Xp, name='SplitKern_cross'):
assert isinstance(kernel, Kern)
self.kern = kernel
if not isinstance(Xp,np.ndarray):
Xp = np.array([[Xp]])
self.Xp = Xp
super(SplitKern_cross, self).__init__(input_dim=kernel.input_dim, active_dims=None, name=name)
def K(self, X, X2=None):
if X2 is None:
return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X))/self.kern.K(self.Xp,self.Xp)
else:
return np.dot(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X2))/self.kern.K(self.Xp,self.Xp)
def Kdiag(self, X):
return np.inner(self.kern.K(X,self.Xp),self.kern.K(self.Xp,X).T)/self.kern.K(self.Xp,self.Xp)
def update_gradients_full(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
k1 = self.kern.K(X,self.Xp)
k2 = self.kern.K(self.Xp,X2)
k3 = self.kern.K(self.Xp,self.Xp)
dL_dk1 = np.einsum('ij,j->i',dL_dK,k2[0])/k3[0,0]
dL_dk2 = np.einsum('ij,i->j',dL_dK,k1[:,0])/k3[0,0]
dL_dk3 = np.einsum('ij,ij->',dL_dK,-np.dot(k1,k2)/(k3[0,0]*k3[0,0]))
self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
grad = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X2)
grad += self.kern.gradient.copy()
self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
grad += self.kern.gradient.copy()
self.kern.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
k1 = self.kern.K(X,self.Xp)
k2 = self.kern.K(self.Xp,X)
k3 = self.kern.K(self.Xp,self.Xp)
dL_dk1 = dL_dKdiag*k2[0]/k3
dL_dk2 = dL_dKdiag*k1[:,0]/k3
dL_dk3 = -dL_dKdiag*(k1[:,0]*k2[0]).sum()/(k3*k3)
self.kern.update_gradients_full(dL_dk1[:,None],X,self.Xp)
grad1 = self.kern.gradient.copy()
self.kern.update_gradients_full(dL_dk2[None,:],self.Xp,X)
grad2 = self.kern.gradient.copy()
self.kern.update_gradients_full(np.array([[dL_dk3]]),self.Xp,self.Xp)
grad3 = self.kern.gradient.copy()
self.kern.gradient = grad1+grad2+grad3
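SplitKern_cross above treats the two regimes as conditionally independent given the function value at the split point Xp, i.e. k_cross(x, x') = k(x, Xp) k(Xp, x') / k(Xp, Xp). A small self-contained numpy sketch of that construction (illustrative only):
# --- illustrative sketch of the cross-covariance construction, not part of the diff ---
import numpy as np

def rbf(a, b, variance=1.0, lengthscale=1.0):
    return variance * np.exp(-0.5 * (a[:, None] - b[None, :])**2 / lengthscale**2)

Xp = np.array([5.0])                  # split point
x1 = np.linspace(0., 4., 5)           # inputs from the first regime
x2 = np.linspace(6., 10., 5)          # inputs from the second regime
# Cross-covariance routed through the split point, as in SplitKern_cross.K:
K_cross = np.dot(rbf(x1, Xp), rbf(Xp, x2)) / rbf(Xp, Xp)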

GPy/kern/_src/static.py

@ -0,0 +1,122 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
class Static(Kern):
def __init__(self, input_dim, variance, active_dims, name):
super(Static, self).__init__(input_dim, active_dims, name)
self.variance = Param('variance', variance, Logexp())
self.link_parameters(self.variance)
def Kdiag(self, X):
ret = np.empty((X.shape[0],), dtype=np.float64)
ret[:] = self.variance
return ret
def gradients_X(self, dL_dK, X, X2=None):
return np.zeros(X.shape)
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape)
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
def psi0(self, Z, variational_posterior):
return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior):
return self.K(variational_posterior.mean, Z)
def psi2(self, Z, variational_posterior):
K = self.K(variational_posterior.mean, Z)
return np.einsum('ij,ik->jk',K,K) #K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inheriting classes
def input_sensitivity(self, summarize=True):
if summarize:
return super(Static, self).input_sensitivity(summarize=summarize)
else:
return np.ones(self.input_dim) * self.variance
class White(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
super(White, self).__init__(input_dim, variance, active_dims, name)
def K(self, X, X2=None):
if X2 is None:
return np.eye(X.shape[0])*self.variance
else:
return np.zeros((X.shape[0], X2.shape[0]))
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.trace(dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()
class Bias(Static):
def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
super(Bias, self).__init__(input_dim, variance, active_dims, name)
def K(self, X, X2=None):
shape = (X.shape[0], X.shape[0] if X2 is None else X2.shape[0])
ret = np.empty(shape, dtype=np.float64)
ret[:] = self.variance
return ret
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = dL_dK.sum()
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior):
ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
class Fixed(Static):
def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param covariance_matrix: the fixed covariance matrix that is scaled by the variance
:type covariance_matrix: np.ndarray
:param variance: the variance of the kernel
:type variance: float
"""
super(Fixed, self).__init__(input_dim, variance, active_dims, name)
self.fixed_K = covariance_matrix
def K(self, X, X2=None):
return self.variance * self.fixed_K
def Kdiag(self, X):
return self.variance * self.fixed_K.diagonal()
def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.einsum('ij,ij', dL_dK, self.fixed_K)
def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
def psi2(self, Z, variational_posterior):
return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum()
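A quick sketch of the static kernels in use, assuming GPy exposes them as GPy.kern.White and GPy.kern.Bias:
# --- illustrative usage, not part of the diff ---
import numpy as np
import GPy

X = np.random.randn(10, 3)
k_white = GPy.kern.White(3, variance=0.1)
k_bias = GPy.kern.Bias(3, variance=2.0)

assert np.allclose(k_white.K(X), 0.1 * np.eye(10))        # noise only on the diagonal
assert np.allclose(k_bias.K(X), 2.0 * np.ones((10, 10)))  # constant covariance everywhere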

GPy/kern/_src/stationary.py

@ -0,0 +1,484 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.linalg import tdot
from ... import util
import numpy as np
from scipy import integrate, weave
from ...util.config import config # for assessing whether to use weave
from ...util.caching import Cache_this
class Stationary(Kern):
"""
Stationary kernels (covariance functions).
Stationary covariance functions depend only on r, where r is defined as
r = \sqrt{ \sum_{q=1}^Q (x_q - x'_q)^2 }
The covariance function k(x, x') can then be written k(r).
In this implementation, r is scaled by the lengthscale parameter(s):
r = \sqrt{ \sum_{q=1}^Q \frac{(x_q - x'_q)^2}{\ell_q^2} }.
By default there is only one lengthscale; separate lengthscales for each
dimension can be enabled by setting ARD=True.
To implement a stationary covariance function using this class, one need
only define the covariance function k(r) and its derivative.
...
def K_of_r(self, r):
return foo
def dK_dr(self, r):
return bar
The lengthscale(s) and variance parameters are added to the structure automatically.
"""
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU)
self.ARD = ARD
if not ARD:
if lengthscale is None:
lengthscale = np.ones(1)
else:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size == 1, "Only 1 lengthscale needed for non-ARD kernel"
else:
if lengthscale is not None:
lengthscale = np.asarray(lengthscale)
assert lengthscale.size in [1, input_dim], "Bad number of lengthscales"
if lengthscale.size != input_dim:
lengthscale = np.ones(input_dim)*lengthscale
else:
lengthscale = np.ones(self.input_dim)
self.lengthscale = Param('lengthscale', lengthscale, Logexp())
self.variance = Param('variance', variance, Logexp())
assert self.variance.size==1
self.link_parameters(self.variance, self.lengthscale)
def K_of_r(self, r):
raise NotImplementedError, "implement the covariance function as a fn of r to use this class"
def dK_dr(self, r):
raise NotImplementedError, "implement derivative of the covariance function wrt r to use this class"
@Cache_this(limit=5, ignore_args=())
def K(self, X, X2=None):
"""
Kernel function applied on inputs X and X2.
In the stationary case there is an inner function depending on the
distances from X to X2, called r.
K(X, X2) = K_of_r(r(X, X2)), where r is the scaled distance between X and X2.
"""
r = self._scaled_dist(X, X2)
return self.K_of_r(r)
@Cache_this(limit=3, ignore_args=())
def dK_dr_via_X(self, X, X2):
#a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2))
def _unscaled_dist(self, X, X2=None):
"""
Compute the Euclidean distance between each row of X and X2, or between
each pair of rows of X if X2 is None.
"""
#X, = self._slice_X(X)
if X2 is None:
Xsq = np.sum(np.square(X),1)
r2 = -2.*tdot(X) + (Xsq[:,None] + Xsq[None,:])
util.diag.view(r2)[:,]= 0. # force diagonal to be zero: sometimes numerically a little negative
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
else:
#X2, = self._slice_X(X2)
X1sq = np.sum(np.square(X),1)
X2sq = np.sum(np.square(X2),1)
r2 = -2.*np.dot(X, X2.T) + X1sq[:,None] + X2sq[None,:]
r2 = np.clip(r2, 0, np.inf)
return np.sqrt(r2)
@Cache_this(limit=5, ignore_args=())
def _scaled_dist(self, X, X2=None):
"""
Efficiently compute the scaled distance, r.
r = \sqrt( \sum_{q=1}^Q (x_q - x'_q)^2/l_q^2 )
Note that if there is only one lengthscale, l comes outside the sum. In
this case we compute the unscaled distance first (in a separate
function for caching) and divide by lengthscale afterwards
"""
if self.ARD:
if X2 is not None:
X2 = X2 / self.lengthscale
return self._unscaled_dist(X/self.lengthscale, X2)
else:
return self._unscaled_dist(X, X2)/self.lengthscale
def Kdiag(self, X):
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
def update_gradients_diag(self, dL_dKdiag, X):
"""
Given the derivative of the objective with respect to the diagonal of
the covariance matrix, compute the derivative wrt the parameters of
this kernel and store it in the <parameter>.gradient field.
See also update_gradients_full
"""
self.variance.gradient = np.sum(dL_dKdiag)
self.lengthscale.gradient = 0.
def update_gradients_full(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt the covariance matrix
(dL_dK), compute the gradient wrt the parameters of this kernel,
and store in the parameters object as e.g. self.variance.gradient
"""
self.variance.gradient = np.einsum('ij,ij,i', self.K(X, X2), dL_dK, 1./self.variance)
#now the lengthscale gradient(s)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
if self.ARD:
#rinv = self._inv_dist(X, X2) # this is rather high memory? Should we loop instead?
#d = X[:, None, :] - X2[None, :, :]
#x_xl3 = np.square(d)
#self.lengthscale.gradient = -((dL_dr*rinv)[:,:,None]*x_xl3).sum(0).sum(0)/self.lengthscale**3
tmp = dL_dr*self._inv_dist(X, X2)
if X2 is None: X2 = X
if config.getboolean('weave', 'working'):
try:
self.lengthscale.gradient = self.weave_lengthscale_grads(tmp, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
self.lengthscale.gradient = np.array([np.einsum('ij,ij,...', tmp, np.square(X[:,q:q+1] - X2[:,q:q+1].T), -1./self.lengthscale[q]**3) for q in xrange(self.input_dim)])
else:
r = self._scaled_dist(X, X2)
self.lengthscale.gradient = -np.sum(dL_dr*r)/self.lengthscale
def _inv_dist(self, X, X2=None):
"""
Compute the elementwise inverse of the distance matrix, except on the
diagonal, where we return zero (the distance on the diagonal is zero).
This term appears in derivatives.
"""
dist = self._scaled_dist(X, X2).copy()
return 1./np.where(dist != 0., dist, np.inf)
def weave_lengthscale_grads(self, tmp, X, X2):
"""Use scipy.weave to compute derivatives wrt the lengthscales"""
N,M = tmp.shape
Q = X.shape[1]
if hasattr(X, 'values'):X = X.values
if hasattr(X2, 'values'):X2 = X2.values
grads = np.zeros(self.input_dim)
code = """
double gradq;
for(int q=0; q<Q; q++){
gradq = 0;
for(int n=0; n<N; n++){
for(int m=0; m<M; m++){
gradq += tmp(n,m)*(X(n,q)-X2(m,q))*(X(n,q)-X2(m,q));
}
}
grads(q) = gradq;
}
"""
weave.inline(code, ['tmp', 'X', 'X2', 'grads', 'N', 'M', 'Q'], type_converters=weave.converters.blitz, support_code="#include <math.h>")
return -grads/self.lengthscale**3
def gradients_X(self, dL_dK, X, X2=None):
"""
Given the derivative of the objective wrt K (dL_dK), compute the derivative wrt X
"""
if config.getboolean('weave', 'working'):
try:
return self.gradients_X_weave(dL_dK, X, X2)
except:
print "\n Weave compilation failed. Falling back to (slower) numpy implementation\n"
config.set('weave', 'working', 'False')
return self.gradients_X_(dL_dK, X, X2)
else:
return self.gradients_X_(dL_dK, X, X2)
def gradients_X_(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
#The high-memory numpy way:
#d = X[:, None, :] - X2[None, :, :]
#ret = np.sum(tmp[:,:,None]*d,1)/self.lengthscale**2
#the lower memory way with a loop
ret = np.empty(X.shape, dtype=np.float64)
for q in xrange(self.input_dim):
np.sum(tmp*(X[:,q][:,None]-X2[:,q][None,:]), axis=1, out=ret[:,q])
ret /= self.lengthscale**2
return ret
def gradients_X_weave(self, dL_dK, X, X2=None):
invdist = self._inv_dist(X, X2)
dL_dr = self.dK_dr_via_X(X, X2) * dL_dK
tmp = invdist*dL_dr
if X2 is None:
tmp = tmp + tmp.T
X2 = X
code = """
int n,m,d;
double retnd;
#pragma omp parallel for private(n,d, retnd, m)
for(d=0;d<D;d++){
for(n=0;n<N;n++){
retnd = 0.0;
for(m=0;m<M;m++){
retnd += tmp(n,m)*(X(n,d)-X2(m,d));
}
ret(n,d) = retnd;
}
}
"""
if hasattr(X, 'values'):X = X.values #remove the GPy wrapping to make passing into weave safe
if hasattr(X2, 'values'):X2 = X2.values
ret = np.zeros(X.shape)
N,D = X.shape
N,M = tmp.shape
from scipy import weave
support_code = """
#include <omp.h>
#include <stdio.h>
"""
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
weave.inline(code, ['ret', 'N', 'D', 'M', 'tmp', 'X', 'X2'], type_converters=weave.converters.blitz, support_code=support_code, **weave_options)
return ret/self.lengthscale**2
def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape)
def input_sensitivity(self, summarize=True):
return np.ones(self.input_dim)/self.lengthscale**2
class Exponential(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Exponential'):
super(Exponential, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r)
def dK_dr(self, r):
return -0.5*self.K_of_r(r)
class OU(Stationary):
"""
OU kernel:
.. math::
k(r) = \\sigma^2 \exp(- r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='OU'):
super(OU, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-r)
def dK_dr(self,r):
return -1.*self.variance*np.exp(-r)
class Matern32(Stationary):
"""
Matern 3/2 kernel:
.. math::
k(r) = \\sigma^2 (1 + \\sqrt{3} r) \exp(- \sqrt{3} r) \\ \\ \\ \\ \\text{ where } r = \sqrt{\sum_{i=1}^input_dim \\frac{(x_i-y_i)^2}{\ell_i^2} }
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat32'):
super(Matern32, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * (1. + np.sqrt(3.) * r) * np.exp(-np.sqrt(3.) * r)
def dK_dr(self,r):
return -3.*self.variance*r*np.exp(-np.sqrt(3.)*r)
def Gram_matrix(self, F, F1, F2, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the
RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x, i):
return(3. / self.lengthscale ** 2 * F[i](x) + 2 * np.sqrt(3) / self.lengthscale * F1[i](x) + F2[i](x))
n = F.shape[0]
G = np.zeros((n, n))
for i in range(n):
for j in range(i, n):
G[i, j] = G[j, i] = integrate.quad(lambda x : L(x, i) * L(x, j), lower, upper)[0]
Flower = np.array([f(lower) for f in F])[:, None]
F1lower = np.array([f(lower) for f in F1])[:, None]
return(self.lengthscale ** 3 / (12.*np.sqrt(3) * self.variance) * G + 1. / self.variance * np.dot(Flower, Flower.T) + self.lengthscale ** 2 / (3.*self.variance) * np.dot(F1lower, F1lower.T))
class Matern52(Stationary):
"""
Matern 5/2 kernel:
.. math::
k(r) = \sigma^2 (1 + \sqrt{5} r + \\frac53 r^2) \exp(- \sqrt{5} r)
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Mat52'):
super(Matern52, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance*(1+np.sqrt(5.)*r+5./3*r**2)*np.exp(-np.sqrt(5.)*r)
def dK_dr(self, r):
return self.variance*(10./3*r -5.*r -5.*np.sqrt(5.)/3*r**2)*np.exp(-np.sqrt(5.)*r)
def Gram_matrix(self, F, F1, F2, F3, lower, upper):
"""
Return the Gram matrix of the vector of functions F with respect to the RKHS norm. The use of this function is limited to input_dim=1.
:param F: vector of functions
:type F: np.array
:param F1: vector of derivatives of F
:type F1: np.array
:param F2: vector of second derivatives of F
:type F2: np.array
:param F3: vector of third derivatives of F
:type F3: np.array
:param lower,upper: boundaries of the input domain
:type lower,upper: floats
"""
assert self.input_dim == 1
def L(x,i):
return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0]
G = np.zeros((n,n))
for i in range(n):
for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2))
class ExpQuad(Stationary):
"""
The Exponentiated quadratic covariance function.
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
notes::
- Yes, this is exactly the same as the RBF covariance function, but the
RBF implementation also has some features for doing variational kernels
(the psi-statistics).
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='ExpQuad'):
super(ExpQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, r):
return -r*self.K_of_r(r)
class Cosine(Stationary):
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='Cosine'):
super(Cosine, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.cos(r)
def dK_dr(self, r):
return -self.variance * np.sin(r)
class RatQuad(Stationary):
"""
Rational Quadratic Kernel
.. math::
k(r) = \sigma^2 \\bigg( 1 + \\frac{r^2}{2} \\bigg)^{- \\alpha}
"""
def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
self.power = Param('power', power, Logexp())
self.link_parameters(self.power)
def K_of_r(self, r):
r2 = np.power(r, 2.)
return self.variance*np.power(1. + r2/2., -self.power)
def dK_dr(self, r):
r2 = np.power(r, 2.)
return -self.variance*self.power*r*np.power(1. + r2/2., - self.power - 1.)
def update_gradients_full(self, dL_dK, X, X2=None):
super(RatQuad, self).update_gradients_full(dL_dK, X, X2)
r = self._scaled_dist(X, X2)
r2 = np.power(r, 2.)
dK_dpow = -self.variance * np.power(2., self.power) * np.power(r2 + 2., -self.power) * np.log(0.5*(r2+2.))
grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
self.power.gradient = 0.
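As the Stationary docstring above explains, a new stationary kernel only needs K_of_r and dK_dr. A hedged sketch of a custom kernel following that recipe (the kernel itself and the import path are illustrative assumptions):
# --- illustrative subclass, not part of the diff ---
import numpy as np
from GPy.kern._src.stationary import Stationary  # path as introduced by this diff

class Cauchy(Stationary):
    """k(r) = variance / (1 + r^2), a heavy-tailed stationary kernel."""
    def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False,
                 active_dims=None, name='Cauchy'):
        super(Cauchy, self).__init__(input_dim, variance, lengthscale, ARD,
                                     active_dims, name)

    def K_of_r(self, r):
        return self.variance / (1. + r**2)

    def dK_dr(self, r):
        # d/dr [variance / (1 + r^2)] = -2 * variance * r / (1 + r^2)^2
        return -2. * self.variance * r / (1. + r**2)**2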

GPy/kern/_src/symbolic.py

@ -0,0 +1,75 @@
# Check Matthew Rocklin's blog post.
import sympy as sym
import numpy as np
from kern import Kern
from ...core.symbolic import Symbolic_core
class Symbolic(Kern, Symbolic_core):
"""
"""
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
Kern.__init__(self, input_dim, active_dims, name=name)
kdiag = k
self.cacheable = ['X', 'Z']
Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
self.output_dim = output_dim
def __add__(self,other):
return spkern(self._sym_k+other._sym_k)
def _set_expressions(self, expressions):
"""This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
Symbolic_core._set_expressions(self, expressions)
Symbolic_core._set_variables(self, self.cacheable)
# Substitute z with x to obtain kdiag.
for x, z in zip(self.variables['X'], self.variables['Z']):
expressions['kdiag'] = expressions['kdiag'].subs(z, x)
Symbolic_core._set_expressions(self, expressions)
def K(self,X,X2=None):
if X2 is None:
return self.eval_function('k', X=X, Z=X)
else:
return self.eval_function('k', X=X, Z=X2)
def Kdiag(self,X):
d = self.eval_function('kdiag', X=X)
if not d.shape[0] == X.shape[0]:
d = np.tile(d, (X.shape[0], 1))
return d
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
if X2 is None:
g *= 2
return g
def gradients_X_diag(self, dL_dK, X):
return self.eval_gradients_X('kdiag', dL_dK, X=X)
def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first
if X2 is None:
# need to double this inside ...
gradients = self.eval_update_gradients('k', dL_dK, X=X)
else:
gradients = self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
for name, val in gradients:
setattr(getattr(self, name), 'gradient', val)
def update_gradients_diag(self, dL_dKdiag, X):
gradients = self.eval_update_gradients('kdiag', dL_dKdiag, X)
for name, val in gradients:
setattr(getattr(self, name), 'gradient', val)
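The _set_expressions override above derives kdiag from k by substituting the Z symbols with the corresponding X symbols. A tiny sympy illustration of that substitution step (the symbol names here are illustrative; the real bookkeeping is done by Symbolic_core):
# --- illustrative sympy sketch, not part of the diff ---
import sympy as sym

x_0, z_0, variance, lengthscale = sym.symbols('x_0 z_0 variance lengthscale', positive=True)
k = variance * sym.exp(-(x_0 - z_0)**2 / (2 * lengthscale**2))  # an RBF-like expression
kdiag = k.subs(z_0, x_0)                                        # diagonal: set z = x
assert sym.simplify(kdiag - variance) == 0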


@ -0,0 +1,61 @@
#include <math.h>
#include <float.h>
#include <stdlib.h>
double DiracDelta(double x){
// TODO: this doesn't seem to be a dirac delta ... should return infinity. Neil
if((x<0.000001) & (x>-0.000001))//go on, laugh at my c++ skills
return 1.0;
else
return 0.0;
};
double DiracDelta(double x,int foo){
return 0.0;
};
double sinc(double x){
if (x==0)
return 1.0;
else
return sin(x)/x;
}
double sinc_grad(double x){
if (x==0)
return 0.0;
else
return (x*cos(x) - sin(x))/(x*x);
}
double erfcx(double x){
double xneg=-sqrt(log(DBL_MAX/2));
double xmax = 1/(sqrt(M_PI)*DBL_MIN);
xmax = DBL_MAX<xmax ? DBL_MAX : xmax;
// Find values where erfcx can be evaluated
double t = 3.97886080735226 / (fabs(x) + 3.97886080735226); // fabs: plain abs() would truncate to int
double u = t-0.5;
double y = (((((((((u * 0.00127109764952614092 + 1.19314022838340944e-4) * u
- 0.003963850973605135) * u - 8.70779635317295828e-4) * u
+ 0.00773672528313526668) * u + 0.00383335126264887303) * u
- 0.0127223813782122755) * u - 0.0133823644533460069) * u
+ 0.0161315329733252248) * u + 0.0390976845588484035) * u + 0.00249367200053503304;
if (x<xneg)
return -INFINITY;
else if (x<0)
return 2*exp(x*x)-y;
else if (x>xmax)
return 0.0;
else
return y;
}
double ln_diff_erf(double x0, double x1){
if (x0==x1)
return INFINITY;
else if(x0<0 && x1>0 || x0>0 && x1<0)
return log(erf(x0)-erf(x1));
else if(x1>0)
return log(erfcx(x1)-erfcx(x0)*exp(x1*x1 - x0*x0))-x1*x1;
else
return log(erfcx(-x0)-erfcx(-x1)*exp(x0*x0 - x1*x1))-x0*x0;
}
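For reference, ln_diff_erf computes log(erf(x0) - erf(x1)); the branches above exist to keep it stable when the naive difference underflows. A hedged scipy sketch of the same quantity (assumes scipy >= 0.12 for erfcx):
# --- illustrative Python cross-check, not part of the diff ---
import numpy as np
from scipy.special import erf, erfcx

def ln_diff_erf_naive(x0, x1):
    # Fine whenever erf(x0) - erf(x1) is not tiny.
    return np.log(erf(x0) - erf(x1))

def ln_diff_erf_stable(x0, x1):
    # For 0 < x1 < x0, mirrors the scaled-complementary-erf branch in the C code.
    return np.log(erfcx(x1) - erfcx(x0) * np.exp(x1**2 - x0**2)) - x1**2

assert np.allclose(ln_diff_erf_naive(2.0, 1.0), ln_diff_erf_stable(2.0, 1.0))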


@ -0,0 +1,9 @@
#include <math.h>
double DiracDelta(double x);
double DiracDelta(double x, int foo);
double sinc(double x);
double sinc_grad(double x);
double erfcx(double x);
double ln_diff_erf(double x0, double x1);


@ -90,7 +90,7 @@ class ODE_1(Kernpart):
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
- def dK_dtheta(self, dL_dK, X, X2, target):
+ def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.abs(X - X2.T)
@ -137,11 +137,7 @@ class ODE_1(Kernpart):
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
dkdvar = k1+k2+k3
- #target[0] dk dvarU
- #target[1] dk dvarY
- #target[2] dk d theta1
- #target[3] dk d theta2
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)


@ -124,7 +124,7 @@ class Eq_ode1(Kernpart):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
- def dK_dtheta(self,dL_dK,X,X2,target):
+ def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
@ -193,7 +193,7 @@ class Eq_ode1(Kernpart):
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
- def dK_dX(self,dL_dK,X,X2,target):
+ def gradients_X(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):


@ -50,7 +50,7 @@ class FiniteDimensional(Kernpart):
def Kdiag(self,X,target):
product = np.diag(self.K(X, X))
np.add(target,product,target)
- def dK_dtheta(self,X,X2,target):
+ def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])

Some files were not shown because too many files have changed in this diff.