Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Max Zwiessele 2015-09-18 16:21:59 +01:00
commit 111895f03a
25 changed files with 413 additions and 587 deletions

View file

@ -1,27 +1,41 @@
-language: python
-python:
-- "2.7"
-# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+sudo: false
+os:
+- linux
+# - osx
+language: python
+#addons:
+#  apt:
+#    packages:
+#    - gfortran
+#    - libatlas-dev
+#    - libatlas-base-dev
+#    - liblapack-dev
+python:
+- 2.7
+- 3.3
+- 3.4
 before_install:
 #Install a mini version of anaconda such that we can easily install our dependencies
 - wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh
 - chmod +x miniconda.sh
 - ./miniconda.sh -b
 - export PATH=/home/travis/miniconda/bin:$PATH
-- conda update --yes conda
-# Workaround for a permissions issue with Travis virtual machine images
-# that breaks Python's multiprocessing:
-# https://github.com/travis-ci/travis-cookbooks/issues/155
-- sudo rm -rf /dev/shm
-- sudo ln -s /run/shm /dev/shm
+# - conda update --yes conda
 install:
-- conda install --yes python=$TRAVIS_PYTHON_VERSION atlas numpy=1.9 scipy=0.16 matplotlib nose sphinx pip nose
-#- pip install .
-- python setup.py build_ext --inplace
-#--use-mirrors
-#
-# command to run tests, e.g. python setup.py test
+- conda install --yes python=$TRAVIS_PYTHON_VERSION numpy=1.9 scipy=0.16 nose pip six
+- pip install .
 script:
-- nosetests GPy/testing
+- cd $HOME
+- mkdir empty
+- cd empty
+- nosetests GPy.testing
+cache:
+  directories:
+  - $HOME/.cache/pip
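
A note on the new script step: running nosetests from an empty directory makes nose import the GPy that "pip install ." put into site-packages instead of the un-built source checkout. A rough local equivalent, assuming nosetests is on the PATH and GPy is installed (the temporary directory is only a stand-in for the CI's "empty" folder):

    # Rough local equivalent of the CI script step (assumes GPy is installed
    # and nosetests is available); an empty working directory keeps the
    # source checkout off the import path.
    import subprocess
    import tempfile

    workdir = tempfile.mkdtemp()  # stand-in for Travis' "empty" directory
    subprocess.check_call(["nosetests", "GPy.testing"], cwd=workdir)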

View file

@ -98,7 +98,7 @@ class GP(Model):
inference_method = exact_gaussian_inference.ExactGaussianInference() inference_method = exact_gaussian_inference.ExactGaussianInference()
else: else:
inference_method = expectation_propagation.EP() inference_method = expectation_propagation.EP()
print("defaulting to ", inference_method, "for latent function inference") print("defaulting to " + str(inference_method) + " for latent function inference")
self.inference_method = inference_method self.inference_method = inference_method
logger.info("adding kernel and likelihood as parameters") logger.info("adding kernel and likelihood as parameters")

View file

@ -255,7 +255,7 @@ class Model(Parameterized):
opt.model = self opt.model = self
else: else:
optimizer = optimization.get_optimizer(optimizer) optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model=self, max_iters=max_iters, **kwargs) opt = optimizer(x_init=start, model=self, max_iters=max_iters, **kwargs)
with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo: with VerboseOptimization(self, opt, maxiters=max_iters, verbose=messages, ipython_notebook=ipython_notebook, clear_after_finish=clear_after_finish) as vo:
opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads) opt.run(f_fp=self._objective_grads, f=self._objective, fp=self._grads)

View file

@ -40,7 +40,7 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, inference_method=None, def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, X_variance=None, inference_method=None,
name='sparse gp', Y_metadata=None, normalizer=False): name='sparse gp', Y_metadata=None, normalizer=False):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
@ -73,11 +73,12 @@ class SparseGP(GP):
self.Z = Param('inducing inputs',Z) self.Z = Param('inducing inputs',Z)
self.link_parameter(self.Z, index=0) self.link_parameter(self.Z, index=0)
if trigger_update: self.update_model(True) if trigger_update: self.update_model(True)
if trigger_update: self._trigger_params_changed()
def parameters_changed(self): def parameters_changed(self):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata)
self._update_gradients()
def _update_gradients(self):
self.likelihood.update_gradients(self.grad_dict['dL_dthetaL']) self.likelihood.update_gradients(self.grad_dict['dL_dthetaL'])
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
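
Splitting parameters_changed into the inference call plus _update_gradients lets a subclass run its own inference call (for instance with precomputed psi statistics) and still reuse the shared gradient bookkeeping. A minimal sketch of that override pattern, not part of this commit (the subclass name is hypothetical):

    # Hypothetical subclass; only the structure mirrors the split introduced here.
    from GPy.core import SparseGP

    class MySparseGP(SparseGP):
        def parameters_changed(self):
            # run inference with whatever extra arguments the subclass needs ...
            self.posterior, self._log_marginal_likelihood, self.grad_dict = \
                self.inference_method.inference(self.kern, self.X, self.Z,
                                                self.likelihood, self.Y,
                                                self.Y_metadata)
            # ... then hand over to the shared gradient propagation
            self._update_gradients()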

View file

@ -15,7 +15,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
""" """
try:import pods try:import pods
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') except ImportError:raise ImportWarning('Need pods for example datasets. See https://github.com/sods/ods, or pip install pods.')
data = pods.datasets.oil() data = pods.datasets.oil()
X = data['X'] X = data['X']
Xtest = data['Xtest'] Xtest = data['Xtest']
@ -26,6 +26,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
# Create GP model # Create GP model
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing) m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, num_inducing=num_inducing)
m.Ytest = Ytest
# Constrain all parameters to be positive # Constrain all parameters to be positive
#m.tie_params('.*len') #m.tie_params('.*len')
@ -33,8 +34,7 @@ def oil(num_inducing=50, max_iters=100, kernel=None, optimize=True, plot=True):
# Optimize # Optimize
if optimize: if optimize:
for _ in range(5): m.optimize(messages=1)
m.optimize(max_iters=int(max_iters/5))
print(m) print(m)
#Test #Test
@ -50,9 +50,8 @@ def toy_linear_1d_classification(seed=default_seed, optimize=True, plot=True):
:type seed: int :type seed: int
""" """
try:import pods try:import pods
except ImportError:print('pods unavailable, see https://github.com/sods/ods for example datasets') except ImportError:raise ImportWarning('Need pods for example datasets. See https://github.com/sods/ods, or pip install pods.')
data = pods.datasets.toy_linear_1d_classification(seed=seed) data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1] Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0 Y[Y.flatten() == -1] = 0
@ -150,6 +149,42 @@ def sparse_toy_linear_1d_classification(num_inducing=10, seed=default_seed, opti
print(m) print(m)
return m return m
def sparse_toy_linear_1d_classification_uncertain_input(num_inducing=10, seed=default_seed, optimize=True, plot=True):
"""
Sparse 1D classification example
:param seed: seed value for data generation (default is 4).
:type seed: int
"""
try:import pods
except ImportError:raise ImportWarning('Need pods for example datasets. See https://github.com/sods/ods, or pip install pods.')
import numpy as np
data = pods.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y.flatten() == -1] = 0
X = data['X']
X_var = np.random.uniform(0.3,0.5,X.shape)
# Model definition
m = GPy.models.SparseGPClassificationUncertainInput(X, X_var, Y, num_inducing=num_inducing)
m['.*len'] = 4.
# Optimize
if optimize:
m.optimize()
# Plot
if plot:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(2, 1)
m.plot_f(ax=axes[0])
m.plot(ax=axes[1])
print(m)
return m
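
The new example should be callable like the existing ones; a minimal invocation, assuming GPy and pods import cleanly (plotting disabled so it runs headless):

    # Minimal call of the new example (assumes GPy and pods are installed).
    from GPy.examples import classification
    m = classification.sparse_toy_linear_1d_classification_uncertain_input(
        num_inducing=10, optimize=True, plot=False)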
def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True): def toy_heaviside(seed=default_seed, max_iters=100, optimize=True, plot=True):
""" """
Simple 1D classification example using a Heaviside GP transformation Simple 1D classification example using a Heaviside GP transformation
@ -218,7 +253,7 @@ def crescent_data(model_type='Full', num_inducing=10, seed=default_seed, kernel=
m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing) m = GPy.models.FITCClassification(data['X'], Y, kernel=kernel, num_inducing=num_inducing)
m['.*len'] = 3. m['.*len'] = 3.
if optimize: if optimize:
m.optimize() m.optimize(messages=1)
if plot: if plot:
m.plot() m.plot()

View file

@ -64,8 +64,7 @@ class InferenceMethodList(LatentFunctionInference, list):
from .exact_gaussian_inference import ExactGaussianInference from .exact_gaussian_inference import ExactGaussianInference
from .laplace import Laplace,LaplaceBlock from .laplace import Laplace,LaplaceBlock
from GPy.inference.latent_function_inference.var_dtc import VarDTC from GPy.inference.latent_function_inference.var_dtc import VarDTC
from .expectation_propagation import EP from .expectation_propagation import EP, EPDTC
from .expectation_propagation_dtc import EPDTC
from .dtc import DTC from .dtc import DTC
from .fitc import FITC from .fitc import FITC
from .var_dtc_parallel import VarDTC_minibatch from .var_dtc_parallel import VarDTC_minibatch

View file

@ -28,8 +28,8 @@ class DTC(LatentFunctionInference):
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata) precision = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1: if precision.size > 1:
raise NotImplementedError("no hetero noise with this implementation of DTC") raise NotImplementedError("no hetero noise with this implementation of DTC")
Kmm = kern.K(Z) Kmm = kern.K(Z)
@ -42,7 +42,7 @@ class DTC(LatentFunctionInference):
Kmmi, L, Li, _ = pdinv(Kmm) Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A # Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta) LiUTbeta = np.dot(Li, U.T)*np.sqrt(precision)
A = tdot(LiUTbeta) + np.eye(num_inducing) A = tdot(LiUTbeta) + np.eye(num_inducing)
# factor A # factor A
@ -50,7 +50,7 @@ class DTC(LatentFunctionInference):
# back substitute to get b, P, v # back substitute to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1) tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1) b, _ = dtrtrs(LA, tmp*precision, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1) tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1) v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
@ -59,8 +59,8 @@ class DTC(LatentFunctionInference):
#compute log marginal #compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \ -np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \ 0.5*num_data*output_dim*np.log(precision) + \
-0.5*beta*np.sum(np.square(Y)) + \ -0.5*precision*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b)) 0.5*np.sum(np.square(b))
# Compute dL_dKmm # Compute dL_dKmm
@ -70,11 +70,11 @@ class DTC(LatentFunctionInference):
# Compute dL_dU # Compute dL_dU
vY = np.dot(v.reshape(-1,1),Y.T) vY = np.dot(v.reshape(-1,1),Y.T)
dL_dU = vY - np.dot(vvT_P, U.T) dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU *= beta dL_dU *= precision
#compute dL_dR #compute dL_dR
Uv = np.dot(U, v) Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*beta**2 dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./precision + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1))*precision**2
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR) dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
@ -97,8 +97,8 @@ class vDTC(object):
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
#make sure the noise is not hetero #make sure the noise is not hetero
beta = 1./likelihood.gaussian_variance(Y_metadata) precision = 1./likelihood.gaussian_variance(Y_metadata)
if beta.size > 1: if precision.size > 1:
raise NotImplementedError("no hetero noise with this implementation of DTC") raise NotImplementedError("no hetero noise with this implementation of DTC")
Kmm = kern.K(Z) Kmm = kern.K(Z)
@ -111,9 +111,9 @@ class vDTC(object):
Kmmi, L, Li, _ = pdinv(Kmm) Kmmi, L, Li, _ = pdinv(Kmm)
# Compute A # Compute A
LiUTbeta = np.dot(Li, U.T)*np.sqrt(beta) LiUTbeta = np.dot(Li, U.T)*np.sqrt(precision)
A_ = tdot(LiUTbeta) A_ = tdot(LiUTbeta)
trace_term = -0.5*(np.sum(Knn)*beta - np.trace(A_)) trace_term = -0.5*(np.sum(Knn)*precision - np.trace(A_))
A = A_ + np.eye(num_inducing) A = A_ + np.eye(num_inducing)
# factor A # factor A
@ -121,7 +121,7 @@ class vDTC(object):
# back substitute to get b, P, v # back substitute to get b, P, v
tmp, _ = dtrtrs(L, Uy, lower=1) tmp, _ = dtrtrs(L, Uy, lower=1)
b, _ = dtrtrs(LA, tmp*beta, lower=1) b, _ = dtrtrs(LA, tmp*precision, lower=1)
tmp, _ = dtrtrs(LA, b, lower=1, trans=1) tmp, _ = dtrtrs(LA, b, lower=1, trans=1)
v, _ = dtrtrs(L, tmp, lower=1, trans=1) v, _ = dtrtrs(L, tmp, lower=1, trans=1)
tmp, _ = dtrtrs(LA, Li, lower=1, trans=0) tmp, _ = dtrtrs(LA, Li, lower=1, trans=0)
@ -131,8 +131,8 @@ class vDTC(object):
#compute log marginal #compute log marginal
log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \ log_marginal = -0.5*num_data*output_dim*np.log(2*np.pi) + \
-np.sum(np.log(np.diag(LA)))*output_dim + \ -np.sum(np.log(np.diag(LA)))*output_dim + \
0.5*num_data*output_dim*np.log(beta) + \ 0.5*num_data*output_dim*np.log(precision) + \
-0.5*beta*np.sum(np.square(Y)) + \ -0.5*precision*np.sum(np.square(Y)) + \
0.5*np.sum(np.square(b)) + \ 0.5*np.sum(np.square(b)) + \
trace_term trace_term
@ -145,15 +145,15 @@ class vDTC(object):
vY = np.dot(v.reshape(-1,1),Y.T) vY = np.dot(v.reshape(-1,1),Y.T)
#dL_dU = vY - np.dot(vvT_P, U.T) #dL_dU = vY - np.dot(vvT_P, U.T)
dL_dU = vY - np.dot(vvT_P - Kmmi, U.T) dL_dU = vY - np.dot(vvT_P - Kmmi, U.T)
dL_dU *= beta dL_dU *= precision
#compute dL_dR #compute dL_dR
Uv = np.dot(U, v) Uv = np.dot(U, v)
dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./beta + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*beta**2 dL_dR = 0.5*(np.sum(U*np.dot(U,P), 1) - 1./precision + np.sum(np.square(Y), 1) - 2.*np.sum(Uv*Y, 1) + np.sum(np.square(Uv), 1) )*precision**2
dL_dR -=beta*trace_term/num_data dL_dR -=precision*trace_term/num_data
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR) dL_dthetaL = likelihood.exact_inference_gradients(dL_dR)
grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*beta, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL} grad_dict = {'dL_dKmm': dL_dK, 'dL_dKdiag':np.zeros_like(Knn) + -0.5*precision, 'dL_dKnm':dL_dU.T, 'dL_dthetaL':dL_dthetaL}
#construct a posterior object #construct a posterior object
post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L) post = Posterior(woodbury_inv=Kmmi-P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=L)

View file

@ -22,21 +22,7 @@ class ExactGaussianInference(LatentFunctionInference):
def __init__(self): def __init__(self):
pass#self._YYTfactor_cache = caching.cache() pass#self._YYTfactor_cache = caching.cache()
def get_YYTfactor(self, Y): def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None):
"""
find a matrix L which satisfies LL^T = YY^T.
Note that L may have fewer columns than Y, else L=Y.
"""
N, D = Y.shape
if (N>D):
return Y
else:
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None):
""" """
Returns a Posterior class containing essential quantities of the posterior Returns a Posterior class containing essential quantities of the posterior
""" """
@ -46,13 +32,17 @@ class ExactGaussianInference(LatentFunctionInference):
else: else:
m = mean_function.f(X) m = mean_function.f(X)
if precision is None:
precision = likelihood.gaussian_variance(Y_metadata)
YYT_factor = self.get_YYTfactor(Y-m) YYT_factor = Y-m
if K is None:
K = kern.K(X) K = kern.K(X)
Ky = K.copy() Ky = K.copy()
diag.add(Ky, likelihood.gaussian_variance(Y_metadata)+1e-8) diag.add(Ky, precision+1e-8)
Wi, LW, LWi, W_logdet = pdinv(Ky) Wi, LW, LWi, W_logdet = pdinv(Ky)
alpha, _ = dpotrs(LW, YYT_factor, lower=1) alpha, _ = dpotrs(LW, YYT_factor, lower=1)

View file

@ -1,12 +1,14 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ...util.linalg import pdinv,jitchol,DSYR,tdot,dtrtrs, dpotrs from ...util.linalg import jitchol, DSYR, dtrtrs, dtrtri
from .posterior import Posterior from ...core.parameterization.observable_array import ObsAr
from . import LatentFunctionInference from . import ExactGaussianInference, VarDTC
from ...util import diag
log_2_pi = np.log(2*np.pi) log_2_pi = np.log(2*np.pi)
class EP(LatentFunctionInference): class EPBase(object):
def __init__(self, epsilon=1e-6, eta=1., delta=1.): def __init__(self, epsilon=1e-6, eta=1., delta=1.):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
@ -19,6 +21,7 @@ class EP(LatentFunctionInference):
:param delta: damping EP updates factor. :param delta: damping EP updates factor.
:type delta: float64 :type delta: float64
""" """
super(EPBase, self).__init__()
self.epsilon, self.eta, self.delta = epsilon, eta, delta self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset() self.reset()
@ -33,32 +36,22 @@ class EP(LatentFunctionInference):
# TODO: update approximation in the end as well? Maybe even with a switch? # TODO: update approximation in the end as well? Maybe even with a switch?
pass pass
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, Z=None): class EP(EPBase, ExactGaussianInference):
assert mean_function is None, "inference with a mean function not implemented" def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
num_data, output_dim = Y.shape num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)" assert output_dim ==1, "ep in 1D only (for now!)"
if K is None:
K = kern.K(X) K = kern.K(X)
if self._ep_approximation is None: if self._ep_approximation is None:
#if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation #if we don't yet have the results of running EP, run EP and store the computed factors in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata) mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
else: else:
#if we've already run EP, just use the existing approximation stored in self._ep_approximation #if we've already run EP, just use the existing approximation stored in self._ep_approximation
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
Wi, LW, LWi, W_logdet = pdinv(K + np.diag(1./tau_tilde)) return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K)
alpha, _ = dpotrs(LW, mu_tilde, lower=1)
log_marginal = 0.5*(-num_data * log_2_pi - W_logdet - np.sum(alpha * mu_tilde)) # TODO: add log Z_hat??
dL_dK = 0.5 * (tdot(alpha[:,None]) - Wi)
dL_dthetaL = np.zeros(likelihood.size)#TODO: derivatives of the likelihood parameters
return Posterior(woodbury_inv=Wi, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL}
def expectation_propagation(self, K, Y, likelihood, Y_metadata): def expectation_propagation(self, K, Y, likelihood, Y_metadata):
@ -69,6 +62,7 @@ class EP(LatentFunctionInference):
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data) mu = np.zeros(num_data)
Sigma = K.copy() Sigma = K.copy()
diag.add(Sigma, 1e-7)
#Initial values - Marginal moments #Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64) Z_hat = np.empty(num_data,dtype=np.float64)
@ -79,7 +73,7 @@ class EP(LatentFunctionInference):
if self.old_mutilde is None: if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data)) tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else: else:
assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!" assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde tau_tilde = v_tilde/mu_tilde
@ -124,3 +118,120 @@ class EP(LatentFunctionInference):
mu_tilde = v_tilde/tau_tilde mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat return mu, Sigma, mu_tilde, tau_tilde, Z_hat
class EPDTC(EPBase, VarDTC):
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
assert Y.shape[1]==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z)
if psi1 is None:
try:
Kmn = kern.K(Z, X)
except TypeError:
Kmn = kern.psi1(Z, X).T
else:
Kmn = psi1.T
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
mean_function=mean_function,
Y_metadata=Y_metadata,
precision=tau_tilde,
Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2)
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
num_data, output_dim = Y.shape
assert output_dim == 1, "This EP methods only works for 1D outputs"
LLT0 = Kmm.copy()
#diag.add(LLT0, 1e-8)
Lm = jitchol(LLT0)
Lmi = dtrtri(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
LLT = Kmm.copy() #Sigma = K.copy()
Sigma_diag = Qnn_diag.copy() + 1e-8
#Initial values - Marginal moments
Z_hat = np.zeros(num_data,dtype=np.float64)
mu_hat = np.zeros(num_data,dtype=np.float64)
sigma2_hat = np.zeros(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert self.old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
iterations = 0
tau_tilde_old = 0.
v_tilde_old = 0.
update_order = np.random.permutation(num_data)
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] += delta_tau
v_tilde[i] += delta_v
#Posterior distribution parameters update
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT+np.eye(LLT.shape[0])*1e-7)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
#(re)compute Sigma and mu using the full Cholesky decomposition
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
#diag.add(LLT, 1e-8)
L = jitchol(LLT)
V, _ = dtrtrs(L,Kmn,lower=1)
V2, _ = dtrtrs(L.T,V,lower=0)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V2.T,V2)
mu = np.dot(Sigma,v_tilde)
#monitor convergence
#if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
# Only run the while loop once?
tau_diff = 0
v_diff = 0
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_hat
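
After convergence the EP sites act as Gaussian pseudo-observations: mu_tilde is handed over as the targets and 1./tau_tilde as the per-point noise variances (via the keyword named "precision") to ExactGaussianInference, or tau_tilde itself to VarDTC in the EPDTC case. A standalone NumPy sketch of the Gaussian step this delegates to, using the usual GP identities (not GPy's actual code):

    # Posterior of f given pseudo-observations N(mu_tilde | f, diag(1/tau_tilde)).
    import numpy as np

    def posterior_from_ep_sites(K, mu_tilde, tau_tilde, jitter=1e-8):
        Ky = K + np.diag(1.0 / tau_tilde) + jitter * np.eye(K.shape[0])
        L = np.linalg.cholesky(Ky)
        alpha = np.linalg.solve(L.T, np.linalg.solve(L, mu_tilde))  # Ky^-1 mu_tilde
        mean = K.dot(alpha)
        V = np.linalg.solve(L, K)                                   # L^-1 K
        cov = K - V.T.dot(V)                                        # K - K Ky^-1 K
        return mean, cov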

View file

@ -1,352 +0,0 @@
# Copyright (c) 2012-2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from ...util import diag
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify, DSYR
from ...core.parameterization.variational import VariationalPosterior
from . import LatentFunctionInference
from .posterior import Posterior
log_2_pi = np.log(2*np.pi)
class EPDTC(LatentFunctionInference):
const_jitter = 1e-6
def __init__(self, epsilon=1e-6, eta=1., delta=1., limit=1):
from ...util.caching import Cacher
self.limit = limit
self.get_trYYT = Cacher(self._get_trYYT, limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
self.epsilon, self.eta, self.delta = epsilon, eta, delta
self.reset()
def set_limit(self, limit):
self.get_trYYT.limit = limit
self.get_YYTfactor.limit = limit
def on_optimization_start(self):
self._ep_approximation = None
def on_optimization_end(self):
# TODO: update approximation in the end as well? Maybe even with a switch?
pass
def _get_trYYT(self, Y):
return np.sum(np.square(Y))
def __getstate__(self):
# has to be overridden, as Cacher objects cannot be pickled.
return self.limit
def __setstate__(self, state):
# has to be overridden, as Cacher objects cannot be pickled.
self.limit = state
from ...util.caching import Cacher
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return Y
else:
return jitchol(tdot(Y))
def get_VVTfactor(self, Y, prec):
return Y * prec # TODO chache this, and make it effective
def reset(self):
self.old_mutilde, self.old_vtilde = None, None
self._ep_approximation = None
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
assert output_dim ==1, "ep in 1D only (for now!)"
Kmm = kern.K(Z)
Kmn = kern.K(Z,X)
if self._ep_approximation is None:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
else:
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
if isinstance(X, VariationalPosterior):
uncertain_inputs = True
psi0 = kern.psi0(Z, X)
psi1 = Kmn.T#kern.psi1(Z, X)
psi2 = kern.psi2(Z, X)
else:
uncertain_inputs = False
psi0 = kern.Kdiag(X)
psi1 = Kmn.T#kern.K(X, Z)
psi2 = None
#see whether we're using variational uncertain inputs
_, output_dim = Y.shape
#see whether we've got a different noise variance for each datum
#beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
beta = tau_tilde
VVT_factor = beta[:,None]*mu_tilde[:,None]
trYYT = self.get_trYYT(mu_tilde[:,None])
# do the inference:
het_noise = beta.size > 1
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation
Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm)
# The rather complex computations of A
if uncertain_inputs:
if het_noise:
psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
else:
psi2_beta = psi2.sum(0) * beta
LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T))
else:
if het_noise:
tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
else:
tmp = psi1 * (np.sqrt(beta))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum()
# factor B
B = np.eye(num_inducing) + A
LB = jitchol(B)
psi1Vf = np.dot(psi1.T, VVT_factor)
# back substutue C into psi1Vf
tmp, _ = dtrtrs(Lm, psi1Vf, lower=1, trans=0)
_LBi_Lmi_psi1Vf, _ = dtrtrs(LB, tmp, lower=1, trans=0)
tmp, _ = dtrtrs(LB, _LBi_Lmi_psi1Vf, lower=1, trans=1)
Cpsi1Vf, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
# data fit and derivative of L w.r.t. Kmm
delit = tdot(_LBi_Lmi_psi1Vf)
data_fit = np.trace(delit)
DBi_plus_BiPBi = backsub_both_sides(LB, output_dim * np.eye(num_inducing) + delit)
delit = -0.5 * DBi_plus_BiPBi
delit += -0.5 * B * output_dim
delit += output_dim * np.eye(num_inducing)
# Compute dL_dKmm
dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs)
# log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
psi0, A, LB, trYYT, data_fit, VVT_factor)
#put the gradients in the right places
dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta,
data_fit, num_data, output_dim, trYYT, mu_tilde[:,None])
dL_dthetaL = 0#likelihood.exact_inference_gradients(dL_dR,Y_metadata)
if uncertain_inputs:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1,
'dL_dpsi2':dL_dpsi2,
'dL_dthetaL':dL_dthetaL}
else:
grad_dict = {'dL_dKmm': dL_dKmm,
'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL}
#get sufficient things for posterior prediction
#TODO: do we really want to do this in the loop?
if VVT_factor.shape[1] == Y.shape[1]:
woodbury_vector = Cpsi1Vf # == Cpsi1V
else:
print('foobar')
psi1V = np.dot(mu_tilde[:,None].T*beta, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)
Bi, _ = dpotri(LB, lower=1)
symmetrify(Bi)
Bi = -dpotri(LB, lower=1)[0]
diag.add(Bi, 1)
woodbury_inv = backsub_both_sides(Lm, Bi)
#construct a posterior object
post = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector, K=Kmm, mean=None, cov=None, K_chol=Lm)
return post, log_marginal, grad_dict
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
num_data, data_dim = Y.shape
assert data_dim == 1, "This EP methods only works for 1D outputs"
KmnKnm = np.dot(Kmn,Kmn.T)
Lm = jitchol(Kmm)
Lmi = dtrtrs(Lm,np.eye(Lm.shape[0]))[0] #chol_inv(Lm)
Kmmi = np.dot(Lmi.T,Lmi)
KmmiKmn = np.dot(Kmmi,Kmn)
Qnn_diag = np.sum(Kmn*KmmiKmn,-2)
LLT0 = Kmm.copy()
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(num_data)
LLT = Kmm.copy() #Sigma = K.copy()
Sigma_diag = Qnn_diag.copy()
#Initial values - Marginal moments
Z_hat = np.empty(num_data,dtype=np.float64)
mu_hat = np.empty(num_data,dtype=np.float64)
sigma2_hat = np.empty(num_data,dtype=np.float64)
#initial values - Gaussian factors
if self.old_mutilde is None:
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
else:
assert old_mutilde.size == num_data, "data size mis-match: did you change the data? try resetting!"
mu_tilde, v_tilde = self.old_mutilde, self.old_vtilde
tau_tilde = v_tilde/mu_tilde
#Approximation
tau_diff = self.epsilon + 1.
v_diff = self.epsilon + 1.
iterations = 0
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
update_order = np.random.permutation(num_data)
for i in update_order:
#Cavity distribution parameters
tau_cav = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
v_cav = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
#Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
#Site parameters update
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] += delta_tau
v_tilde[i] += delta_v
#Posterior distribution parameters update
#DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
DSYR(LLT,Kmn[:,i].copy(),delta_tau)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1)
mu += (delta_v-delta_tau*mu[i])*si
#mu = np.dot(Sigma, v_tilde)
#(re) compute Sigma and mu using full Cholesky decompy
LLT = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T)
L = jitchol(LLT)
V,info = dtrtrs(L,Kmn,lower=1)
V2,info = dtrtrs(L.T,V,lower=0)
#Sigma_diag = np.sum(V*V,-2)
#Knmv_tilde = np.dot(Kmn,v_tilde)
#mu = np.dot(V2.T,Knmv_tilde)
Sigma = np.dot(V2.T,V2)
mu = np.dot(Sigma,v_tilde)
#monitor convergence
if iterations>0:
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
tau_tilde_old = tau_tilde.copy()
v_tilde_old = v_tilde.copy()
tau_diff = 0
v_diff = 0
iterations += 1
mu_tilde = v_tilde/tau_tilde
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, psi1, het_noise, uncertain_inputs):
dL_dpsi0 = -0.5 * output_dim * (beta[:,None] * np.ones([num_data, 1])).flatten()
dL_dpsi1 = np.dot(VVT_factor, Cpsi1Vf.T)
dL_dpsi2_beta = 0.5 * backsub_both_sides(Lm, output_dim * np.eye(num_inducing) - DBi_plus_BiPBi)
if het_noise:
if uncertain_inputs:
dL_dpsi2 = beta[:, None, None] * dL_dpsi2_beta[None, :, :]
else:
dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta, (psi1 * beta.reshape(num_data, 1)).T).T
dL_dpsi2 = None
else:
dL_dpsi2 = beta * dL_dpsi2_beta
if uncertain_inputs:
# repeat for each of the N psi_2 matrices
dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
else:
# subsume back into psi1 (==Kmn)
dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
dL_dpsi2 = None
return dL_dpsi0, dL_dpsi1, dL_dpsi2
def _compute_dL_dR(likelihood, het_noise, uncertain_inputs, LB, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, psi0, psi1, beta, data_fit, num_data, output_dim, trYYT, Y):
# the partial derivative vector for the likelihood
if likelihood.size == 0:
# save computation here.
dL_dR = None
elif het_noise:
if uncertain_inputs:
raise NotImplementedError("heteroscedatic derivates with uncertain inputs not implemented")
else:
#from ...util.linalg import chol_inv
#LBi = chol_inv(LB)
LBi, _ = dtrtrs(LB,np.eye(LB.shape[0]))
Lmi_psi1, nil = dtrtrs(Lm, psi1.T, lower=1, trans=0)
_LBi_Lmi_psi1, _ = dtrtrs(LB, Lmi_psi1, lower=1, trans=0)
dL_dR = -0.5 * beta + 0.5 * (beta*Y)**2
dL_dR += 0.5 * output_dim * (psi0 - np.sum(Lmi_psi1**2,0))[:,None] * beta**2
dL_dR += 0.5*np.sum(mdot(LBi.T,LBi,Lmi_psi1)*Lmi_psi1,0)[:,None]*beta**2
dL_dR += -np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T * Y * beta**2
dL_dR += 0.5*np.dot(_LBi_Lmi_psi1Vf.T,_LBi_Lmi_psi1).T**2 * beta**2
else:
# likelihood is not heteroscedatic
dL_dR = -0.5 * num_data * output_dim * beta + 0.5 * trYYT * beta ** 2
dL_dR += 0.5 * output_dim * (psi0.sum() * beta ** 2 - np.trace(A) * beta)
dL_dR += beta * (0.5 * np.sum(A * DBi_plus_BiPBi) - data_fit)
return dL_dR
def _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, psi0, A, LB, trYYT, data_fit,Y):
#compute log marginal likelihood
if het_noise:
lik_1 = -0.5 * num_data * output_dim * np.log(2. * np.pi) + 0.5 * np.sum(np.log(beta)) - 0.5 * np.sum(beta * np.square(Y).sum(axis=-1))
lik_2 = -0.5 * output_dim * (np.sum(beta.flatten() * psi0) - np.trace(A))
else:
lik_1 = -0.5 * num_data * output_dim * (np.log(2. * np.pi) - np.log(beta)) - 0.5 * beta * trYYT
lik_2 = -0.5 * output_dim * (np.sum(beta * psi0) - np.trace(A))
lik_3 = -output_dim * (np.sum(np.log(np.diag(LB))))
lik_4 = 0.5 * data_fit
log_marginal = lik_1 + lik_2 + lik_3 + lik_4
return log_marginal

View file

@ -64,31 +64,30 @@ class VarDTC(LatentFunctionInference):
def get_VVTfactor(self, Y, prec): def get_VVTfactor(self, Y, prec):
return Y * prec # TODO cache this, and make it effective return Y * prec # TODO cache this, and make it effective
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None): def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
assert mean_function is None, "inference with a mean function not implemented"
num_data, output_dim = Y.shape
num_inducing = Z.shape[0]
_, output_dim = Y.shape
uncertain_inputs = isinstance(X, VariationalPosterior) uncertain_inputs = isinstance(X, VariationalPosterior)
#see whether we've got a different noise variance for each datum if precision is None:
beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6) #assume Gaussian likelihood
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! precision = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), self.const_jitter)
#self.YYTfactor = self.get_YYTfactor(Y)
#VVT_factor = self.get_VVTfactor(self.YYTfactor, beta) if precision.ndim == 1:
het_noise = beta.size > 1 precision = precision[:, None]
if beta.ndim == 1: het_noise = precision.size > 1
beta = beta[:, None]
VVT_factor = beta*Y VVT_factor = precision*Y
#VVT_factor = beta*Y #VVT_factor = precision*Y
trYYT = self.get_trYYT(Y) trYYT = self.get_trYYT(Y)
# do the inference:
num_inducing = Z.shape[0]
num_data = Y.shape[0]
# kernel computations, using BGPLVM notation # kernel computations, using BGPLVM notation
if Lm is None:
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
if Lm is None:
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
# The rather complex computations of A, and the psi stats # The rather complex computations of A, and the psi stats
@ -99,15 +98,16 @@ class VarDTC(LatentFunctionInference):
psi1 = kern.psi1(Z, X) psi1 = kern.psi1(Z, X)
if het_noise: if het_noise:
if psi2 is None: if psi2 is None:
assert len(psi2.shape) == 3 # Need to have not summed out N psi2_beta = (kern.psi2n(Z, X) * precision[:, :, None]).sum(0)
#FIXME: Need testing
psi2_beta = np.sum([psi2[X[i:i+1,:], :, :] * beta_i for i,beta_i in enumerate(beta)],0)
else: else:
psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0) psi2_beta = (psi2 * precision[:, :, None]).sum(0)
else: else:
if psi2 is None: if psi2 is None:
psi2 = kern.psi2(Z,X) psi2_beta = kern.psi2(Z,X) * precision
psi2_beta = psi2 * beta elif psi2.ndim == 3:
psi2_beta = psi2.sum(0) * precision
else:
psi2_beta = psi2 * precision
LmInv = dtrtri(Lm) LmInv = dtrtri(Lm)
A = LmInv.dot(psi2_beta.dot(LmInv.T)) A = LmInv.dot(psi2_beta.dot(LmInv.T))
else: else:
@ -116,9 +116,9 @@ class VarDTC(LatentFunctionInference):
if psi1 is None: if psi1 is None:
psi1 = kern.K(X, Z) psi1 = kern.K(X, Z)
if het_noise: if het_noise:
tmp = psi1 * (np.sqrt(beta)) tmp = psi1 * (np.sqrt(precision))
else: else:
tmp = psi1 * (np.sqrt(beta)) tmp = psi1 * (np.sqrt(precision))
tmp, _ = dtrtrs(Lm, tmp.T, lower=1) tmp, _ = dtrtrs(Lm, tmp.T, lower=1)
A = tdot(tmp) #print A.sum() A = tdot(tmp) #print A.sum()
@ -144,19 +144,19 @@ class VarDTC(LatentFunctionInference):
dL_dKmm = backsub_both_sides(Lm, delit) dL_dKmm = backsub_both_sides(Lm, delit)
# derivatives of L w.r.t. psi # derivatives of L w.r.t. psi
dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, dL_dpsi0, dL_dpsi1, dL_dpsi2 = _compute_dL_dpsi(num_inducing, num_data, output_dim, precision, Lm,
VVT_factor, Cpsi1Vf, DBi_plus_BiPBi, VVT_factor, Cpsi1Vf, DBi_plus_BiPBi,
psi1, het_noise, uncertain_inputs) psi1, het_noise, uncertain_inputs)
# log marginal likelihood # log marginal likelihood
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise, log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, precision, het_noise,
psi0, A, LB, trYYT, data_fit, Y) psi0, A, LB, trYYT, data_fit, Y)
#noise derivatives #noise derivatives
dL_dR = _compute_dL_dR(likelihood, dL_dR = _compute_dL_dR(likelihood,
het_noise, uncertain_inputs, LB, het_noise, uncertain_inputs, LB,
_LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A, _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
psi0, psi1, beta, psi0, psi1, precision,
data_fit, num_data, output_dim, trYYT, Y, VVT_factor) data_fit, num_data, output_dim, trYYT, Y, VVT_factor)
dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata) dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)
@ -181,7 +181,7 @@ class VarDTC(LatentFunctionInference):
else: else:
print('foobar') print('foobar')
import ipdb; ipdb.set_trace() import ipdb; ipdb.set_trace()
psi1V = np.dot(Y.T*beta, psi1).T psi1V = np.dot(Y.T*precision, psi1).T
tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0) tmp, _ = dtrtrs(Lm, psi1V, lower=1, trans=0)
tmp, _ = dpotrs(LB, tmp, lower=1) tmp, _ = dpotrs(LB, tmp, lower=1)
woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1) woodbury_vector, _ = dtrtrs(Lm, tmp, lower=1, trans=1)

View file

@ -229,12 +229,34 @@ class opt_SCG(Optimizer):
self.funct_eval = opt_result[2] self.funct_eval = opt_result[2]
self.status = opt_result[3] self.status = opt_result[3]
class Opt_Adadelta(Optimizer):
def __init__(self, step_rate=0.1, decay=0.9, momentum=0, *args, **kwargs):
Optimizer.__init__(self, *args, **kwargs)
self.opt_name = "Adadelta (climin)"
self.step_rate=step_rate
self.decay = decay
self.momentum = momentum
def opt(self, f_fp=None, f=None, fp=None):
assert not fp is None
import climin
opt = climin.adadelta.Adadelta(self.x_init, fp, step_rate=self.step_rate, decay=self.decay, momentum=self.momentum)
for info in opt:
if info['n_iter']>=self.max_iters:
self.x_opt = opt.wrt
self.status = 'maximum number of function evaluations exceeded '
break
def get_optimizer(f_min): def get_optimizer(f_min):
optimizers = {'fmin_tnc': opt_tnc, optimizers = {'fmin_tnc': opt_tnc,
'simplex': opt_simplex, 'simplex': opt_simplex,
'lbfgsb': opt_lbfgsb, 'lbfgsb': opt_lbfgsb,
'scg': opt_SCG} 'scg': opt_SCG,
'adadelta':Opt_Adadelta}
if rasm_available: if rasm_available:
optimizers['rasmussen'] = opt_rasm optimizers['rasmussen'] = opt_rasm
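
Registering the class under the name 'adadelta' means it can be selected straight from Model.optimize; extra keyword arguments such as step_rate are forwarded to the optimizer constructor through the **kwargs pass-through shown in the Model.optimize change above. A hedged usage sketch (requires the climin package; the toy data is a placeholder):

    # Hedged usage sketch for the new 'adadelta' entry (needs climin installed).
    import numpy as np
    import GPy

    X = np.random.rand(50, 1)
    Y = np.sin(3 * X) + 0.1 * np.random.randn(50, 1)
    m = GPy.models.GPRegression(X, Y)
    m.optimize('adadelta', max_iters=500, messages=False)
    print(m)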

View file

@ -60,13 +60,14 @@ class Bernoulli(Likelihood):
if isinstance(self.gp_link, link_functions.Probit): if isinstance(self.gp_link, link_functions.Probit):
z = sign*v_i/np.sqrt(tau_i**2 + tau_i) z = sign*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = std_norm_cdf(z) Z_hat = std_norm_cdf(z)
Z_hat = np.where(Z_hat==0, 1e-15, Z_hat)
phi = std_norm_pdf(z) phi = std_norm_pdf(z)
mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i)) mu_hat = v_i/tau_i + sign*phi/(Z_hat*np.sqrt(tau_i**2 + tau_i))
sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat) sigma2_hat = 1./tau_i - (phi/((tau_i**2+tau_i)*Z_hat))*(z+phi/Z_hat)
elif isinstance(self.gp_link, link_functions.Heaviside): elif isinstance(self.gp_link, link_functions.Heaviside):
a = sign*v_i/np.sqrt(tau_i) a = sign*v_i/np.sqrt(tau_i)
Z_hat = std_norm_cdf(a) Z_hat = np.fmax(1e-13, std_norm_cdf(a))
N = std_norm_pdf(a) N = std_norm_pdf(a)
mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i) mu_hat = v_i/tau_i + sign*N/Z_hat/np.sqrt(tau_i)
sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i sigma2_hat = (1. - a*N/Z_hat - np.square(N/Z_hat))/tau_i
@ -257,4 +258,4 @@ class Bernoulli(Likelihood):
return Ysim.reshape(orig_shape) return Ysim.reshape(orig_shape)
def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None): def exact_inference_gradients(self, dL_dKdiag,Y_metadata=None):
pass return np.zeros(self.size)

View file

@ -3,8 +3,8 @@
from .gp_regression import GPRegression from .gp_regression import GPRegression
from .gp_classification import GPClassification from .gp_classification import GPClassification
from .sparse_gp_regression import SparseGPRegression, SparseGPRegressionUncertainInput from .sparse_gp_regression import SparseGPRegression
from .sparse_gp_classification import SparseGPClassification from .sparse_gp_classification import SparseGPClassification, SparseGPClassificationUncertainInput
from .gplvm import GPLVM from .gplvm import GPLVM
from .bcgplvm import BCGPLVM from .bcgplvm import BCGPLVM
from .sparse_gplvm import SparseGPLVM from .sparse_gplvm import SparseGPLVM

View file

@ -81,11 +81,6 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
"""Get the gradients of the posterior distribution of X in its specific form.""" """Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient return X.mean.gradient, X.variance.gradient
def _inner_parameters_changed(self, kern, X, Z, likelihood, Y, Y_metadata, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, **kw):
posterior, log_marginal_likelihood, grad_dict = super(BayesianGPLVMMiniBatch, self)._inner_parameters_changed(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm, dL_dKmm=dL_dKmm,
psi0=psi0, psi1=psi1, psi2=psi2, **kw)
return posterior, log_marginal_likelihood, grad_dict
def _outer_values_update(self, full_values): def _outer_values_update(self, full_values):
""" """
Here you put the values, which were collected before in the right places. Here you put the values, which were collected before in the right places.

View file

@ -6,12 +6,11 @@ import numpy as np
from ..core import SparseGP from ..core import SparseGP
from .. import likelihoods from .. import likelihoods
from .. import kern from .. import kern
from ..likelihoods import likelihood from ..inference.latent_function_inference import EPDTC
from ..inference.latent_function_inference import expectation_propagation_dtc
class SparseGPClassification(SparseGP): class SparseGPClassification(SparseGP):
""" """
sparse Gaussian Process model for classification Sparse Gaussian Process model for classification
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
@ -27,10 +26,7 @@ class SparseGPClassification(SparseGP):
""" """
#def __init__(self, X, Y=None, likelihood=None, kernel=None, normalize_X=False, normalize_Y=False, Z=None, num_inducing=10):
def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None): def __init__(self, X, Y=None, likelihood=None, kernel=None, Z=None, num_inducing=10, Y_metadata=None):
if kernel is None: if kernel is None:
kernel = kern.RBF(X.shape[1]) kernel = kern.RBF(X.shape[1])
@ -42,5 +38,57 @@ class SparseGPClassification(SparseGP):
else: else:
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=expectation_propagation_dtc.EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method=EPDTC(), name='SparseGPClassification',Y_metadata=Y_metadata)
#def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp', Y_metadata=None):
class SparseGPClassificationUncertainInput(SparseGP):
"""
Sparse Gaussian Process model for classification with uncertain inputs.
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
:param X: input observations
:type X: np.ndarray (num_data x input_dim)
:param X_variance: The uncertainty in the measurements of X (Gaussian variance, optional)
:type X_variance: np.ndarray (num_data x input_dim)
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Otherwise num_inducing is ignored.
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10, Y_metadata=None, normalizer=None):
from ..core.parameterization.variational import NormalPosterior
if kernel is None:
kernel = kern.RBF(X.shape[1])
likelihood = likelihoods.Bernoulli()
if Z is None:
i = np.random.permutation(X.shape[0])[:num_inducing]
Z = X[i].copy()
else:
assert Z.shape[1] == X.shape[1]
X = NormalPosterior(X, X_variance)
SparseGP.__init__(self, X, Y, Z, kernel, likelihood,
inference_method=EPDTC(),
name='SparseGPClassification', Y_metadata=Y_metadata, normalizer=normalizer)
def parameters_changed(self):
#Compute the psi statistics for N once, but don't sum out N in psi2
self.psi0 = self.kern.psi0(self.Z, self.X)
self.psi1 = self.kern.psi1(self.Z, self.X)
self.psi2 = self.kern.psi2n(self.Z, self.X)
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y, self.Y_metadata, psi0=self.psi0, psi1=self.psi1, psi2=self.psi2)
self._update_gradients()
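
A short constructor-level sketch of the new model on made-up data (names as in this file, no pods required):

    # Toy usage of the new uncertain-input classifier (data is made up).
    import numpy as np
    import GPy

    X = np.random.uniform(-3, 3, (40, 1))
    X_var = np.full(X.shape, 0.3)          # Gaussian variance on each input
    Y = (X > 0).astype(float)              # binary labels in {0, 1}
    m = GPy.models.SparseGPClassificationUncertainInput(X, X_var, Y, num_inducing=8)
    m.optimize(messages=False)
    print(m)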

View file

@ -99,13 +99,8 @@ class SparseGPMiniBatch(SparseGP):
like them into this dictionary for inner use of the indices inside the like them into this dictionary for inner use of the indices inside the
algorithm. algorithm.
""" """
if psi2 is None: return self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
psi2_sum_n = None dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2, **kwargs)
else:
psi2_sum_n = psi2.sum(axis=0)
posterior, log_marginal_likelihood, grad_dict = self.inference_method.inference(kern, X, Z, likelihood, Y, Y_metadata, Lm=Lm,
dL_dKmm=dL_dKmm, psi0=psi0, psi1=psi1, psi2=psi2_sum_n, **kwargs)
return posterior, log_marginal_likelihood, grad_dict
def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None): def _inner_take_over_or_update(self, full_values=None, current_values=None, value_indices=None):
""" """

View file

@ -9,7 +9,6 @@ from .. import likelihoods
from .. import kern from .. import kern
from ..inference.latent_function_inference import VarDTC from ..inference.latent_function_inference import VarDTC
from ..core.parameterization.variational import NormalPosterior from ..core.parameterization.variational import NormalPosterior
from GPy.inference.latent_function_inference.var_dtc_parallel import VarDTC_minibatch
class SparseGPRegression(SparseGP_MPI): class SparseGPRegression(SparseGP_MPI):
""" """
@ -18,6 +17,7 @@ class SparseGPRegression(SparseGP_MPI):
This is a thin wrapper around the SparseGP class, with a set of sensible defaults This is a thin wrapper around the SparseGP class, with a set of sensible defaults
:param X: input observations :param X: input observations
:param X_variance: input uncertainties, one per input X
:param Y: observed values :param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white :param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note) :param Z: inducing inputs (optional, see note)
@ -64,46 +64,3 @@ class SparseGPRegression(SparseGP_MPI):
update_gradients_sparsegp(self, mpi_comm=self.mpi_comm) update_gradients_sparsegp(self, mpi_comm=self.mpi_comm)
else: else:
super(SparseGPRegression, self).parameters_changed() super(SparseGPRegression, self).parameters_changed()
class SparseGPRegressionUncertainInput(SparseGP):
"""
Gaussian Process model for regression with Gaussian variance on the inputs (X_variance)
This is a thin wrapper around the SparseGP class, with a set of sensible defalts
"""
def __init__(self, X, X_variance, Y, kernel=None, Z=None, num_inducing=10, normalizer=None):
"""
:param X: input observations
:type X: np.ndarray (num_data x input_dim)
:param X_variance: The uncertainty in the measurements of X (Gaussian variance, optional)
:type X_variance: np.ndarray (num_data x input_dim)
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (num_inducing x input_dim) | None
:param num_inducing: number of inducing points (ignored if Z is passed, see note)
:type num_inducing: int
:rtype: model object
.. Note:: If no Z array is passed, num_inducing (default 10) points are selected from the data. Other wise num_inducing is ignored
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
num_data, input_dim = X.shape
# kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.RBF(input_dim) + kern.White(input_dim, variance=1e-3)
# Z defaults to a subset of the data
if Z is None:
i = np.random.permutation(num_data)[:min(num_inducing, num_data)]
Z = X[i].copy()
else:
assert Z.shape[1] == input_dim
likelihood = likelihoods.Gaussian()
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, X_variance=X_variance, inference_method=VarDTC(), normalizer=normalizer)
self.ensure_default_constraints()

View file

@ -2,6 +2,7 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
try: try:
import matplotlib
from . import matplot_dep from . import matplot_dep
except (ImportError, NameError): except (ImportError, NameError):
# Matplotlib not available # Matplotlib not available

View file

@ -1,18 +1,18 @@
# Copyright (c) 2014, GPy authors (see AUTHORS.txt). # Copyright (c) 2014, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import base_plots from . import base_plots
import models_plots from . import models_plots
import priors_plots from . import priors_plots
import variational_plots from . import variational_plots
import kernel_plots from . import kernel_plots
import dim_reduction_plots from . import dim_reduction_plots
import mapping_plots from . import mapping_plots
import Tango from . import Tango
import visualize from . import visualize
import latent_space_visualizations from . import latent_space_visualizations
import netpbmfile from . import netpbmfile
import inference_plots from . import inference_plots
import maps from . import maps
import img_plots from . import img_plots
from ssgplvm import SSGPLVM_plot from .ssgplvm import SSGPLVM_plot

View file

@ -1,12 +1,6 @@
# #Copyright (c) 2012, GPy authors (see AUTHORS.txt). # #Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
try:
#import Tango
from matplotlib import pyplot as pb from matplotlib import pyplot as pb
except:
pass
import numpy as np import numpy as np
def ax_default(fignum, ax): def ax_default(fignum, ax):

View file

@ -42,8 +42,12 @@ def plot_data(model, which_data_rows='all',
fig = plt.figure(num=fignum) fig = plt.figure(num=fignum)
ax = fig.add_subplot(111) ax = fig.add_subplot(111)
#data if hasattr(model, 'has_uncertain_inputs') and model.has_uncertain_inputs():
X = model.X.mean
X_variance = model.X.variance
else:
X = model.X X = model.X
X_variance = None
Y = model.Y Y = model.Y
#work out what the inputs are for plotting (1D or 2D) #work out what the inputs are for plotting (1D or 2D)
@ -54,9 +58,14 @@ def plot_data(model, which_data_rows='all',
plots = {} plots = {}
#one dimensional plotting #one dimensional plotting
if len(free_dims) == 1: if len(free_dims) == 1:
plots['dataplot'] = []
if X_variance is not None: plots['xerrorbar'] = []
for d in which_data_ycols: for d in which_data_ycols:
plots['dataplot'] = ax.plot(X[which_data_rows,free_dims], Y[which_data_rows, d], data_symbol, mew=mew) plots['dataplot'].append(ax.plot(X[which_data_rows, free_dims], Y[which_data_rows, d], data_symbol, mew=mew))
if X_variance is not None:
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt='none', elinewidth=.5, alpha=.5)
#2D plotting #2D plotting
elif len(free_dims) == 2: elif len(free_dims) == 2:
@ -219,10 +228,6 @@ def plot_fit(model, plot_limits=None, which_data_rows='all',
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(), plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), m_X[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()), xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5) ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
else:
plots['xerrorbar'] = ax.errorbar(X[which_data_rows, free_dims].flatten(), Y[which_data_rows, which_data_ycols].flatten(),
xerr=2 * np.sqrt(X_variance[which_data_rows, free_dims].flatten()),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
#set the limits of the plot to some sensible values #set the limits of the plot to some sensible values
ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper)) ymin, ymax = min(np.append(Y[which_data_rows, which_data_ycols].flatten(), lower)), max(np.append(Y[which_data_rows, which_data_ycols].flatten(), upper))

View file

@ -447,16 +447,14 @@ class GradientTests(np.testing.TestCase):
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2)
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2)
def test_SparseGPRegression_rbf_linear_white_kern_2D_uncertain_inputs(self): def test_SparseGPRegression_rbf_white_kern_2D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 2d data with uncertain inputs'''
rbflin = GPy.kern.RBF(2) + GPy.kern.Linear(2) rbflin = GPy.kern.RBF(2) + GPy.kern.White(2)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=2, uncertain_inputs=1)
def test_SparseGPRegression_rbf_linear_white_kern_1D_uncertain_inputs(self): def test_SparseGPRegression_rbf_white_kern_1D_uncertain_inputs(self):
''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs''' ''' Testing the sparse GP regression with rbf, linear kernel on 1d data with uncertain inputs'''
rbflin = GPy.kern.RBF(1) + GPy.kern.Linear(1) rbflin = GPy.kern.RBF(1) + GPy.kern.White(1)
raise unittest.SkipTest("This is not implemented yet!")
self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1) self.check_model(rbflin, model_type='SparseGPRegression', dimension=1, uncertain_inputs=1)
def test_GPLVM_rbf_bias_white_kern_2D(self): def test_GPLVM_rbf_bias_white_kern_2D(self):
@ -506,6 +504,17 @@ class GradientTests(np.testing.TestCase):
m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z) m = GPy.models.SparseGPClassification(X, Y, kernel=kernel, Z=Z)
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_sparse_EP_DTC_probit_uncertain_inputs(self):
N = 20
X = np.hstack([np.random.normal(5, 2, N // 2), np.random.normal(10, 2, N // 2)])[:, None]
Y = np.hstack([np.ones(N // 2), np.zeros(N // 2)])[:, None]
Z = np.linspace(0, 15, 4)[:, None]
X_var = np.random.uniform(0.1, 0.2, X.shape)
kernel = GPy.kern.RBF(1)
m = GPy.models.SparseGPClassificationUncertainInput(X, X_var, Y, kernel=kernel, Z=Z)
self.assertTrue(m.checkgrad())
def test_multioutput_regression_1D(self): def test_multioutput_regression_1D(self):
X1 = np.random.rand(50, 1) * 8 X1 = np.random.rand(50, 1) * 8
X2 = np.random.rand(30, 1) * 5 X2 = np.random.rand(30, 1) * 5

View file

@ -34,7 +34,7 @@ class RVTransformationTestCase(unittest.TestCase):
# The PDF of the transformed variables # The PDF of the transformed variables
p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0]) p_phi = lambda phi : np.exp(-m._objective_grads(phi)[0])
# To the empirical PDF of: # To the empirical PDF of:
theta_s = prior.rvs(100000) theta_s = prior.rvs(1e6)
phi_s = trans.finv(theta_s) phi_s = trans.finv(theta_s)
# which is essentially a kernel density estimation # which is essentially a kernel density estimation
kde = st.gaussian_kde(phi_s) kde = st.gaussian_kde(phi_s)
@ -56,7 +56,7 @@ class RVTransformationTestCase(unittest.TestCase):
# The following test cannot be very accurate # The following test cannot be very accurate
self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1) self.assertTrue(np.linalg.norm(pdf_phi - kde(phi)) / np.linalg.norm(kde(phi)) <= 1e-1)
# Check the gradients at a few random points # Check the gradients at a few random points
for i in range(10): for i in range(5):
m.theta = theta_s[i] m.theta = theta_s[i]
self.assertTrue(m.checkgrad(verbose=True)) self.assertTrue(m.checkgrad(verbose=True))

View file

@ -37,22 +37,23 @@ else:
link_args = ['-lgomp'] link_args = ['-lgomp']
ext_mods = [Extension(name='GPy.kern._src.stationary_cython', ext_mods = [Extension(name='GPy.kern._src.stationary_cython',
sources=['GPy/kern/_src/stationary_cython.c','GPy/kern/_src/stationary_utils.c'], sources=['GPy/kern/_src/stationary_cython.c',
include_dirs=[np.get_include()], 'GPy/kern/_src/stationary_utils.c'],
include_dirs=[np.get_include(),'.'],
extra_compile_args=compile_flags, extra_compile_args=compile_flags,
extra_link_args = link_args), extra_link_args = link_args),
Extension(name='GPy.util.choleskies_cython', Extension(name='GPy.util.choleskies_cython',
sources=['GPy/util/choleskies_cython.c'], sources=['GPy/util/choleskies_cython.c'],
include_dirs=[np.get_include()], include_dirs=[np.get_include(),'.'],
extra_link_args = link_args, extra_link_args = link_args,
extra_compile_args=compile_flags), extra_compile_args=compile_flags),
Extension(name='GPy.util.linalg_cython', Extension(name='GPy.util.linalg_cython',
sources=['GPy/util/linalg_cython.c'], sources=['GPy/util/linalg_cython.c'],
include_dirs=[np.get_include()], include_dirs=[np.get_include(),'.'],
extra_compile_args=compile_flags), extra_compile_args=compile_flags),
Extension(name='GPy.kern._src.coregionalize_cython', Extension(name='GPy.kern._src.coregionalize_cython',
sources=['GPy/kern/_src/coregionalize_cython.c'], sources=['GPy/kern/_src/coregionalize_cython.c'],
include_dirs=[np.get_include()], include_dirs=[np.get_include(),'.'],
extra_compile_args=compile_flags)] extra_compile_args=compile_flags)]
setup(name = 'GPy', setup(name = 'GPy',