Merge branch 'params' of github.com:SheffieldML/GPy into params

This commit is contained in:
Ricardo 2014-02-26 11:26:58 +00:00
commit b6edc1a298
16 changed files with 306 additions and 426 deletions

View file

@ -20,14 +20,13 @@ class Model(Parameterized):
self.optimization_runs = [] self.optimization_runs = []
self.sampling_runs = [] self.sampling_runs = []
self.preferred_optimizer = 'scg' self.preferred_optimizer = 'scg'
# self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes
def log_likelihood(self): def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class" raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self): def _log_likelihood_gradients(self):
# def dK_d(self, param, dL_dK, X, X2)
g = np.zeros(self.size) g = np.zeros(self.size)
try: try:
# [g.__setitem__(s, self.gradient_mapping[p]().flat) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
[p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed] [p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
except ValueError: except ValueError:
raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name) raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name)
@ -61,85 +60,6 @@ class Model(Parameterized):
self.priors = state.pop() self.priors = state.pop()
Parameterized._setstate(self, state) Parameterized._setstate(self, state)
# def set_prior(self, regexp, what):
# """
#
# Sets priors on the model parameters.
#
# **Notes**
#
# Asserts that the prior is suitable for the constraint. If the
# wrong constraint is in place, an error is raised. If no
# constraint is in place, one is added (warning printed).
#
# For tied parameters, the prior will only be "counted" once, thus
# a prior object is only inserted on the first tied index
#
# :param regexp: regular expression of parameters on which priors need to be set.
# :type param: string, regexp, or integer array
# :param what: prior to set on parameter.
# :type what: GPy.core.Prior type
#
# """
# if self.priors is None:
# self.priors = [None for i in range(self._get_params().size)]
#
# which = self.grep_param_names(regexp)
#
# # check tied situation
# tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
# if len(tie_partial_matches):
# raise ValueError, "cannot place prior across partial ties"
# tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
# if len(tie_matches) > 1:
# raise ValueError, "cannot place prior across multiple ties"
# elif len(tie_matches) == 1:
# which = which[:1] # just place a prior object on the first parameter
#
#
# # check constraints are okay
#
# if what.domain is _POSITIVE:
# constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is _POSITIVE]
# if len(constrained_positive_indices):
# constrained_positive_indices = np.hstack(constrained_positive_indices)
# else:
# constrained_positive_indices = np.zeros(shape=(0,))
# bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
# assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
# unconst = np.setdiff1d(which, constrained_positive_indices)
# if len(unconst):
# print "Warning: constraining parameters to be positive:"
# print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
# print '\n'
# self.constrain_positive(unconst)
# elif what.domain is _REAL:
# assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
# else:
# raise ValueError, "prior not recognised"
#
# # store the prior in a local list
# for w in which:
# self.priors[w] = what
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
:param name: the name of parameters required (as a regular expression).
:type name: regular expression
:param return_names: whether or not to return the names matched (default False)
:type return_names: bool
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def randomize(self): def randomize(self):
""" """
Randomize the model. Randomize the model.
@ -183,7 +103,9 @@ class Model(Parameterized):
:param messages: whether to display during optimisation :param messages: whether to display during optimisation
:type messages: bool :type messages: bool
.. note:: If num_processes is None, the number of workes in the multiprocessing pool is automatically set to the number of processors on the current machine. .. note:: If num_processes is None, the number of workes in the
multiprocessing pool is automatically set to the number of processors
on the current machine.
""" """
initial_parameters = self._get_params_transformed() initial_parameters = self._get_params_transformed()
@ -234,32 +156,10 @@ class Model(Parameterized):
'variance', 'lengthscale', 'precision' and 'kappa'. If any of 'variance', 'lengthscale', 'precision' and 'kappa'. If any of
these terms are present in the name the parameter is these terms are present in the name the parameter is
constrained positive. constrained positive.
DEPRECATED.
""" """
raise DeprecationWarning, 'parameters now have default constraints' raise DeprecationWarning, 'parameters now have default constraints'
#positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
# param_names = self._get_param_names()
# for s in positive_strings:
# paramlist = self.grep_param_names(".*"+s)
# for param in paramlist:
# for p in param.flattened_parameters:
# rav_i = set(self._raveled_index_for(p))
# for constraint in self.constraints.iter_properties():
# rav_i -= set(self._constraint_indices(p, constraint))
# rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
# ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
# if ind.size != 0:
# p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
# if paramlist:
# self.__getitem__(None, paramlist).constrain_positive(warning=warning)
# currently_constrained = self.all_constrained_indices()
# to_make_positive = []
# for s in positive_strings:
# for i in self.grep_param_names(".*" + s):
# if not (i in currently_constrained):
# to_make_positive.append(i)
# if len(to_make_positive):
# self.constrain_positive(np.asarray(to_make_positive), warning=warning)
def objective_function(self, x): def objective_function(self, x):
""" """
@ -336,7 +236,9 @@ class Model(Parameterized):
:messages: whether to display during optimisation :messages: whether to display during optimisation
:type messages: bool :type messages: bool
:param optimizer: which optimizer to use (defaults to self.preferred optimizer) :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
:type optimizer: string TODO: valid strings? :type optimizer: string
TODO: valid args
""" """
if optimizer is None: if optimizer is None:
optimizer = self.preferred_optimizer optimizer = self.preferred_optimizer
@ -473,81 +375,4 @@ class Model(Parameterized):
self._set_params_transformed(x) self._set_params_transformed(x)
return ret return ret
def input_sensitivity(self):
"""
return an array describing the sesitivity of the model to each input
NB. Right now, we're basing this on the lengthscales (or
variances) of the kernel. TODO: proper sensitivity analysis
where we integrate across the model inputs and evaluate the
effect on the variance of the model output. """
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
from ..kern import RBF, Linear#, RBFInv
if isinstance(k, RBF):
return 1. / k.lengthscale
#elif isinstance(k, RBFInv):
# return k.inv_lengthscale
elif isinstance(k, Linear):
return k.variances
else:
raise ValueError, "cannot determine sensitivity for this kernel"
def pseudo_EM(self, stop_crit=.1, **kwargs):
"""
EM - like algorithm for Expectation Propagation and Laplace approximation
:param stop_crit: convergence criterion
:type stop_crit: float
.. Note: kwargs are passed to update_likelihood and optimize functions.
"""
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
ll_change = stop_crit + 1.
iteration = 0
last_ll = -np.inf
convergence = False
alpha = 0
stop = False
# Handle **kwargs
ep_args = {}
for arg in kwargs.keys():
if arg in ('epsilon', 'power_ep'):
ep_args[arg] = kwargs[arg]
del kwargs[arg]
while not stop:
last_approximation = self.likelihood.copy()
last_params = self._get_params()
if len(ep_args) == 2:
self.update_likelihood_approximation(epsilon=ep_args['epsilon'], power_ep=ep_args['power_ep'])
elif len(ep_args) == 1:
if ep_args.keys()[0] == 'epsilon':
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
elif ep_args.keys()[0] == 'power_ep':
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
else:
self.update_likelihood_approximation()
new_ll = self.log_likelihood()
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
last_ll = self.log_likelihood()
if ll_change < stop_crit:
stop = True
iteration += 1
if stop:
print "%s iterations." % iteration
self.update_likelihood_approximation()

View file

@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
__print_threshold__ = 5 __print_threshold__ = 5
###### ######
class Param(Constrainable, ObservableArray, Gradcheckable, Indexable): class Param(Constrainable, ObservableArray, Gradcheckable):
""" """
Parameter object for GPy models. Parameter object for GPy models.

View file

@ -4,10 +4,10 @@
import numpy as np import numpy as np
from domains import _POSITIVE,_NEGATIVE, _BOUNDED from domains import _POSITIVE,_NEGATIVE, _BOUNDED
import sys
import weakref import weakref
_lim_val = -np.log(sys.float_info.epsilon) _exp_lim_val = np.finfo(np.float64).max
_lim_val = np.log(_exp_lim_val)#-np.log(sys.float_info.epsilon)
#=============================================================================== #===============================================================================
# Fixing constants # Fixing constants
@ -34,6 +34,16 @@ class Transformation(object):
def initialize(self, f): def initialize(self, f):
""" produce a sensible initial value for f(x)""" """ produce a sensible initial value for f(x)"""
raise NotImplementedError raise NotImplementedError
def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args,**kw):
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
import matplotlib.pyplot as plt
from ...plotting.matplot_dep import base_plots
x = np.linspace(-8,8)
base_plots.meanplot(x, self.f(x),axes=axes*args,**kw)
axes = plt.gca()
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
def __str__(self): def __str__(self):
raise NotImplementedError raise NotImplementedError
def __repr__(self): def __repr__(self):
@ -55,6 +65,24 @@ class Logexp(Transformation):
def __str__(self): def __str__(self):
return '+ve' return '+ve'
class LogexpNeg(Transformation):
domain = _POSITIVE
def f(self, x):
return np.where(x>_lim_val, -x, -np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
#raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
def finv(self, f):
return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
def gradfactor(self, f):
return np.where(f>_lim_val, -1, -1 + np.exp(-f))
def initialize(self, f):
if np.any(f < 0.):
print "Warning: changing parameters to satisfy constraints"
return np.abs(f)
def __str__(self):
return '+ve'
class NegativeLogexp(Transformation): class NegativeLogexp(Transformation):
domain = _NEGATIVE domain = _NEGATIVE
logexp = Logexp() logexp = Logexp()

View file

@ -2,7 +2,6 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
from ..util.linalg import mdot
from gp import GP from gp import GP
from parameterization.param import Param from parameterization.param import Param
from ..inference.latent_function_inference import var_dtc from ..inference.latent_function_inference import var_dtc
@ -32,7 +31,7 @@ class SparseGP(GP):
""" """
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp'): def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'):
#pick a sensible inference method #pick a sensible inference method
if inference_method is None: if inference_method is None:
@ -58,11 +57,33 @@ class SparseGP(GP):
self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y) self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood')) self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
if isinstance(self.X, VariationalPosterior): if isinstance(self.X, VariationalPosterior):
self.kern.update_gradients_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) #gradients wrt kernel
self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) dL_dKmm = self.grad_dict.pop('dL_dKmm')
self.kern.update_gradients_full(dL_dKmm, self.Z, None)
target = np.zeros(self.kern.size)
self.kern._collect_gradient(target)
self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
self.kern._collect_gradient(target)
self.kern._set_gradient(target)
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
self.Z.gradient += self.kern.gradients_Z_expectations(
self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
else: else:
self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict) #gradients wrt kernel
self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict) target = np.zeros(self.kern.size)
self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
self.kern._collect_gradient(target)
self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
self.kern._collect_gradient(target)
self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
self.kern._collect_gradient(target)
self.kern._set_gradient(target)
#gradients wrt Z
self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)
def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False): def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
""" """

View file

@ -16,16 +16,16 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
output_dim = 1 output_dim = 1
input_dim = 3 input_dim = 3
else: else:
input_dim = 1 input_dim = 2
output_dim = output_dim output_dim = output_dim
# generate GPLVM-like data # generate GPLVM-like data
X = _np.random.rand(num_inputs, input_dim) X = _np.random.rand(num_inputs, input_dim)
#lengthscales = _np.random.rand(input_dim) lengthscales = _np.random.rand(input_dim)
#k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True) k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
##+ GPy.kern.white(input_dim, 0.01) ##+ GPy.kern.white(input_dim, 0.01)
#) #)
k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001) #k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
K = k.K(X) K = k.K(X)
Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
_np.random.seed(0) _np.random.seed(0)
data = GPy.util.datasets.oil() data = GPy.util.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2)) kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N] Y = data['X'][:N]
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k) m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1) m.data_labels = data['Y'][:N].argmax(axis=1)
@ -270,10 +270,11 @@ def bgplvm_simulation(optimize=True, verbose=1,
from GPy import kern from GPy import kern
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)
if optimize: if optimize:
@ -281,7 +282,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m
@ -293,7 +294,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
from GPy.models import BayesianGPLVM from GPy.models import BayesianGPLVM
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9 D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim) _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0] Y = Ylist[0]
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q) k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
@ -302,7 +303,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k) m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
m.inference_method = VarDTCMissingData() m.inference_method = VarDTCMissingData()
m.Y[inan] = _np.nan m.Y[inan] = _np.nan
m.q.variance *= .1 m.X.variance *= .1
m.parameters_changed() m.parameters_changed()
m.Yreal = Y m.Yreal = Y
@ -311,7 +312,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
m.optimize('bfgs', messages=verbose, max_iters=max_iters, m.optimize('bfgs', messages=verbose, max_iters=max_iters,
gtol=.05) gtol=.05)
if plot: if plot:
m.q.plot("BGPLVM Latent Space 1D") m.X.plot("BGPLVM Latent Space 1D")
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters') m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
return m return m

View file

@ -204,15 +204,14 @@ class VarDTCMissingData(object):
def inference(self, kern, X, Z, likelihood, Y): def inference(self, kern, X, Z, likelihood, Y):
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
psi0 = kern.psi0(Z, X) psi0_all = kern.psi0(Z, X)
psi1 = kern.psi1(Z, X) psi1_all = kern.psi1(Z, X)
psi2 = kern.psi2(Z, X) psi2_all = kern.psi2(Z, X)
else: else:
uncertain_inputs = False uncertain_inputs = False
psi0 = kern.Kdiag(X) psi0_all = kern.Kdiag(X)
psi1 = kern.K(X, Z) psi1_all = kern.K(X, Z)
psi2 = None psi2_all = None
Ys, traces = self._Y(Y) Ys, traces = self._Y(Y)
beta_all = 1./likelihood.variance beta_all = 1./likelihood.variance

View file

@ -101,7 +101,7 @@ class Add(Kern):
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2 return psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from white import White
from rbf import RBF from rbf import RBF
#from rbf_inv import RBFInv #from rbf_inv import RBFInv
@ -124,10 +124,10 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import White from white import White
from rbf import RBF from rbf import RBF
#from rbf_inv import rbfinv #from rbf_inv import rbfinv
@ -151,10 +151,10 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1]) target += p1.gradients_z_variational(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
return target return target
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from white import white from white import white
from rbf import rbf from rbf import rbf
#from rbf_inv import rbfinv #from rbf_inv import rbfinv
@ -179,7 +179,7 @@ class Add(Kern):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2. eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1]) a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
target_mu += a target_mu += a
target_S += b target_S += b
return target_mu, target_S return target_mu, target_S
@ -193,9 +193,9 @@ class Add(Kern):
kernel_plots.plot(self,*args) kernel_plots.plot(self,*args)
def input_sensitivity(self): def input_sensitivity(self):
in_sen = np.zeros((self.input_dim, self.num_params)) in_sen = np.zeros((self.num_params, self.input_dim))
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)): for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
in_sen[i_s, i] = p.input_sensitivity() in_sen[i, i_s] = p.input_sensitivity()
return in_sen return in_sen
def _getstate(self): def _getstate(self):

View file

@ -36,35 +36,43 @@ class Kern(Parameterized):
raise NotImplementedError raise NotImplementedError
def gradients_X_diag(self, dL_dK, X): def gradients_X_diag(self, dL_dK, X):
raise NotImplementedError raise NotImplementedError
def update_gradients_full(self, dL_dK, X, X2): def update_gradients_full(self, dL_dK, X, X2):
"""Set the gradients of all parameters when doing full (N) inference.""" """Set the gradients of all parameters when doing full (N) inference."""
raise NotImplementedError raise NotImplementedError
def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
target = np.zeros(self.size)
self.update_gradients_diag(dL_dKdiag, X)
self._collect_gradient(target)
self.update_gradients_full(dL_dKnm, X, Z)
self._collect_gradient(target)
self.update_gradients_full(dL_dKmm, Z, None)
self._collect_gradient(target)
self._set_gradient(target)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""Set the gradients of all parameters when doing variational (M) inference with uncertain inputs.""" """
Set the gradients of all parameters when doing inference with
uncertain inputs, using expectations of the kernel.
The esential maths is
dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
dL_dpsi1 * dpsi1_d{theta_i} +
dL_dpsi2 * dpsi2_d{theta_i}
"""
raise NotImplementedError raise NotImplementedError
def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
grad = self.gradients_X(dL_dKmm, Z) def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
grad += self.gradients_X(dL_dKnm.T, Z, X) """
return grad Returns the derivative of the objective wrt Z, using the chain rule
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): through the expectation variables.
"""
raise NotImplementedError raise NotImplementedError
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
"""
Compute the gradients wrt the parameters of the variational
distruibution q(X), chain-ruling via the expectations of the kernel
"""
raise NotImplementedError raise NotImplementedError
def plot_ARD(self, *args, **kw): def plot_ARD(self, *args, **kw):
if "matplotlib" in sys.modules: """
from ...plotting.matplot_dep import kernel_plots See :class:`~GPy.plotting.matplot_dep.kernel_plots`
self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__ """
import sys
assert "matplotlib" in sys.modules, "matplotlib package has not been imported." assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ...plotting.matplot_dep import kernel_plots from ...plotting.matplot_dep import kernel_plots
return kernel_plots.plot_ARD(self,*args,**kw) return kernel_plots.plot_ARD(self,*args,**kw)
@ -113,7 +121,8 @@ class Kern(Parameterized):
def prod(self, other, tensor=False): def prod(self, other, tensor=False):
""" """
Multiply two kernels (either on the same space, or on the tensor product of the input space). Multiply two kernels (either on the same space, or on the tensor
product of the input space).
:param other: the other kernel to be added :param other: the other kernel to be added
:type other: GPy.kern :type other: GPy.kern

View file

@ -22,22 +22,25 @@ class Linear(Kern):
:param input_dim: the number of input dimensions :param input_dim: the number of input dimensions
:type input_dim: int :type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i` :param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there is only one variance parameter) :type variances: array or list of the appropriate size (or float if there
:param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension. is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean :type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self, input_dim, variances=None, ARD=False, name='linear'): def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
super(Linear, self).__init__(input_dim, name) super(Linear, self).__init__(input_dim, name)
self.ARD = ARD self.ARD = ARD
if ARD == False: if not ARD:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel" assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else: else:
variances = np.ones(1) variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,))
else: else:
if variances is not None: if variances is not None:
variances = np.asarray(variances) variances = np.asarray(variances)
@ -103,7 +106,6 @@ class Linear(Kern):
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
# variational #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -117,33 +119,26 @@ class Linear(Kern):
ZAinner = self._ZAinner(variational_posterior, Z) ZAinner = self._ZAinner(variational_posterior, Z)
return np.dot(ZAinner, ZA.T) return np.dot(ZAinner, ZA.T)
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
mu, S = variational_posterior.mean, variational_posterior.variance #psi1
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
# psi0: # psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior) tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: grad = tmp.sum(0) if self.ARD: self.variances.gradient += tmp.sum(0)
else: grad = np.atleast_1d(tmp.sum()) else: self.variances.gradient += tmp.sum()
#psi1
self.update_gradients_full(dL_dpsi1, mu, Z)
grad += self.variances.gradient
#psi2 #psi2
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :]) tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
if self.ARD: grad += tmp.sum(0).sum(0).sum(0) if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
else: grad += tmp.sum() else: self.variances.gradient += tmp.sum()
#from Kmm
self.update_gradients_full(dL_dKmm, Z, None)
self.variances.gradient += grad
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Kmm
grad = self.gradients_X(dL_dKmm, Z, None)
#psi1 #psi1
grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean) grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
#psi2 #psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad) self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
return grad return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape) grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
# psi0 # psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances) grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
@ -160,7 +155,7 @@ class Linear(Kern):
#--------------------------------------------------# #--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S): def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim # Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :] AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
@ -203,16 +198,15 @@ class Linear(Kern):
weave_options = {'headers' : ['<omp.h>'], weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
mu = vp.mean
mu = pv.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu) N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target): def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(pv, Z) AZA = self.variances*self._ZAinner(vp, Z)
code=""" code="""
int n,m,mm,q; int n,m,mm,q;
#pragma omp parallel for private(n,mm,q) #pragma omp parallel for private(n,mm,q)
@ -234,23 +228,23 @@ class Linear(Kern):
'extra_compile_args': ['-fopenmp -O3'], #-march=native'], 'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
'extra_link_args' : ['-lgomp']} 'extra_link_args' : ['-lgomp']}
N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1] N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(pv.mean) mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'], arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options) type_converters=weave.converters.blitz,**weave_options)
def _mu2S(self, pv): def _mu2S(self, vp):
return np.square(pv.mean) + pv.variance return np.square(vp.mean) + vp.variance
def _ZAinner(self, pv, Z): def _ZAinner(self, vp, Z):
ZA = Z*self.variances ZA = Z*self.variances
inner = (pv.mean[:, None, :] * pv.mean[:, :, None]) inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
diag_indices = np.diag_indices(pv.mean.shape[1], 2) diag_indices = np.diag_indices(vp.mean.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += pv.variance inner[:, diag_indices[0], diag_indices[1]] += vp.variance
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]! return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self): def input_sensitivity(self):
if self.ARD: return self.variances if self.ARD: return self.variances

View file

@ -35,92 +35,80 @@ class RBF(Stationary):
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def parameters_changed(self):
# reset cached results
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return self.Kdiag(variational_posterior.mean) return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
mu = variational_posterior.mean _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
S = variational_posterior.variance return psi1
self._psi_computations(Z, mu, S)
return self._psi1
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
mu = variational_posterior.mean _, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
S = variational_posterior.variance return psi2
self._psi_computations(Z, mu, S)
return self._psi2
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
#contributions from Kmm
sself.update_gradients_full(dL_dKmm, Z)
mu = variational_posterior.mean
S = variational_posterior.variance
self._psi_computations(Z, mu, S)
l2 = self.lengthscale **2 l2 = self.lengthscale **2
#contributions from psi0: #contributions from psi0:
self.variance.gradient += np.sum(dL_dpsi0) self.variance.gradient = np.sum(dL_dpsi0)
self.lengthscale.gradient = 0.
#from psi1 #from psi1
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance) denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale) d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None] dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD: if not self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum() self.lengthscale.gradient += dpsi1_dlength.sum()
else: else:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0) self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2 #from psi2
d_var = 2.*self._psi2 / self.variance S = variational_posterior.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * self._psi2_denom) denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
d_length = 2.*psi2[:, :, :, None] * (Zdist_sq[None, :,:,:] * denom[:,None,None,:] + mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * denom[:,None,None,:])
#TODO: combine denom and l2 as denom_l2??
#TODO: tidy the above!
#TODO: tensordot below?
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None] dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD: if not self.ARD:
self.lengthscale.gradient += dpsi2_dlength.sum() self.lengthscale.gradient += dpsi2_dlength.sum()
else: else:
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0) self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
mu = variational_posterior.mean
S = variational_posterior.variance def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self._psi_computations(Z, mu, S)
l2 = self.lengthscale **2 l2 = self.lengthscale **2
#psi1 #psi1
denominator = (l2 * (self._psi1_denom)) denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator)) denominator = l2 * denom
dpsi1_dZ = -psi1[:, :, None] * (dist / denominator)
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0) grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
#psi2 #psi2
term1 = self._psi2_Zdist / l2 # num_inducing, num_inducing, input_dim denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
term2 = self._psi2_mudist / self._psi2_denom / l2 # N, num_inducing, num_inducing, input_dim term1 = Zdist / l2 # M, M, Q
dZ = self._psi2[:, :, :, None] * (term1[None] + term2) term2 = mudist / denom[:,None,None,:] / l2 # N, M, M, Q
dZ = psi2[:, :, :, None] * (term1[None, :, :, :] + term2) #N,M,M,Q
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0) grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
grad += self.gradients_X(dL_dKmm, Z, None)
return grad return grad
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
self._psi_computations(Z, mu, S)
l2 = self.lengthscale **2 l2 = self.lengthscale **2
#psi1 #psi1
tmp = self._psi1[:, :, None] / l2 / self._psi1_denom denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1) tmp = psi1[:, :, None] / l2 / denom
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1) grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
#psi2 #psi2
tmp = self._psi2[:, :, :, None] / l2 / self._psi2_denom denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1) tmp = psi2[:, :, :, None] / l2 / denom[:,None,None,:]
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1) grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * mudist).sum(1).sum(1)
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*mudist_sq - 1)).sum(1).sum(1)
return grad_mu, grad_S return grad_mu, grad_S
@ -182,83 +170,72 @@ class RBF(Stationary):
return target return target
#@cache_this TODO
def _psi_computations(self, Z, mu, S): def _psi1computations(self, Z, vp):
# here are the "statistics" for psi1 and psi2 mu, S = vp.mean, vp.variance
Z_changed = not fast_array_equal(Z, self._Z)
if Z_changed:
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
l2 = self.lengthscale **2 l2 = self.lengthscale **2
denom = S[:, None, :] / l2 + 1. # N,1,Q
dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
dist_sq = np.square(dist) / l2 / denom # N,M,Q
exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
psi1 = self.variance * np.exp(exponent) # N,M
return denom, dist, dist_sq, psi1
# psi1
self._psi1_denom = S[:, None, :] / l2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / l2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / l2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(l2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching #@cache_this TODO
self._Z, self._mu, self._S = Z, mu, S def _psi2computations(self, Z, vp):
mu, S = vp.mean, vp.variance
def weave_psi2(self, mu, Zhat): N, Q = mu.shape
N, input_dim = mu.shape M = Z.shape[0]
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim)) #compute required distances
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim)) Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
psi2_exponent = np.zeros((N, num_inducing, num_inducing)) Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
psi2 = np.empty((N, num_inducing, num_inducing)) Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q
psi2_Zdist_sq = self._psi2_Zdist_sq #allocate memory for the things we want to compute
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim) mudist = np.empty((N, M, M, Q))
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim) mudist_sq = np.empty((N, M, M, Q))
variance_sq = np.float64(np.square(self.variance)) psi2 = np.empty((N, M, M))
if self.ARD:
lengthscale2 = self.lengthscale **2 l2 = self.lengthscale **2
else: denom = 2.*S / l2 + 1. # N,Q
lengthscale2 = np.ones(input_dim) * self.lengthscale2**2 half_log_denom = 0.5 * np.log(denom)
denom_l2 = denom*l2 # TODO: Max and James: divide??
variance_sq = float(np.square(self.variance))
code = """ code = """
double tmp; double tmp;
double exponent;
#pragma omp parallel for private(tmp) #pragma omp parallel for private(tmp, exponentgg)
for (int n=0; n<N; n++){ for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){ for (int m=0; m<M; m++){
for (int mm=0; mm<(m+1); mm++){ for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){ exponent = 0;
for (int q=0; q<Q; q++){
//compute mudist //compute mudist
tmp = mu(n,q) - Zhat(m,mm,q); tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp; mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp; mudist(n,mm,m,q) = tmp;
//now mudist_sq //now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q); tmp = tmp*tmp/denom_l2(n,q);
mudist_sq(n,m,mm,q) = tmp; mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp; mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent //now exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q); tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
psi2_exponent(n,mm,m) += tmp; exponent += tmp;
if (m !=mm){ if (m !=mm){
psi2_exponent(n,m,mm) += tmp; exponent += tmp;
} }
//psi2 would be computed like this, but np is faster //compute psi2 by exponentiating
//tmp = variance_sq*exp(psi2_exponent(n,m,mm)); tmp = variance_sq*exp(exponent);
//psi2(n,m,mm) = tmp; psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp; psi2(n,mm,m) = tmp;
} }
} }
} }
@ -272,10 +249,10 @@ class RBF(Stationary):
""" """
mu = param_to_array(mu) mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=['gomp'], weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'], arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options) type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2 return denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2
def input_sensitivity(self): def input_sensitivity(self):
if self.ARD: return 1./self.lengthscale if self.ARD: return 1./self.lengthscale

View file

@ -25,10 +25,10 @@ class Static(Kern):
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return np.zeros(X.shape) return np.zeros(X.shape)
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(Z.shape) return np.zeros(Z.shape)
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape) return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
@ -61,8 +61,8 @@ class White(Static):
def update_gradients_diag(self, dL_dKdiag, X): def update_gradients_diag(self, dL_dKdiag, X):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum() self.variance.gradient = dL_dpsi0.sum()
class Bias(Static): class Bias(Static):
@ -86,6 +86,6 @@ class Bias(Static):
ret[:] = self.variance**2 ret[:] = self.variance**2
return ret return ret
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()

View file

@ -312,4 +312,8 @@ class RatQuad(Stationary):
grad = np.sum(dL_dK*dK_dpow) grad = np.sum(dL_dK*dK_dpow)
self.power.gradient = grad self.power.gradient = grad
def update_gradients_diag(self, dL_dKdiag, X):
super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
self.power.gradient = 0.

View file

@ -49,6 +49,7 @@ class BayesianGPLVM(SparseGP):
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.parameters_changed()
def _getstate(self): def _getstate(self):
""" """
@ -66,7 +67,7 @@ class BayesianGPLVM(SparseGP):
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict) self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
# update for the KL divergence # update for the KL divergence
self.variational_prior.update_gradients_KL(self.X) self.variational_prior.update_gradients_KL(self.X)

View file

@ -6,26 +6,47 @@ import Tango
import pylab as pb import pylab as pb
import numpy as np import numpy as np
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],axes=None,**kwargs): def ax_default(fignum, ax):
if axes is None: if ax is None:
axes = pb.gca() fig = pb.figure(fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
return fig, ax
def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
_, axes = ax_default(fignum, ax)
#here's the mean
return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],ax=None,fignum=None,xlabel='x',ylabel='y',**kwargs):
_, axes = ax_default(ax, fignum)
mu = mu.flatten() mu = mu.flatten()
x = x.flatten() x = x.flatten()
lower = lower.flatten() lower = lower.flatten()
upper = upper.flatten() upper = upper.flatten()
plots = []
#here's the mean #here's the mean
axes.plot(x,mu,color=edgecol,linewidth=2) plots.append(meanplot(x, mu, edgecol, axes))
#here's the box #here's the box
kwargs['linewidth']=0.5 kwargs['linewidth']=0.5
if not 'alpha' in kwargs.keys(): if not 'alpha' in kwargs.keys():
kwargs['alpha'] = 0.3 kwargs['alpha'] = 0.3
axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs) plots.append(axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs))
#this is the edge: #this is the edge:
axes.plot(x,upper,color=edgecol,linewidth=0.2) plots.append(meanplot(x, upper,color=edgecol,linewidth=0.2,axes=axes))
axes.plot(x,lower,color=edgecol,linewidth=0.2) plots.append(meanplot(x, lower,color=edgecol,linewidth=0.2,axes=axes))
axes.set_xlabel(xlabel)
axes.set_ylabel(ylabel)
return plots
def removeRightTicks(ax=None): def removeRightTicks(ax=None):
ax = ax or pb.gca() ax = ax or pb.gca()

View file

@ -2,6 +2,7 @@ import pylab as pb
import numpy as np import numpy as np
from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
from ...util.misc import param_to_array from ...util.misc import param_to_array
from ...core.parameterization.variational import VariationalPosterior
from .base_plots import x_frame2D from .base_plots import x_frame2D
import itertools import itertools
import Tango import Tango
@ -19,7 +20,7 @@ def most_significant_input_dimensions(model, which_indices):
input_1, input_2 = 0, 1 input_1, input_2 = 0, 1
else: else:
try: try:
input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2] input_1, input_2 = np.argsort(model.kern.input_sensitivity())[::-1][:2]
except: except:
raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'" raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
else: else:
@ -43,26 +44,29 @@ def plot_latent(model, labels=None, which_indices=None,
labels = np.ones(model.num_data) labels = np.ones(model.num_data)
input_1, input_2 = most_significant_input_dimensions(model, which_indices) input_1, input_2 = most_significant_input_dimensions(model, which_indices)
X = param_to_array(model.X)
# first, plot the output variance as a function of the latent space #fethch the data points X that we'd like to plot
Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution) X = model.X
Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1])) if isinstance(X, VariationalPosterior):
X = param_to_array(X.mean)
else:
X = param_to_array(X)
# create a function which computes the shading of latent space according to the output variance
def plot_function(x): def plot_function(x):
Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
Xtest_full[:, [input_1, input_2]] = x Xtest_full[:, [input_1, input_2]] = x
mu, var, low, up = model.predict(Xtest_full) mu, var, low, up = model.predict(Xtest_full)
var = var[:, :1] var = var[:, :1]
return np.log(var) return np.log(var)
#Create an IMshow controller that can re-plot the latent space shading at a good resolution
view = ImshowController(ax, plot_function, view = ImshowController(ax, plot_function,
tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)), tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)),
resolution, aspect=aspect, interpolation='bilinear', resolution, aspect=aspect, interpolation='bilinear',
cmap=pb.cm.binary) cmap=pb.cm.binary)
# ax.imshow(var.reshape(resolution, resolution).T,
# extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear', origin='lower')
# make sure labels are in order of input: # make sure labels are in order of input:
ulabels = [] ulabels = []
for lab in labels: for lab in labels:
@ -95,8 +99,8 @@ def plot_latent(model, labels=None, which_indices=None,
if not np.all(labels == 1.) and legend: if not np.all(labels == 1.) and legend:
ax.legend(loc=0, numpoints=1) ax.legend(loc=0, numpoints=1)
ax.set_xlim(xmin[0], xmax[0]) #ax.set_xlim(xmin[0], xmax[0])
ax.set_ylim(xmin[1], xmax[1]) #ax.set_ylim(xmin[1], xmax[1])
ax.grid(b=False) # remove the grid if present, it doesn't look good ax.grid(b=False) # remove the grid if present, it doesn't look good
ax.set_aspect('auto') # set a nice aspect ratio ax.set_aspect('auto') # set a nice aspect ratio

View file

@ -6,7 +6,7 @@ import pylab as pb
import Tango import Tango
from matplotlib.textpath import TextPath from matplotlib.textpath import TextPath
from matplotlib.transforms import offset_copy from matplotlib.transforms import offset_copy
from ...kern import Linear from .base_plots import ax_default
@ -52,11 +52,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
pass '' to not print a title pass '' to not print a title
pass None for a generic title pass None for a generic title
""" """
if ax is None: fig, ax = ax_default(fignum,ax)
fig = pb.figure(fignum)
ax = fig.add_subplot(111)
else:
fig = ax.figure
if title is None: if title is None:
ax.set_title('ARD parameters, %s kernel' % kernel.name) ax.set_title('ARD parameters, %s kernel' % kernel.name)
@ -70,13 +66,13 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
bottom = 0 bottom = 0
x = np.arange(kernel.input_dim) x = np.arange(kernel.input_dim)
for i in range(ard_params.shape[-1]): for i in range(ard_params.shape[0]):
c = Tango.nextMedium() c = Tango.nextMedium()
bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom)) bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom))
bottom += ard_params[:,i] bottom += ard_params[i,:]
ax.set_xlim(-.5, kernel.input_dim - .5) ax.set_xlim(-.5, kernel.input_dim - .5)
add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i]) add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:])
if legend: if legend:
if title is '': if title is '':