Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-08 03:22:38 +02:00)

Commit b6edc1a298 — Merge branch 'params' of github.com:SheffieldML/GPy into params
16 changed files with 306 additions and 426 deletions
@@ -20,14 +20,13 @@ class Model(Parameterized):
         self.optimization_runs = []
         self.sampling_runs = []
         self.preferred_optimizer = 'scg'
-        # self._set_params(self._get_params()) has been taken out as it should only be called on leaf nodes

     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"

     def _log_likelihood_gradients(self):
-        # def dK_d(self, param, dL_dK, X, X2)
         g = np.zeros(self.size)
         try:
-            # [g.__setitem__(s, self.gradient_mapping[p]().flat) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
             [p._collect_gradient(g[s]) for p, s in itertools.izip(self._parameters_, self._param_slices_) if not p.is_fixed]
         except ValueError:
             raise ValueError, 'Gradient for {} not defined, please specify gradients for parameters to optimize'.format(p.name)
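The `_log_likelihood_gradients` loop above writes each parameter's gradient into its own slice of one flat array. A minimal sketch of that collect-into-slices pattern, with a hypothetical ToyParam standing in for GPy's parameter objects (not the real API):

    import numpy as np

    class ToyParam(object):
        """Hypothetical stand-in for a parameter that knows how to emit its gradient."""
        def __init__(self, grad):
            self.grad = np.asarray(grad, dtype=float)
            self.is_fixed = False
        def _collect_gradient(self, target):
            # write this parameter's gradient into the slice it was handed
            target[:] = self.grad.flat

    params = [ToyParam([1.0, 2.0]), ToyParam([3.0])]
    slices = [slice(0, 2), slice(2, 3)]      # one slice of g per parameter
    g = np.zeros(3)
    [p._collect_gradient(g[s]) for p, s in zip(params, slices) if not p.is_fixed]
    print(g)                                  # -> [1. 2. 3.]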
@ -61,85 +60,6 @@ class Model(Parameterized):
|
||||||
self.priors = state.pop()
|
self.priors = state.pop()
|
||||||
Parameterized._setstate(self, state)
|
Parameterized._setstate(self, state)
|
||||||
|
|
||||||
# def set_prior(self, regexp, what):
|
|
||||||
# """
|
|
||||||
#
|
|
||||||
# Sets priors on the model parameters.
|
|
||||||
#
|
|
||||||
# **Notes**
|
|
||||||
#
|
|
||||||
# Asserts that the prior is suitable for the constraint. If the
|
|
||||||
# wrong constraint is in place, an error is raised. If no
|
|
||||||
# constraint is in place, one is added (warning printed).
|
|
||||||
#
|
|
||||||
# For tied parameters, the prior will only be "counted" once, thus
|
|
||||||
# a prior object is only inserted on the first tied index
|
|
||||||
#
|
|
||||||
# :param regexp: regular expression of parameters on which priors need to be set.
|
|
||||||
# :type param: string, regexp, or integer array
|
|
||||||
# :param what: prior to set on parameter.
|
|
||||||
# :type what: GPy.core.Prior type
|
|
||||||
#
|
|
||||||
# """
|
|
||||||
# if self.priors is None:
|
|
||||||
# self.priors = [None for i in range(self._get_params().size)]
|
|
||||||
#
|
|
||||||
# which = self.grep_param_names(regexp)
|
|
||||||
#
|
|
||||||
# # check tied situation
|
|
||||||
# tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
|
|
||||||
# if len(tie_partial_matches):
|
|
||||||
# raise ValueError, "cannot place prior across partial ties"
|
|
||||||
# tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
|
|
||||||
# if len(tie_matches) > 1:
|
|
||||||
# raise ValueError, "cannot place prior across multiple ties"
|
|
||||||
# elif len(tie_matches) == 1:
|
|
||||||
# which = which[:1] # just place a prior object on the first parameter
|
|
||||||
#
|
|
||||||
#
|
|
||||||
# # check constraints are okay
|
|
||||||
#
|
|
||||||
# if what.domain is _POSITIVE:
|
|
||||||
# constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain is _POSITIVE]
|
|
||||||
# if len(constrained_positive_indices):
|
|
||||||
# constrained_positive_indices = np.hstack(constrained_positive_indices)
|
|
||||||
# else:
|
|
||||||
# constrained_positive_indices = np.zeros(shape=(0,))
|
|
||||||
# bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
|
|
||||||
# assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
|
|
||||||
# unconst = np.setdiff1d(which, constrained_positive_indices)
|
|
||||||
# if len(unconst):
|
|
||||||
# print "Warning: constraining parameters to be positive:"
|
|
||||||
# print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
|
|
||||||
# print '\n'
|
|
||||||
# self.constrain_positive(unconst)
|
|
||||||
# elif what.domain is _REAL:
|
|
||||||
# assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
|
|
||||||
# else:
|
|
||||||
# raise ValueError, "prior not recognised"
|
|
||||||
#
|
|
||||||
# # store the prior in a local list
|
|
||||||
# for w in which:
|
|
||||||
# self.priors[w] = what
|
|
||||||
|
|
-    def get_gradient(self, name, return_names=False):
-        """
-        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
-
-        :param name: the name of parameters required (as a regular expression).
-        :type name: regular expression
-        :param return_names: whether or not to return the names matched (default False)
-        :type return_names: bool
-        """
-        matches = self.grep_param_names(name)
-        if len(matches):
-            if return_names:
-                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
-            else:
-                return self._log_likelihood_gradients()[matches]
-        else:
-            raise AttributeError, "no parameter matches %s" % name

     def randomize(self):
         """
         Randomize the model.
@@ -183,7 +103,9 @@ class Model(Parameterized):
         :param messages: whether to display during optimisation
         :type messages: bool

-        .. note:: If num_processes is None, the number of workers in the multiprocessing pool is automatically set to the number of processors on the current machine.
+        .. note:: If num_processes is None, the number of workers in the
+            multiprocessing pool is automatically set to the number of processors
+            on the current machine.

         """
         initial_parameters = self._get_params_transformed()
@@ -234,32 +156,10 @@ class Model(Parameterized):
         'variance', 'lengthscale', 'precision' and 'kappa'. If any of
         these terms are present in the name the parameter is
         constrained positive.

+        DEPRECATED.
         """
         raise DeprecationWarning, 'parameters now have default constraints'
-        #positive_strings = ['variance', 'lengthscale', 'precision', 'kappa', 'sensitivity']
-        # param_names = self._get_param_names()
-
-        # for s in positive_strings:
-        #     paramlist = self.grep_param_names(".*"+s)
-        #     for param in paramlist:
-        #         for p in param.flattened_parameters:
-        #             rav_i = set(self._raveled_index_for(p))
-        #             for constraint in self.constraints.iter_properties():
-        #                 rav_i -= set(self._constraint_indices(p, constraint))
-        #             rav_i -= set(np.nonzero(self._fixes_for(p)!=UNFIXED)[0])
-        #             ind = self._backtranslate_index(p, np.array(list(rav_i), dtype=int))
-        #             if ind.size != 0:
-        #                 p[np.unravel_index(ind, p.shape)].constrain_positive(warning=warning)
-        #     if paramlist:
-        #         self.__getitem__(None, paramlist).constrain_positive(warning=warning)
-        # currently_constrained = self.all_constrained_indices()
-        # to_make_positive = []
-        # for s in positive_strings:
-        #     for i in self.grep_param_names(".*" + s):
-        #         if not (i in currently_constrained):
-        #             to_make_positive.append(i)
-        # if len(to_make_positive):
-        #     self.constrain_positive(np.asarray(to_make_positive), warning=warning)

     def objective_function(self, x):
         """
@@ -336,7 +236,9 @@ class Model(Parameterized):
         :messages: whether to display during optimisation
         :type messages: bool
         :param optimizer: which optimizer to use (defaults to self.preferred optimizer)
-        :type optimizer: string TODO: valid strings?
+        :type optimizer: string
+
+        TODO: valid args
         """
         if optimizer is None:
             optimizer = self.preferred_optimizer
@ -473,81 +375,4 @@ class Model(Parameterized):
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def input_sensitivity(self):
|
|
||||||
"""
|
|
||||||
return an array describing the sesitivity of the model to each input
|
|
||||||
|
|
||||||
NB. Right now, we're basing this on the lengthscales (or
|
|
||||||
variances) of the kernel. TODO: proper sensitivity analysis
|
|
||||||
where we integrate across the model inputs and evaluate the
|
|
||||||
effect on the variance of the model output. """
|
|
||||||
|
|
||||||
if not hasattr(self, 'kern'):
|
|
||||||
raise ValueError, "this model has no kernel"
|
|
||||||
|
|
||||||
k = self.kern#[p for p in self.kern._parameters_ if hasattr(p, "ARD") and p.ARD]
|
|
||||||
from ..kern import RBF, Linear#, RBFInv
|
|
||||||
|
|
||||||
if isinstance(k, RBF):
|
|
||||||
return 1. / k.lengthscale
|
|
||||||
#elif isinstance(k, RBFInv):
|
|
||||||
# return k.inv_lengthscale
|
|
||||||
elif isinstance(k, Linear):
|
|
||||||
return k.variances
|
|
||||||
else:
|
|
||||||
raise ValueError, "cannot determine sensitivity for this kernel"
|
|
||||||
|
|
||||||
def pseudo_EM(self, stop_crit=.1, **kwargs):
|
|
||||||
"""
|
|
||||||
EM - like algorithm for Expectation Propagation and Laplace approximation
|
|
||||||
|
|
||||||
:param stop_crit: convergence criterion
|
|
||||||
:type stop_crit: float
|
|
||||||
|
|
||||||
.. Note: kwargs are passed to update_likelihood and optimize functions.
|
|
||||||
"""
|
|
||||||
assert isinstance(self.likelihood, likelihoods.EP) or isinstance(self.likelihood, likelihoods.EP_Mixed_Noise), "pseudo_EM is only available for EP likelihoods"
|
|
||||||
ll_change = stop_crit + 1.
|
|
||||||
iteration = 0
|
|
||||||
last_ll = -np.inf
|
|
||||||
|
|
||||||
convergence = False
|
|
||||||
alpha = 0
|
|
||||||
stop = False
|
|
||||||
|
|
||||||
# Handle **kwargs
|
|
||||||
ep_args = {}
|
|
||||||
for arg in kwargs.keys():
|
|
||||||
if arg in ('epsilon', 'power_ep'):
|
|
||||||
ep_args[arg] = kwargs[arg]
|
|
||||||
del kwargs[arg]
|
|
||||||
|
|
||||||
while not stop:
|
|
||||||
last_approximation = self.likelihood.copy()
|
|
||||||
last_params = self._get_params()
|
|
||||||
if len(ep_args) == 2:
|
|
||||||
self.update_likelihood_approximation(epsilon=ep_args['epsilon'], power_ep=ep_args['power_ep'])
|
|
||||||
elif len(ep_args) == 1:
|
|
||||||
if ep_args.keys()[0] == 'epsilon':
|
|
||||||
self.update_likelihood_approximation(epsilon=ep_args['epsilon'])
|
|
||||||
elif ep_args.keys()[0] == 'power_ep':
|
|
||||||
self.update_likelihood_approximation(power_ep=ep_args['power_ep'])
|
|
||||||
else:
|
|
||||||
self.update_likelihood_approximation()
|
|
||||||
new_ll = self.log_likelihood()
|
|
||||||
ll_change = new_ll - last_ll
|
|
||||||
|
|
||||||
if ll_change < 0:
|
|
||||||
self.likelihood = last_approximation # restore previous likelihood approximation
|
|
||||||
self._set_params(last_params) # restore model parameters
|
|
||||||
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
|
|
||||||
stop = True
|
|
||||||
else:
|
|
||||||
self.optimize(**kwargs)
|
|
||||||
last_ll = self.log_likelihood()
|
|
||||||
if ll_change < stop_crit:
|
|
||||||
stop = True
|
|
||||||
iteration += 1
|
|
||||||
if stop:
|
|
||||||
print "%s iterations." % iteration
|
|
||||||
self.update_likelihood_approximation()
|
|
||||||
|
|
|
||||||
|
|
@@ -15,7 +15,7 @@ __precision__ = numpy.get_printoptions()['precision'] # numpy printing precision
 __print_threshold__ = 5
 ######

-class Param(Constrainable, ObservableArray, Gradcheckable, Indexable):
+class Param(Constrainable, ObservableArray, Gradcheckable):
     """
     Parameter object for GPy models.
@@ -4,10 +4,10 @@

 import numpy as np
 from domains import _POSITIVE,_NEGATIVE, _BOUNDED
-import sys
 import weakref

-_lim_val = -np.log(sys.float_info.epsilon)
+_exp_lim_val = np.finfo(np.float64).max
+_lim_val = np.log(_exp_lim_val)#-np.log(sys.float_info.epsilon)

 #===============================================================================
 # Fixing constants
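The new constants above are chosen so that exp can never overflow a float64: _lim_val = log(max float64) ≈ 709.78, and inputs are clipped to it before exponentiation. A small NumPy sketch of the idea (independent of GPy):

    import numpy as np

    _exp_lim_val = np.finfo(np.float64).max
    _lim_val = np.log(_exp_lim_val)               # ~709.78

    x = np.array([-5.0, 0.0, 500.0, 2000.0])
    # log(1 + exp(x)) overflows for x = 2000; clipping at _lim_val keeps exp()
    # finite, and for such large x the softplus is effectively the identity.
    f = np.where(x > _lim_val, x, np.log1p(np.exp(np.clip(x, -np.inf, _lim_val))))
    print(f)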
@@ -34,6 +34,16 @@ class Transformation(object):
     def initialize(self, f):
         """ produce a sensible initial value for f(x)"""
         raise NotImplementedError
+    def plot(self, xlabel=r'transformed $\theta$', ylabel=r'$\theta$', axes=None, *args, **kw):
+        import sys
+        assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
+        import matplotlib.pyplot as plt
+        from ...plotting.matplot_dep import base_plots
+        x = np.linspace(-8, 8)
+        base_plots.meanplot(x, self.f(x), axes=axes, *args, **kw)
+        axes = plt.gca()
+        axes.set_xlabel(xlabel)
+        axes.set_ylabel(ylabel)
     def __str__(self):
         raise NotImplementedError
     def __repr__(self):
@@ -55,6 +65,24 @@ class Logexp(Transformation):
     def __str__(self):
         return '+ve'


+class LogexpNeg(Transformation):
+    domain = _POSITIVE
+    def f(self, x):
+        return np.where(x>_lim_val, -x, -np.log(1. + np.exp(np.clip(x, -np.inf, _lim_val))))
+        #raises overflow warning: return np.where(x>_lim_val, x, np.log(1. + np.exp(x)))
+    def finv(self, f):
+        return np.where(f>_lim_val, 0, np.log(np.exp(-f) - 1.))
+    def gradfactor(self, f):
+        return np.where(f>_lim_val, -1, -1 + np.exp(-f))
+    def initialize(self, f):
+        if np.any(f < 0.):
+            print "Warning: changing parameters to satisfy constraints"
+        return np.abs(f)
+    def __str__(self):
+        return '+ve'
+
+
 class NegativeLogexp(Transformation):
     domain = _NEGATIVE
     logexp = Logexp()
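Transformations such as Logexp map an unconstrained optimiser variable x to a constrained parameter via f, with finv as the inverse. A quick round-trip check of the softplus form used above, in plain NumPy rather than the GPy classes:

    import numpy as np

    def f(x):          # unconstrained -> positive (softplus)
        return np.log1p(np.exp(x))

    def finv(theta):   # positive -> unconstrained
        return np.log(np.expm1(theta))

    x = np.array([-3.0, 0.1, 4.0])
    theta = f(x)
    print(theta)               # all strictly positive
    print(finv(theta) - x)     # ~0: the round trip recovers x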
@@ -2,7 +2,6 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

 import numpy as np
-from ..util.linalg import mdot
 from gp import GP
 from parameterization.param import Param
 from ..inference.latent_function_inference import var_dtc

@@ -32,7 +31,7 @@ class SparseGP(GP):

     """

-    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, name='sparse gp'):
+    def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None, X_variance=None, name='sparse gp'):

         #pick a sensible inference method
         if inference_method is None:
@@ -58,11 +57,33 @@ class SparseGP(GP):
         self.posterior, self._log_marginal_likelihood, self.grad_dict = self.inference_method.inference(self.kern, self.X, self.Z, self.likelihood, self.Y)
         self.likelihood.update_gradients(self.grad_dict.pop('partial_for_likelihood'))
         if isinstance(self.X, VariationalPosterior):
-            self.kern.update_gradients_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
-            self.Z.gradient = self.kern.gradients_Z_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+            #gradients wrt kernel
+            dL_dKmm = self.grad_dict.pop('dL_dKmm')
+            self.kern.update_gradients_full(dL_dKmm, self.Z, None)
+            target = np.zeros(self.kern.size)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)
+            self.kern._collect_gradient(target)
+            self.kern._set_gradient(target)
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(dL_dKmm, self.Z)
+            self.Z.gradient += self.kern.gradients_Z_expectations(
+                self.grad_dict['dL_dpsi1'], self.grad_dict['dL_dpsi2'], Z=self.Z, variational_posterior=self.X)
         else:
-            self.kern.update_gradients_sparse(X=self.X, Z=self.Z, **self.grad_dict)
-            self.Z.gradient = self.kern.gradients_Z_sparse(X=self.X, Z=self.Z, **self.grad_dict)
+            #gradients wrt kernel
+            target = np.zeros(self.kern.size)
+            self.kern.update_gradients_diag(self.grad_dict['dL_dKdiag'], self.X)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_full(self.grad_dict['dL_dKnm'], self.X, self.Z)
+            self.kern._collect_gradient(target)
+            self.kern.update_gradients_full(self.grad_dict['dL_dKmm'], self.Z, None)
+            self.kern._collect_gradient(target)
+            self.kern._set_gradient(target)
+
+            #gradients wrt Z
+            self.Z.gradient = self.kern.gradients_X(self.grad_dict['dL_dKmm'], self.Z)
+            self.Z.gradient += self.kern.gradients_X(self.grad_dict['dL_dKnm'].T, self.Z, self.X)

     def _raw_predict(self, Xnew, X_variance_new=None, full_cov=False):
         """
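In the new code the kernel's gradient is assembled by repeatedly calling an update routine for each term of the objective (Kdiag, Knm, Kmm or the psi statistics), collecting each partial result into a running total, and finally installing the sum. A hedged toy sketch of that update/collect/set pattern (invented class, not GPy's Kern interface):

    import numpy as np

    class ToyKern(object):
        """Hypothetical kernel-like object illustrating gradient accumulation."""
        def __init__(self):
            self.size = 2
            self.gradient = np.zeros(self.size)
        def update_gradients_term(self, dL_dterm):
            # gradient contribution from a single term of the objective
            self.gradient = np.asarray(dL_dterm, dtype=float)
        def _collect_gradient(self, target):
            target += self.gradient            # accumulate into the running total
        def _set_gradient(self, target):
            self.gradient = target.copy()      # install the summed gradient

    k = ToyKern()
    target = np.zeros(k.size)
    for dL in ([1.0, 0.5], [0.25, 0.25], [0.0, 1.0]):   # e.g. Kdiag, Knm, Kmm terms
        k.update_gradients_term(dL)
        k._collect_gradient(target)
    k._set_gradient(target)
    print(k.gradient)                          # -> [1.25 1.75]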
@@ -16,16 +16,16 @@ def bgplvm_test_model(optimize=False, verbose=1, plot=False, output_dim=200, nan
         output_dim = 1
         input_dim = 3
     else:
-        input_dim = 1
+        input_dim = 2
         output_dim = output_dim

     # generate GPLVM-like data
     X = _np.random.rand(num_inputs, input_dim)
-    #lengthscales = _np.random.rand(input_dim)
-    #k = (GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
+    lengthscales = _np.random.rand(input_dim)
+    k = GPy.kern.RBF(input_dim, .5, lengthscales, ARD=True)
     ##+ GPy.kern.white(input_dim, 0.01)
     #)
-    k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
+    #k = GPy.kern.Linear(input_dim, ARD=1)# + GPy.kern.bias(input_dim) + GPy.kern.white(input_dim, 0.00001)
     K = k.K(X)
     Y = _np.random.multivariate_normal(_np.zeros(num_inputs), K, (output_dim,)).T
@ -164,7 +164,7 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
|
||||||
_np.random.seed(0)
|
_np.random.seed(0)
|
||||||
data = GPy.util.datasets.oil()
|
data = GPy.util.datasets.oil()
|
||||||
|
|
||||||
kernel = GPy.kern.RBF(Q, 1., _np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
|
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
|
||||||
Y = data['X'][:N]
|
Y = data['X'][:N]
|
||||||
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
|
m = GPy.models.BayesianGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
|
||||||
m.data_labels = data['Y'][:N].argmax(axis=1)
|
m.data_labels = data['Y'][:N].argmax(axis=1)
|
||||||
|
|
@@ -270,10 +270,11 @@ def bgplvm_simulation(optimize=True, verbose=1,
     from GPy import kern
     from GPy.models import BayesianGPLVM

-    D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 3, 10
+    D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
     _, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
     Y = Ylist[0]
     k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
+    #k = kern.RBF(Q, ARD=True, lengthscale=10.)
     m = BayesianGPLVM(Y, Q, init="PCA", num_inducing=num_inducing, kernel=k)

     if optimize:

@@ -281,7 +282,7 @@ def bgplvm_simulation(optimize=True, verbose=1,
         m.optimize('bfgs', messages=verbose, max_iters=max_iters,
                    gtol=.05)
     if plot:
-        m.q.plot("BGPLVM Latent Space 1D")
+        m.X.plot("BGPLVM Latent Space 1D")
         m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m
@ -293,7 +294,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
from GPy.models import BayesianGPLVM
|
from GPy.models import BayesianGPLVM
|
||||||
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
from GPy.inference.latent_function_inference.var_dtc import VarDTCMissingData
|
||||||
|
|
||||||
D1, D2, D3, N, num_inducing, Q = 15, 5, 8, 30, 5, 9
|
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 5, 9
|
||||||
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
|
||||||
Y = Ylist[0]
|
Y = Ylist[0]
|
||||||
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
k = kern.Linear(Q, ARD=True)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
|
||||||
|
|
@ -302,7 +303,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
m = BayesianGPLVM(Y.copy(), Q, init="random", num_inducing=num_inducing, kernel=k)
|
||||||
m.inference_method = VarDTCMissingData()
|
m.inference_method = VarDTCMissingData()
|
||||||
m.Y[inan] = _np.nan
|
m.Y[inan] = _np.nan
|
||||||
m.q.variance *= .1
|
m.X.variance *= .1
|
||||||
m.parameters_changed()
|
m.parameters_changed()
|
||||||
m.Yreal = Y
|
m.Yreal = Y
|
||||||
|
|
||||||
|
|
@ -311,7 +312,7 @@ def bgplvm_simulation_missing_data(optimize=True, verbose=1,
|
||||||
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
m.optimize('bfgs', messages=verbose, max_iters=max_iters,
|
||||||
gtol=.05)
|
gtol=.05)
|
||||||
if plot:
|
if plot:
|
||||||
m.q.plot("BGPLVM Latent Space 1D")
|
m.X.plot("BGPLVM Latent Space 1D")
|
||||||
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -204,15 +204,14 @@ class VarDTCMissingData(object):
     def inference(self, kern, X, Z, likelihood, Y):
         if isinstance(X, VariationalPosterior):
             uncertain_inputs = True
-            psi0 = kern.psi0(Z, X)
-            psi1 = kern.psi1(Z, X)
-            psi2 = kern.psi2(Z, X)
+            psi0_all = kern.psi0(Z, X)
+            psi1_all = kern.psi1(Z, X)
+            psi2_all = kern.psi2(Z, X)
         else:
             uncertain_inputs = False
-            psi0 = kern.Kdiag(X)
-            psi1 = kern.K(X, Z)
-            psi2 = None
+            psi0_all = kern.Kdiag(X)
+            psi1_all = kern.K(X, Z)
+            psi2_all = None

         Ys, traces = self._Y(Y)
         beta_all = 1./likelihood.variance
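For orientation, the psi quantities renamed here are kernel expectations under the variational posterior q(X): psi0 ≈ E[Kdiag(X)], psi1 ≈ E[K(X,Z)] and psi2 ≈ E[K(X,Z)^T K(X,Z)] per data point, giving shapes (N,), (N,M) and (N,M,M) for N data points and M inducing inputs. A rough Monte-Carlo sketch of those expectations for a toy RBF kernel (illustrative only; GPy uses closed forms):

    import numpy as np

    def rbf(A, B, var=1.0, ell=1.0):
        # simple RBF kernel matrix between rows of A and rows of B
        d2 = ((A[:, None, :] - B[None, :, :])**2).sum(-1)
        return var * np.exp(-0.5 * d2 / ell**2)

    rng = np.random.RandomState(0)
    N, M, Q, S = 4, 3, 2, 20000
    mu = rng.randn(N, Q)                 # q(X) means
    var_x = 0.1 * np.ones((N, Q))        # q(X) variances
    Z = rng.randn(M, Q)                  # inducing inputs

    psi0 = np.zeros(N)
    psi1 = np.zeros((N, M))
    psi2 = np.zeros((N, M, M))
    for s in range(S):
        X = mu + np.sqrt(var_x) * rng.randn(N, Q)   # sample from q(X)
        Knm = rbf(X, Z)
        psi0 += rbf(X, X).diagonal() / S
        psi1 += Knm / S
        psi2 += Knm[:, :, None] * Knm[:, None, :] / S
    print(psi0.shape, psi1.shape, psi2.shape)       # (4,) (4, 3) (4, 3, 3)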
@ -101,7 +101,7 @@ class Add(Kern):
|
||||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
return psi2
|
return psi2
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
from white import White
|
from white import White
|
||||||
from rbf import RBF
|
from rbf import RBF
|
||||||
#from rbf_inv import RBFInv
|
#from rbf_inv import RBFInv
|
||||||
|
|
@ -124,10 +124,10 @@ class Add(Kern):
|
||||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
p1.update_gradients_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
||||||
|
|
||||||
|
|
||||||
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
from white import White
|
from white import White
|
||||||
from rbf import RBF
|
from rbf import RBF
|
||||||
#from rbf_inv import rbfinv
|
#from rbf_inv import rbfinv
|
||||||
|
|
@ -151,10 +151,10 @@ class Add(Kern):
|
||||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z[:,is2], mu[:,is2], S[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
target += p1.gradients_z_variational(dL_dKmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
target += p1.gradients_z_variational(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], S[:,is1], Z[:,is1])
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, mu, S, Z):
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
from white import white
|
from white import white
|
||||||
from rbf import rbf
|
from rbf import rbf
|
||||||
#from rbf_inv import rbfinv
|
#from rbf_inv import rbfinv
|
||||||
|
|
@ -179,7 +179,7 @@ class Add(Kern):
|
||||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
|
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(z[:,is2], mu[:,is2], s[:,is2]) * 2.
|
||||||
|
|
||||||
|
|
||||||
a, b = p1.gradients_muS_variational(dL_dkmm, dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
|
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, mu[:,is1], s[:,is1], z[:,is1])
|
||||||
target_mu += a
|
target_mu += a
|
||||||
target_S += b
|
target_S += b
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
@ -193,9 +193,9 @@ class Add(Kern):
|
||||||
kernel_plots.plot(self,*args)
|
kernel_plots.plot(self,*args)
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
in_sen = np.zeros((self.input_dim, self.num_params))
|
in_sen = np.zeros((self.num_params, self.input_dim))
|
||||||
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
|
for i, [p, i_s] in enumerate(zip(self._parameters_, self.input_slices)):
|
||||||
in_sen[i_s, i] = p.input_sensitivity()
|
in_sen[i, i_s] = p.input_sensitivity()
|
||||||
return in_sen
|
return in_sen
|
||||||
|
|
||||||
def _getstate(self):
|
def _getstate(self):
|
||||||
|
|
|
||||||
|
|
@@ -26,45 +26,53 @@ class Kern(Parameterized):
         raise NotImplementedError
     def Kdiag(self, Xa):
         raise NotImplementedError
-    def psi0(self,Z,variational_posterior):
+    def psi0(self, Z, variational_posterior):
         raise NotImplementedError
-    def psi1(self,Z,variational_posterior):
+    def psi1(self, Z, variational_posterior):
         raise NotImplementedError
-    def psi2(self,Z,variational_posterior):
+    def psi2(self, Z, variational_posterior):
         raise NotImplementedError
     def gradients_X(self, dL_dK, X, X2):
         raise NotImplementedError
     def gradients_X_diag(self, dL_dK, X):
         raise NotImplementedError

     def update_gradients_full(self, dL_dK, X, X2):
         """Set the gradients of all parameters when doing full (N) inference."""
         raise NotImplementedError
-    def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        target = np.zeros(self.size)
-        self.update_gradients_diag(dL_dKdiag, X)
-        self._collect_gradient(target)
-        self.update_gradients_full(dL_dKnm, X, Z)
-        self._collect_gradient(target)
-        self.update_gradients_full(dL_dKmm, Z, None)
-        self._collect_gradient(target)
-        self._set_gradient(target)

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        """Set the gradients of all parameters when doing variational (M) inference with uncertain inputs."""
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        """
+        Set the gradients of all parameters when doing inference with
+        uncertain inputs, using expectations of the kernel.
+
+        The essential maths is
+
+        dL_d{theta_i} = dL_dpsi0 * dpsi0_d{theta_i} +
+                        dL_dpsi1 * dpsi1_d{theta_i} +
+                        dL_dpsi2 * dpsi2_d{theta_i}
+        """
         raise NotImplementedError
-    def gradients_Z_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
-        grad = self.gradients_X(dL_dKmm, Z)
-        grad += self.gradients_X(dL_dKnm.T, Z, X)
-        return grad
-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        """
+        Returns the derivative of the objective wrt Z, using the chain rule
+        through the expectation variables.
+        """
         raise NotImplementedError
-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        """
+        Compute the gradients wrt the parameters of the variational
+        distribution q(X), chain-ruling via the expectations of the kernel
+        """
         raise NotImplementedError

     def plot_ARD(self, *args, **kw):
-        if "matplotlib" in sys.modules:
-            from ...plotting.matplot_dep import kernel_plots
-            self.plot_ARD.__doc__ += kernel_plots.plot_ARD.__doc__
+        """
+        See :class:`~GPy.plotting.matplot_dep.kernel_plots`
+        """
+        import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ...plotting.matplot_dep import kernel_plots
         return kernel_plots.plot_ARD(self,*args,**kw)
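Written out, the chain rule quoted in the docstring above is (sums over the entries of each psi array implied, with the psi's being expectations of the kernel under q(X)):

    \frac{\partial L}{\partial \theta_i}
      = \sum \frac{\partial L}{\partial \psi_0}\,\frac{\partial \psi_0}{\partial \theta_i}
      + \sum \frac{\partial L}{\partial \psi_1}\,\frac{\partial \psi_1}{\partial \theta_i}
      + \sum \frac{\partial L}{\partial \psi_2}\,\frac{\partial \psi_2}{\partial \theta_i},
    \qquad
    \psi_0 = \mathbb{E}_{q(X)}\!\left[k(X,X)\right],\quad
    \psi_1 = \mathbb{E}_{q(X)}\!\left[k(X,Z)\right],\quad
    \psi_2 = \mathbb{E}_{q(X)}\!\left[k(X,Z)^{\top}k(X,Z)\right].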
@ -113,7 +121,8 @@ class Kern(Parameterized):
|
||||||
|
|
||||||
def prod(self, other, tensor=False):
|
def prod(self, other, tensor=False):
|
||||||
"""
|
"""
|
||||||
Multiply two kernels (either on the same space, or on the tensor product of the input space).
|
Multiply two kernels (either on the same space, or on the tensor
|
||||||
|
product of the input space).
|
||||||
|
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
:type other: GPy.kern
|
:type other: GPy.kern
|
||||||
|
|
|
||||||
|
|
@@ -22,22 +22,25 @@ class Linear(Kern):
     :param input_dim: the number of input dimensions
     :type input_dim: int
     :param variances: the vector of variances :math:`\sigma^2_i`
-    :type variances: array or list of the appropriate size (or float if there is only one variance parameter)
-    :param ARD: Auto Relevance Determination. If equal to "False", the kernel has only one variance parameter \sigma^2, otherwise there is one variance parameter per dimension.
+    :type variances: array or list of the appropriate size (or float if there
+        is only one variance parameter)
+    :param ARD: Auto Relevance Determination. If False, the kernel has only one
+        variance parameter \sigma^2, otherwise there is one variance
+        parameter per dimension.
     :type ARD: Boolean
     :rtype: kernel object

     """

     def __init__(self, input_dim, variances=None, ARD=False, name='linear'):
         super(Linear, self).__init__(input_dim, name)
         self.ARD = ARD
-        if ARD == False:
+        if not ARD:
             if variances is not None:
                 variances = np.asarray(variances)
                 assert variances.size == 1, "Only one variance needed for non-ARD kernel"
             else:
                 variances = np.ones(1)
-            self._Xcache, self._X2cache = np.empty(shape=(2,))
         else:
             if variances is not None:
                 variances = np.asarray(variances)
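A brief usage sketch of the linear ARD kernel whose constructor is documented above, using only calls that appear elsewhere in this diff (the data here is made up):

    import numpy as np
    import GPy

    X = np.random.rand(50, 3)
    k = GPy.kern.Linear(3, ARD=True)        # one variance per input dimension
    K = k.K(X)                              # 50 x 50 covariance matrix
    print(K.shape)
    print(k.input_sensitivity())            # for ARD, the per-dimension variances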
@ -103,7 +106,6 @@ class Linear(Kern):
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# PSI statistics #
|
# PSI statistics #
|
||||||
# variational #
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
|
|
@@ -117,33 +119,26 @@ class Linear(Kern):
         ZAinner = self._ZAinner(variational_posterior, Z)
         return np.dot(ZAinner, ZA.T)

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
-        mu, S = variational_posterior.mean, variational_posterior.variance
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        #psi1
+        self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
         # psi0:
         tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
-        if self.ARD: grad = tmp.sum(0)
-        else: grad = np.atleast_1d(tmp.sum())
-        #psi1
-        self.update_gradients_full(dL_dpsi1, mu, Z)
-        grad += self.variances.gradient
+        if self.ARD: self.variances.gradient += tmp.sum(0)
+        else: self.variances.gradient += tmp.sum()
         #psi2
         tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * (2. * Z)[None, None, :, :])
-        if self.ARD: grad += tmp.sum(0).sum(0).sum(0)
-        else: grad += tmp.sum()
-        #from Kmm
-        self.update_gradients_full(dL_dKmm, Z, None)
-        self.variances.gradient += grad
+        if self.ARD: self.variances.gradient += tmp.sum(0).sum(0).sum(0)
+        else: self.variances.gradient += tmp.sum()

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
-        # Kmm
-        grad = self.gradients_X(dL_dKmm, Z, None)
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         #psi1
-        grad += self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
+        grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
         #psi2
         self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
         return grad

-    def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, variational_posterior, Z):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
         # psi0
         grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
@ -160,7 +155,7 @@ class Linear(Kern):
|
||||||
#--------------------------------------------------#
|
#--------------------------------------------------#
|
||||||
|
|
||||||
|
|
||||||
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, pv, target_mu, target_S):
|
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
|
||||||
# Think N,num_inducing,num_inducing,input_dim
|
# Think N,num_inducing,num_inducing,input_dim
|
||||||
ZA = Z * self.variances
|
ZA = Z * self.variances
|
||||||
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
|
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
|
||||||
|
|
@ -203,16 +198,15 @@ class Linear(Kern):
|
||||||
weave_options = {'headers' : ['<omp.h>'],
|
weave_options = {'headers' : ['<omp.h>'],
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
||||||
'extra_link_args' : ['-lgomp']}
|
'extra_link_args' : ['-lgomp']}
|
||||||
|
mu = vp.mean
|
||||||
mu = pv.mean
|
|
||||||
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
|
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||||
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
type_converters=weave.converters.blitz,**weave_options)
|
||||||
|
|
||||||
|
|
||||||
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, pv, target):
|
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
|
||||||
AZA = self.variances*self._ZAinner(pv, Z)
|
AZA = self.variances*self._ZAinner(vp, Z)
|
||||||
code="""
|
code="""
|
||||||
int n,m,mm,q;
|
int n,m,mm,q;
|
||||||
#pragma omp parallel for private(n,mm,q)
|
#pragma omp parallel for private(n,mm,q)
|
||||||
|
|
@ -234,23 +228,23 @@ class Linear(Kern):
|
||||||
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
||||||
'extra_link_args' : ['-lgomp']}
|
'extra_link_args' : ['-lgomp']}
|
||||||
|
|
||||||
N,num_inducing,input_dim = pv.mean.shape[0],Z.shape[0],pv.mean.shape[1]
|
N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
|
||||||
mu = param_to_array(pv.mean)
|
mu = param_to_array(vp.mean)
|
||||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||||
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
|
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
|
||||||
type_converters=weave.converters.blitz,**weave_options)
|
type_converters=weave.converters.blitz,**weave_options)
|
||||||
|
|
||||||
|
|
||||||
def _mu2S(self, pv):
|
def _mu2S(self, vp):
|
||||||
return np.square(pv.mean) + pv.variance
|
return np.square(vp.mean) + vp.variance
|
||||||
|
|
||||||
def _ZAinner(self, pv, Z):
|
def _ZAinner(self, vp, Z):
|
||||||
ZA = Z*self.variances
|
ZA = Z*self.variances
|
||||||
inner = (pv.mean[:, None, :] * pv.mean[:, :, None])
|
inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
|
||||||
diag_indices = np.diag_indices(pv.mean.shape[1], 2)
|
diag_indices = np.diag_indices(vp.mean.shape[1], 2)
|
||||||
inner[:, diag_indices[0], diag_indices[1]] += pv.variance
|
inner[:, diag_indices[0], diag_indices[1]] += vp.variance
|
||||||
|
|
||||||
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x N x input_dim]!
|
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
if self.ARD: return self.variances
|
if self.ARD: return self.variances
|
||||||
|
|
|
||||||
|
|
@ -35,92 +35,80 @@ class RBF(Stationary):
|
||||||
# PSI statistics #
|
# PSI statistics #
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def parameters_changed(self):
|
|
||||||
# reset cached results
|
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
|
|
||||||
|
|
||||||
|
|
||||||
def psi0(self, Z, variational_posterior):
|
def psi0(self, Z, variational_posterior):
|
||||||
return self.Kdiag(variational_posterior.mean)
|
return self.Kdiag(variational_posterior.mean)
|
||||||
|
|
||||||
def psi1(self, Z, variational_posterior):
|
def psi1(self, Z, variational_posterior):
|
||||||
mu = variational_posterior.mean
|
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
|
||||||
S = variational_posterior.variance
|
return psi1
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
return self._psi1
|
|
||||||
|
|
||||||
def psi2(self, Z, variational_posterior):
|
def psi2(self, Z, variational_posterior):
|
||||||
mu = variational_posterior.mean
|
_, _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
|
||||||
S = variational_posterior.variance
|
return psi2
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
return self._psi2
|
|
||||||
|
|
||||||
def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
#contributions from Kmm
|
|
||||||
sself.update_gradients_full(dL_dKmm, Z)
|
|
||||||
|
|
||||||
mu = variational_posterior.mean
|
|
||||||
S = variational_posterior.variance
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
l2 = self.lengthscale **2
|
l2 = self.lengthscale **2
|
||||||
|
|
||||||
#contributions from psi0:
|
#contributions from psi0:
|
||||||
self.variance.gradient += np.sum(dL_dpsi0)
|
self.variance.gradient = np.sum(dL_dpsi0)
|
||||||
|
self.lengthscale.gradient = 0.
|
||||||
|
|
||||||
#from psi1
|
#from psi1
|
||||||
self.variance.gradient += np.sum(dL_dpsi1 * self._psi1 / self.variance)
|
denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
||||||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
|
||||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||||
if not self.ARD:
|
if not self.ARD:
|
||||||
self.lengthscale.gradient += dpsi1_dlength.sum()
|
self.lengthscale.gradient += dpsi1_dlength.sum()
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
|
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
|
||||||
|
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
|
||||||
|
|
||||||
#from psi2
|
#from psi2
|
||||||
d_var = 2.*self._psi2 / self.variance
|
S = variational_posterior.variance
|
||||||
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * self._psi2_denom)
|
denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
||||||
|
d_length = 2.*psi2[:, :, :, None] * (Zdist_sq[None, :,:,:] * denom[:,None,None,:] + mudist_sq + S[:, None, None, :] / l2) / (self.lengthscale * denom[:,None,None,:])
|
||||||
|
#TODO: combine denom and l2 as denom_l2??
|
||||||
|
#TODO: tidy the above!
|
||||||
|
#TODO: tensordot below?
|
||||||
|
|
||||||
self.variance.gradient += np.sum(dL_dpsi2 * d_var)
|
|
||||||
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
|
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
|
||||||
if not self.ARD:
|
if not self.ARD:
|
||||||
self.lengthscale.gradient += dpsi2_dlength.sum()
|
self.lengthscale.gradient += dpsi2_dlength.sum()
|
||||||
else:
|
else:
|
||||||
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
|
self.lengthscale.gradient += dpsi2_dlength.sum(0).sum(0).sum(0)
|
||||||
|
|
||||||
def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
|
||||||
mu = variational_posterior.mean
|
|
||||||
S = variational_posterior.variance
|
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
l2 = self.lengthscale **2
|
l2 = self.lengthscale **2
|
||||||
|
|
||||||
#psi1
|
#psi1
|
||||||
denominator = (l2 * (self._psi1_denom))
|
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
||||||
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
denominator = l2 * denom
|
||||||
|
dpsi1_dZ = -psi1[:, :, None] * (dist / denominator)
|
||||||
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
grad = np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
||||||
|
|
||||||
#psi2
|
#psi2
|
||||||
term1 = self._psi2_Zdist / l2 # num_inducing, num_inducing, input_dim
|
denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
||||||
term2 = self._psi2_mudist / self._psi2_denom / l2 # N, num_inducing, num_inducing, input_dim
|
term1 = Zdist / l2 # M, M, Q
|
||||||
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
term2 = mudist / denom[:,None,None,:] / l2 # N, M, M, Q
|
||||||
|
dZ = psi2[:, :, :, None] * (term1[None, :, :, :] + term2) #N,M,M,Q
|
||||||
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
grad += 2*(dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
||||||
|
|
||||||
grad += self.gradients_X(dL_dKmm, Z, None)
|
|
||||||
|
|
||||||
return grad
|
return grad
|
||||||
|
|
||||||
def gradients_q_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||||
mu = variational_posterior.mean
|
|
||||||
S = variational_posterior.variance
|
|
||||||
self._psi_computations(Z, mu, S)
|
|
||||||
l2 = self.lengthscale **2
|
l2 = self.lengthscale **2
|
||||||
#psi1
|
#psi1
|
||||||
tmp = self._psi1[:, :, None] / l2 / self._psi1_denom
|
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
|
||||||
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
tmp = psi1[:, :, None] / l2 / denom
|
||||||
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
|
||||||
|
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
|
||||||
#psi2
|
#psi2
|
||||||
tmp = self._psi2[:, :, :, None] / l2 / self._psi2_denom
|
denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
|
||||||
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
tmp = psi2[:, :, :, None] / l2 / denom[:,None,None,:]
|
||||||
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
grad_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * mudist).sum(1).sum(1)
|
||||||
|
grad_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*mudist_sq - 1)).sum(1).sum(1)
|
||||||
|
|
||||||
return grad_mu, grad_S
|
return grad_mu, grad_S
|
||||||
|
|
||||||
|
|
@ -182,83 +170,72 @@ class RBF(Stationary):
|
||||||
return target
|
return target
|
||||||
|
|
||||||
|
|
||||||
|
#@cache_this TODO
|
||||||
|
def _psi1computations(self, Z, vp):
|
||||||
|
mu, S = vp.mean, vp.variance
|
||||||
|
l2 = self.lengthscale **2
|
||||||
|
denom = S[:, None, :] / l2 + 1. # N,1,Q
|
||||||
|
dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
|
||||||
|
dist_sq = np.square(dist) / l2 / denom # N,M,Q
|
||||||
|
exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
|
||||||
|
psi1 = self.variance * np.exp(exponent) # N,M
|
||||||
|
return denom, dist, dist_sq, psi1
|
||||||
|
|
||||||
def _psi_computations(self, Z, mu, S):
|
|
||||||
# here are the "statistics" for psi1 and psi2
|
|
||||||
Z_changed = not fast_array_equal(Z, self._Z)
|
|
||||||
if Z_changed:
|
|
||||||
# Z has changed, compute Z specific stuff
|
|
||||||
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
|
||||||
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
|
||||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
|
|
||||||
|
|
||||||
if Z_changed or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
|
|
||||||
# something's changed. recompute EVERYTHING
|
|
||||||
l2 = self.lengthscale **2
|
|
||||||
|
|
||||||
# psi1
|
#@cache_this TODO
|
||||||
self._psi1_denom = S[:, None, :] / l2 + 1.
|
def _psi2computations(self, Z, vp):
|
||||||
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
|
mu, S = vp.mean, vp.variance
|
||||||
self._psi1_dist_sq = np.square(self._psi1_dist) / l2 / self._psi1_denom
|
|
||||||
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
|
|
||||||
self._psi1 = self.variance * np.exp(self._psi1_exponent)
|
|
||||||
|
|
||||||
# psi2
|
N, Q = mu.shape
|
||||||
self._psi2_denom = 2.*S[:, None, None, :] / l2 + 1. # N,M,M,Q
|
M = Z.shape[0]
|
||||||
-        self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
-        # self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
-        # self._psi2_mudist_sq = np.square(self._psi2_mudist)/(l2*self._psi2_denom)
-        # self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
-        self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q

-        # store matrices for caching
-        self._Z, self._mu, self._S = Z, mu, S
+        #compute required distances
+        Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
+        Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
+        Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q

     def weave_psi2(self, mu, Zhat):
         #allocate memory for the things we want to compute
-        N, input_dim = mu.shape
-        num_inducing = Zhat.shape[0]
-        mudist = np.empty((N, num_inducing, num_inducing, input_dim))
-        mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
-        psi2_exponent = np.zeros((N, num_inducing, num_inducing))
-        psi2 = np.empty((N, num_inducing, num_inducing))
-        psi2_Zdist_sq = self._psi2_Zdist_sq
-        _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
-        half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
-        variance_sq = np.float64(np.square(self.variance))
-        if self.ARD:
-            lengthscale2 = self.lengthscale **2
-        else:
-            lengthscale2 = np.ones(input_dim) * self.lengthscale2**2
+        mudist = np.empty((N, M, M, Q))
+        mudist_sq = np.empty((N, M, M, Q))
+        psi2 = np.empty((N, M, M))
+
+        l2 = self.lengthscale **2
+        denom = 2.*S / l2 + 1. # N,Q
+        half_log_denom = 0.5 * np.log(denom)
+        denom_l2 = denom*l2 # TODO: Max and James: divide??
+
+        variance_sq = float(np.square(self.variance))

         code = """
         double tmp;
+        double exponent;

-        #pragma omp parallel for private(tmp)
+        #pragma omp parallel for private(tmp, exponent)
         for (int n=0; n<N; n++){
-            for (int m=0; m<num_inducing; m++){
+            for (int m=0; m<M; m++){
                 for (int mm=0; mm<(m+1); mm++){
-                    for (int q=0; q<input_dim; q++){
+                    exponent = 0;
+                    for (int q=0; q<Q; q++){
                         //compute mudist
                         tmp = mu(n,q) - Zhat(m,mm,q);
                         mudist(n,m,mm,q) = tmp;
                         mudist(n,mm,m,q) = tmp;

                         //now mudist_sq
-                        tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
+                        tmp = tmp*tmp/denom_l2(n,q);
                         mudist_sq(n,m,mm,q) = tmp;
                         mudist_sq(n,mm,m,q) = tmp;

-                        //now psi2_exponent
-                        tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
-                        psi2_exponent(n,mm,m) += tmp;
+                        //now exponent
+                        tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
+                        exponent += tmp;
                         if (m !=mm){
-                            psi2_exponent(n,m,mm) += tmp;
+                            exponent += tmp;
                         }
-                        //psi2 would be computed like this, but np is faster
-                        //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
-                        //psi2(n,m,mm) = tmp;
-                        //psi2(n,mm,m) = tmp;
                     }
+                    //compute psi2 by exponentiating
+                    tmp = variance_sq*exp(exponent);
+                    psi2(n,m,mm) = tmp;
+                    psi2(n,mm,m) = tmp;
                 }
             }
         }
         }
@@ -272,10 +249,10 @@ class RBF(Stationary):
         """
         mu = param_to_array(mu)
         weave.inline(code, support_code=support_code, libraries=['gomp'],
-                     arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
+                     arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
                      type_converters=weave.converters.blitz, **self.weave_options)

-        return mudist, mudist_sq, psi2_exponent, psi2
+        return denom, Zdist, Zdist_sq, mudist, mudist_sq, psi2

     def input_sensitivity(self):
         if self.ARD: return 1./self.lengthscale

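For reference, the psi2 statistic assembled by the weave loop above has a direct NumPy form; the sketch below (toy shapes and values are assumed, not part of this changeset) reproduces the same exponent as the C code:

    import numpy as np

    # Toy sizes: N data points, M inducing points, Q latent dimensions (assumed)
    N, M, Q = 5, 3, 2
    rng = np.random.RandomState(0)
    mu = rng.randn(N, Q)                  # q(X) means
    S = rng.rand(N, Q)                    # q(X) variances
    Z = rng.randn(M, Q)                   # inducing inputs
    lengthscale = np.ones(Q) * 0.7
    variance = 2.0

    l2 = lengthscale ** 2
    Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :])                               # M,M,Q
    Zdist_sq = np.square(0.5 * (Z[:, None, :] - Z[None, :, :]) / lengthscale)  # M,M,Q
    denom = 2. * S / l2 + 1.                                                   # N,Q
    mudist_sq = np.square(mu[:, None, None, :] - Zhat[None]) / (l2 * denom[:, None, None, :])  # N,M,M,Q
    exponent = np.sum(-Zdist_sq[None] - mudist_sq - 0.5 * np.log(denom)[:, None, None, :], -1) # N,M,M
    psi2 = variance ** 2 * np.exp(exponent)                                    # N,M,M
    print(psi2.shape)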
@@ -25,10 +25,10 @@ class Static(Kern):
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)

-    def gradients_Z_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(Z.shape)

-    def gradients_muS_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+    def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return np.zeros(variational_posterior.shape), np.zeros(variational_posterior.shape)

     def psi0(self, Z, variational_posterior):

@@ -61,8 +61,8 @@ class White(Static):
     def update_gradients_diag(self, dL_dKdiag, X):
         self.variance.gradient = dL_dKdiag.sum()

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        self.variance.gradient = np.trace(dL_dKmm) + dL_dpsi0.sum()
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum()


 class Bias(Static):

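The White gradient above only involves dL_dpsi0, which is consistent with a white-noise kernel whose psi1 and psi2 expectations vanish when the inducing inputs differ from the data; a small sketch of that bookkeeping (shapes, values and the zero psi1/psi2 are assumptions, not taken from the diff):

    import numpy as np

    # Hypothetical shapes: N data points, M inducing inputs
    N, M, variance = 4, 3, 0.1

    # Assumed psi statistics of a white-noise kernel under any q(X):
    psi0 = variance * np.ones(N)     # E_q[k(x_n, x_n)] = variance
    psi1 = np.zeros((N, M))          # E_q[k(x_n, Z)] = 0 for Z != x_n
    psi2 = np.zeros((N, M, M))       # second-order expectation also 0

    # d psi0 / d variance = 1, so only the psi0 term survives the chain rule:
    dL_dpsi0 = np.random.RandomState(1).randn(N)
    variance_gradient = dL_dpsi0.sum()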
@@ -86,6 +86,6 @@ class Bias(Static):
         ret[:] = self.variance**2
         return ret

-    def update_gradients_variational(self, dL_dKmm, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
-        self.variance.gradient = dL_dKmm.sum() + dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()
+    def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
+        self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()

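The Bias gradient follows from psi2 = variance**2 (as returned just above), so the psi2 term picks up a factor of 2*variance, while psi0 and psi1 are assumed to equal the variance directly; a toy sketch of the chain rule (values assumed):

    import numpy as np

    # Hypothetical shapes for a Bias (constant) kernel
    N, M, variance = 4, 3, 0.5
    rng = np.random.RandomState(2)
    dL_dpsi0, dL_dpsi1, dL_dpsi2 = rng.randn(N), rng.randn(N, M), rng.randn(N, M, M)

    # Assumed psi statistics; they do not depend on q(X) or Z:
    psi0 = variance * np.ones(N)             # d/d variance = 1
    psi1 = variance * np.ones((N, M))        # d/d variance = 1
    psi2 = variance**2 * np.ones((N, M, M))  # d/d variance = 2*variance

    # Chain rule reproduces the gradient line in the diff above:
    variance_gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*variance*dL_dpsi2.sum()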
@@ -312,4 +312,8 @@ class RatQuad(Stationary):
         grad = np.sum(dL_dK*dK_dpow)
         self.power.gradient = grad

+    def update_gradients_diag(self, dL_dKdiag, X):
+        super(RatQuad, self).update_gradients_diag(dL_dKdiag, X)
+        self.power.gradient = 0.

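Zeroing the power gradient in the diagonal update is consistent with the rational-quadratic diagonal being just the variance, independent of the power; a quick check under the usual k(r) = variance*(1 + r^2/(2*power))^(-power) form (toy values assumed):

    import numpy as np

    variance, r = 1.3, 0.  # distance of a point to itself is zero
    for power in (0.5, 1.0, 2.0):
        k_diag = variance * (1. + r**2 / (2.*power))**(-power)
        assert np.isclose(k_diag, variance)  # the diagonal never sees the power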
@@ -49,6 +49,7 @@ class BayesianGPLVM(SparseGP):

         SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
         self.add_parameter(self.X, index=0)
+        self.parameters_changed()

     def _getstate(self):
         """

@@ -66,7 +67,7 @@ class BayesianGPLVM(SparseGP):
         super(BayesianGPLVM, self).parameters_changed()
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

-        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_q_variational(posterior_variational=self.X, Z=self.Z, **self.grad_dict)
+        self.X.mean.gradient, self.X.variance.gradient = self.kern.gradients_qX_expectations(variational_posterior=self.X, Z=self.Z, **self.grad_dict)

         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)

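The KL term subtracted from the bound above has a closed form when q(X) is a factorised Gaussian measured against a unit-Gaussian prior; a self-contained sketch of that term and its gradients (shapes and values assumed, names are illustrative only):

    import numpy as np

    # KL( N(mu, S) || N(0, I) ), summed over N points and Q latent dimensions
    rng = np.random.RandomState(3)
    N, Q = 10, 2
    mu = rng.randn(N, Q)   # variational means
    S = rng.rand(N, Q)     # variational (diagonal) variances

    KL = 0.5 * np.sum(np.square(mu) + S - np.log(S) - 1.)

    # Gradients of the KL term with respect to the variational parameters:
    dKL_dmu = mu
    dKL_dS = 0.5 * (1. - 1. / S)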
@@ -6,26 +6,47 @@ import Tango
 import pylab as pb
 import numpy as np

-def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],axes=None,**kwargs):
-    if axes is None:
-        axes = pb.gca()
+def ax_default(fignum, ax):
+    if ax is None:
+        fig = pb.figure(fignum)
+        ax = fig.add_subplot(111)
+    else:
+        fig = ax.figure
+    return fig, ax
+
+def meanplot(x, mu, color=Tango.colorsHex['darkBlue'], ax=None, fignum=None, linewidth=2,**kw):
+    _, axes = ax_default(fignum, ax)
+    #here's the mean
+    return axes.plot(x,mu,color=color,linewidth=linewidth,**kw)
+
+def gpplot(x,mu,lower,upper,edgecol=Tango.colorsHex['darkBlue'],fillcol=Tango.colorsHex['lightBlue'],ax=None,fignum=None,xlabel='x',ylabel='y',**kwargs):
+    _, axes = ax_default(ax, fignum)
+
     mu = mu.flatten()
     x = x.flatten()
     lower = lower.flatten()
     upper = upper.flatten()

+    plots = []
+
     #here's the mean
-    axes.plot(x,mu,color=edgecol,linewidth=2)
+    plots.append(meanplot(x, mu, edgecol, axes))

     #here's the box
     kwargs['linewidth']=0.5
     if not 'alpha' in kwargs.keys():
         kwargs['alpha'] = 0.3
-    axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs)
+    plots.append(axes.fill(np.hstack((x,x[::-1])),np.hstack((upper,lower[::-1])),color=fillcol,**kwargs))

     #this is the edge:
-    axes.plot(x,upper,color=edgecol,linewidth=0.2)
-    axes.plot(x,lower,color=edgecol,linewidth=0.2)
+    plots.append(meanplot(x, upper,color=edgecol,linewidth=0.2,axes=axes))
+    plots.append(meanplot(x, lower,color=edgecol,linewidth=0.2,axes=axes))
+
+    axes.set_xlabel(xlabel)
+    axes.set_ylabel(ylabel)
+
+    return plots


 def removeRightTicks(ax=None):
     ax = ax or pb.gca()

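The refactored gpplot draws a mean line, a shaded credible band and faint band edges; the following self-contained stand-in shows the same drawing pattern without any GPy imports (data, colours and the synthetic band are assumed for illustration):

    import numpy as np
    import pylab as pb

    x = np.linspace(0., 10., 200)
    mu = np.sin(x)
    lower, upper = mu - 0.5, mu + 0.5   # e.g. a credible interval from a GP predict

    ax = pb.figure().add_subplot(111)
    ax.plot(x, mu, color='navy', linewidth=2)                        # the mean
    ax.fill(np.hstack((x, x[::-1])), np.hstack((upper, lower[::-1])),
            color='lightblue', alpha=0.3, linewidth=0.5)             # the box
    ax.plot(x, upper, color='navy', linewidth=0.2)                   # the edges
    ax.plot(x, lower, color='navy', linewidth=0.2)
    ax.set_xlabel('x')
    ax.set_ylabel('y')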
@@ -2,6 +2,7 @@ import pylab as pb
 import numpy as np
 from latent_space_visualizations.controllers.imshow_controller import ImshowController,ImAnnotateController
 from ...util.misc import param_to_array
+from ...core.parameterization.variational import VariationalPosterior
 from .base_plots import x_frame2D
 import itertools
 import Tango

@@ -19,7 +20,7 @@ def most_significant_input_dimensions(model, which_indices):
         input_1, input_2 = 0, 1
     else:
         try:
-            input_1, input_2 = np.argsort(model.input_sensitivity())[::-1][:2]
+            input_1, input_2 = np.argsort(model.kern.input_sensitivity())[::-1][:2]
         except:
             raise ValueError, "cannot automatically determine which dimensions to plot, please pass 'which_indices'"
     else:

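The plotted pair of latent dimensions is chosen by ranking the kernel's per-dimension sensitivity (1./lengthscale for an ARD RBF, per input_sensitivity above) and keeping the top two; a toy sketch (lengthscales assumed):

    import numpy as np

    # Small lengthscale = high sensitivity for an ARD RBF kernel.
    lengthscales = np.array([0.3, 5.0, 1.2, 0.9])
    sensitivity = 1. / lengthscales                      # as in input_sensitivity()
    input_1, input_2 = np.argsort(sensitivity)[::-1][:2]
    # -> dimensions 0 and 3, the two most informative latent directions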
@@ -43,26 +44,29 @@ def plot_latent(model, labels=None, which_indices=None,
         labels = np.ones(model.num_data)

     input_1, input_2 = most_significant_input_dimensions(model, which_indices)
-    X = param_to_array(model.X)
-
-    # first, plot the output variance as a function of the latent space
-    Xtest, xx, yy, xmin, xmax = x_frame2D(X[:, [input_1, input_2]], resolution=resolution)
-    Xtest_full = np.zeros((Xtest.shape[0], model.X.shape[1]))
+
+    #fetch the data points X that we'd like to plot
+    X = model.X
+    if isinstance(X, VariationalPosterior):
+        X = param_to_array(X.mean)
+    else:
+        X = param_to_array(X)

+    # create a function which computes the shading of latent space according to the output variance
     def plot_function(x):
+        Xtest_full = np.zeros((x.shape[0], model.X.shape[1]))
         Xtest_full[:, [input_1, input_2]] = x
         mu, var, low, up = model.predict(Xtest_full)
         var = var[:, :1]
         return np.log(var)

+    #create an Imshow controller that can re-plot the latent space shading at a good resolution
     view = ImshowController(ax, plot_function,
                             tuple(X[:, [input_1, input_2]].min(0)) + tuple(X[:, [input_1, input_2]].max(0)),
                             resolution, aspect=aspect, interpolation='bilinear',
                             cmap=pb.cm.binary)

-    # ax.imshow(var.reshape(resolution, resolution).T,
-    #           extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary, interpolation='bilinear', origin='lower')
-
     # make sure labels are in order of input:
     ulabels = []
     for lab in labels:

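plot_function embeds a 2D grid over the two plotted dimensions into the full latent space, holding the remaining dimensions at zero, and returns the log predictive variance used for the shading; a sketch with a stand-in predictor (all names, shapes and the dummy variance model are assumptions):

    import numpy as np

    input_1, input_2, total_dims = 0, 3, 5

    def dummy_predict(Xfull):
        # stand-in for model.predict: variance grows away from the origin
        var = 1. + np.sum(np.square(Xfull), axis=1, keepdims=True)
        return None, var

    def plot_function(x):                       # x: (n_grid, 2) points in the plotted plane
        Xtest_full = np.zeros((x.shape[0], total_dims))
        Xtest_full[:, [input_1, input_2]] = x   # other latent dimensions stay at zero
        _, var = dummy_predict(Xtest_full)
        return np.log(var[:, :1])

    grid = np.random.RandomState(4).randn(50, 2)
    shading = plot_function(grid)               # values handed to the imshow controller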
@@ -95,8 +99,8 @@ def plot_latent(model, labels=None, which_indices=None,
     if not np.all(labels == 1.) and legend:
         ax.legend(loc=0, numpoints=1)

-    ax.set_xlim(xmin[0], xmax[0])
-    ax.set_ylim(xmin[1], xmax[1])
+    #ax.set_xlim(xmin[0], xmax[0])
+    #ax.set_ylim(xmin[1], xmax[1])
     ax.grid(b=False) # remove the grid if present, it doesn't look good
     ax.set_aspect('auto') # set a nice aspect ratio

@@ -6,7 +6,7 @@ import pylab as pb
 import Tango
 from matplotlib.textpath import TextPath
 from matplotlib.transforms import offset_copy
-from ...kern import Linear
+from .base_plots import ax_default


@@ -52,11 +52,7 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
     pass '' to not print a title
     pass None for a generic title
     """
-    if ax is None:
-        fig = pb.figure(fignum)
-        ax = fig.add_subplot(111)
-    else:
-        fig = ax.figure
+    fig, ax = ax_default(fignum,ax)

     if title is None:
         ax.set_title('ARD parameters, %s kernel' % kernel.name)

@@ -70,13 +66,13 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
     bottom = 0
     x = np.arange(kernel.input_dim)

-    for i in range(ard_params.shape[-1]):
+    for i in range(ard_params.shape[0]):
         c = Tango.nextMedium()
-        bars.append(plot_bars(fig, ax, x, ard_params[:,i], c, kernel._parameters_[i].name, bottom=bottom))
-        bottom += ard_params[:,i]
+        bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel._parameters_[i].name, bottom=bottom))
+        bottom += ard_params[i,:]

     ax.set_xlim(-.5, kernel.input_dim - .5)
-    add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[:,i])
+    add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:])

     if legend:
         if title is '':
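With the new indexing, ard_params is treated as one row per additive kernel part and one column per input dimension, and the rows are stacked as bar segments; a toy sketch of that layout (values are assumed, no GPy imports):

    import numpy as np
    import pylab as pb

    # Toy ARD sensitivities: one row per additive kernel part, one column per input dimension.
    ard_params = np.array([[0.2, 1.1, 0.4, 0.9],
                           [0.5, 0.3, 1.4, 0.1]])
    x = np.arange(ard_params.shape[1])

    bottom = 0
    ax = pb.figure().add_subplot(111)
    for i in range(ard_params.shape[0]):            # rows are parts, as in the new indexing
        ax.bar(x, ard_params[i, :], bottom=bottom)  # stack this part on the previous ones
        bottom += ard_params[i, :]
    ax.set_xlim(-.5, ard_params.shape[1] - .5)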