mirror of https://github.com/SheffieldML/GPy.git
[ard] enhanced ard handling and plotting
Conflicts:
	GPy/kern/_src/linear.py
	GPy/models/ss_gplvm.py
This commit is contained in:
parent 3972b4bd9a
commit d000893878

8 changed files with 323 additions and 118 deletions
@@ -285,11 +285,11 @@ class GP(Model):
                              plot_raw=plot_raw, Y_metadata=Y_metadata,
                              data_symbol=data_symbol, **kw)
 
-    def input_sensitivity(self):
+    def input_sensitivity(self, summarize=True):
         """
         Returns the sensitivity for each dimension of this model
         """
-        return self.kern.input_sensitivity()
+        return self.kern.input_sensitivity(summarize=summarize)
 
     def optimize(self, optimizer=None, start=None, **kwargs):
         """
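Usage sketch for the new summarize flag (not part of the commit; assumes a standard GPy regression model with an ARD kernel):

    import numpy as np
    import GPy

    X = np.random.rand(50, 3)
    Y = np.sin(X[:, :1]) + 0.05 * np.random.randn(50, 1)
    m = GPy.models.GPRegression(X, Y, GPy.kern.RBF(3, ARD=True))

    m.input_sensitivity()                        # default: summarized over kernel parts
    m.kern.input_sensitivity(summarize=False)    # per-part view, where the kernel supports it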
@@ -14,6 +14,13 @@ class Add(CombinationKernel):
     This kernel will take over the active dims of it's subkernels passed in.
     """
     def __init__(self, subkerns, name='add'):
+        for i, kern in enumerate(subkerns[:]):
+            if isinstance(kern, Add):
+                del subkerns[i]
+                for part in kern.parts[::-1]:
+                    kern.remove_parameter(part)
+                    subkerns.insert(i, part)
+
         super(Add, self).__init__(subkerns, name)
 
     @Cache_this(limit=2, force_kwargs=['which_parts'])
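The loop added to __init__ flattens nested sums: any Add passed in is dissolved and its parts are spliced in at the same position, so chained additions yield one flat Add rather than a tree. A sketch of the intended behaviour (not from the commit):

    from GPy import kern

    k = kern.RBF(2) + kern.Bias(2)    # Add with parts [rbf, bias]
    k2 = k + kern.White(2)            # one flat Add with parts [rbf, bias, white],
                                      # not Add([Add([rbf, bias]), white])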
@@ -160,7 +167,7 @@ class Add(CombinationKernel):
             [np.add(target_grads[i],grads[i],target_grads[i]) for i in xrange(len(grads))]
         return target_grads
 
-    def add(self, other, name='sum'):
+    def add(self, other):
         if isinstance(other, Add):
             other_params = other.parameters[:]
             for p in other_params:
@@ -171,5 +178,11 @@ class Add(CombinationKernel):
         self.input_dim, self.active_dims = self.get_input_dim_active_dims(self.parts)
         return self
 
-    def input_sensitivity(self):
-        return reduce(np.add, [k.input_sensitivity() for k in self.parts])
+    def input_sensitivity(self, summarize=True):
+        if summarize:
+            return reduce(np.add, [k.input_sensitivity(summarize) for k in self.parts])
+        else:
+            i_s = np.zeros((len(self.parts), self.input_dim))
+            from operator import setitem
+            [setitem(i_s, (i, Ellipsis), k.input_sensitivity(summarize)) for i, k in enumerate(self.parts)]
+            return i_s
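With summarize=False the sensitivities are no longer summed over parts but stacked into an array of shape (#kernels, input_dim), one row per part in order of appearance. A sketch (not from the commit):

    k = kern.RBF(3, ARD=True) + kern.Linear(3, ARD=True)
    k.input_sensitivity()                  # summed over parts: shape (3,)
    k.input_sensitivity(summarize=False)   # stacked per part: shape (2, 3)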
@@ -23,9 +23,9 @@ class Kern(Parameterized):
 
     input_dim:
 
         is the number of dimensions to work on. Make sure to give the
         tight dimensionality of inputs.
         You most likely want this to be the integer telling the number of
         input dimensions of the kernel.
         If this is not an integer (!) we will work on the whole input matrix X,
         and not check whether dimensions match or not (!).
@@ -134,7 +134,7 @@ class Kern(Parameterized):
         from ...plotting.matplot_dep import kernel_plots
         return kernel_plots.plot_ARD(self,*args,**kw)
 
-    def input_sensitivity(self):
+    def input_sensitivity(self, summarize=True):
         """
         Returns the sensitivity for each dimension of this kernel.
         """
@@ -144,6 +144,9 @@ class Kern(Parameterized):
         """ Overloading of the '+' operator. for more control, see self.add """
         return self.add(other)
 
+    def __iadd__(self, other):
+        return self.add(other)
+
     def add(self, other, name='add'):
         """
         Add another kernel to this one.
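The new __iadd__ routes in-place addition through self.add, so k += other now behaves like k = k + other:

    k = kern.RBF(2)
    k += kern.Bias(2)    # equivalent to k = k.add(kern.Bias(2))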
@@ -235,7 +238,12 @@ class CombinationKernel(Kern):
             active_dims = np.arange(input_dim)
         return input_dim, active_dims
 
-    def input_sensitivity(self):
+    def input_sensitivity(self, summarize=True):
+        """
+        If summarize is true, we want to get the summarized view of the sensitivities,
+        otherwise put everything into an array with shape (#kernels, input_dim)
+        in the order of appearance of the kernels in the parameterized object.
+        """
         raise NotImplementedError("Choose the kernel you want to get the sensitivity for. You need to override the default behaviour for getting the input sensitivity to be able to get the input sensitivity. For sum kernel it is the sum of all sensitivities, TODO: product kernel? Other kernels?, also TODO: shall we return all the sensitivities here in the combination kernel? So we can combine them however we want? This could lead to just plot all the sensitivities here...")
 
     def _check_active_dims(self, X):
@@ -132,7 +132,6 @@ class Linear(Kern):
     def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
 
-
 class LinearFull(Kern):
     def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
         super(LinearFull, self).__init__(input_dim, active_dims, name)
@@ -40,6 +40,11 @@ class Static(Kern):
         K = self.K(variational_posterior.mean, Z)
         return np.einsum('ij,ik->jk',K,K) #K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inherriting classes
 
+    def input_sensitivity(self, summarize=True):
+        if summarize:
+            return super(Static, self).input_sensitivity(summarize=summarize)
+        else:
+            return np.ones(self.input_dim) * self.variance
 
 class White(Static):
     def __init__(self, input_dim, variance=1., active_dims=None, name='white'):
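Static kernels have no lengthscales, so in the unsummarized view they report their (constant) variance in every input dimension; the summarized path still defers to the base class. A sketch (not from the commit):

    b = kern.Bias(3, variance=2.)
    b.input_sensitivity(summarize=False)   # array([ 2.,  2.,  2.])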
@@ -63,7 +68,6 @@ class White(Static):
     def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
         self.variance.gradient = dL_dpsi0.sum()
 
-
 class Bias(Static):
     def __init__(self, input_dim, variance=1., active_dims=None, name='bias'):
         super(Bias, self).__init__(input_dim, variance, active_dims, name)
@@ -179,7 +179,7 @@ class Stationary(Kern):
     def gradients_X_diag(self, dL_dKdiag, X):
         return np.zeros(X.shape)
 
-    def input_sensitivity(self):
+    def input_sensitivity(self, summarize=True):
         return np.ones(self.input_dim)/self.lengthscale**2
 
 class Exponential(Stationary):
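For stationary kernels the sensitivity of dimension q is 1/lengthscale[q]**2: the shorter the lengthscale, the faster the function varies along that dimension and the more that input matters. For example:

    k = kern.RBF(3, ARD=True, lengthscale=[0.5, 1., 10.])
    k.input_sensitivity()    # ~ [4., 1., 0.01] -- dimension 0 dominates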
@@ -340,7 +340,7 @@ class RatQuad(Stationary):
     """
 
 
-    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='ExpQuad'):
+    def __init__(self, input_dim, variance=1., lengthscale=None, power=2., ARD=False, active_dims=None, name='RatQuad'):
         super(RatQuad, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
         self.power = Param('power', power, Logexp())
         self.add_parameters(self.power)
@@ -2,14 +2,18 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)
 
 import numpy as np
+import itertools
+from matplotlib import pyplot
 
 from ..core.sparse_gp import SparseGP
 from .. import kern
 from ..likelihoods import Gaussian
+from ..inference.optimization import SCG
+from ..util import linalg
 from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
 from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
 from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
-from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
 
 class SSGPLVM(SparseGP):
     """
@@ -23,13 +27,9 @@ class SSGPLVM(SparseGP):
     :type init: 'PCA'|'random'
 
     """
-    def __init__(self, Y, input_dim, X=None, X_variance=None, Gamma=None, init='PCA', num_inducing=10,
-                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, pi=None, learnPi=True, **kwargs):
+    def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
+                 Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs):
 
-        self.mpi_comm = mpi_comm
-        self.__IN_OPTIMIZATION__ = False
-        self.group_spike = group_spike
 
         if X == None:
             from ..util.initialization import initialize_latent
             X, fracs = initialize_latent(init, input_dim, Y)
@@ -40,66 +40,49 @@ class SSGPLVM(SparseGP):
 
         if X_variance is None: # The variance of the variational approximation (S)
             X_variance = np.random.uniform(0,.1,X.shape)
 
-        if Gamma is None:
-            gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
-            gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
-            gamma[gamma>1.-1e-9] = 1.-1e-9
-            gamma[gamma<1e-9] = 1e-9
-        else:
-            gamma = Gamma.copy()
+        gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation
+        gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
+        if group_spike:
+            gamma[:] = gamma.mean(axis=0)
 
         if Z is None:
             Z = np.random.permutation(X.copy())[:num_inducing]
         assert Z.shape[1] == X.shape[1]
 
         if likelihood is None:
             likelihood = Gaussian()
 
         if kernel is None:
             kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
-        if kernel.useGPU:
-            kernel.psicomp = PSICOMP_SSRBF_GPU()
 
-        if inference_method is None:
-            inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
 
-        if pi is None:
-            pi = np.empty((input_dim))
-            pi[:] = 0.5
-        self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=learnPi) # the prior probability of the latent binary variable b
+        pi = np.empty((input_dim))
+        pi[:] = 0.5
+        self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
+
+        X = np.asfortranarray(X)
+        X_variance = np.asfortranarray(X_variance)
+        gamma = np.asfortranarray(gamma)
         X = SpikeAndSlabPosterior(X, X_variance, gamma)
 
+        if group_spike:
+            kernel.group_spike_prob = True
+            self.variational_prior.group_spike_prob = True
+
         SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
         self.add_parameter(self.X, index=0)
         self.add_parameter(self.variational_prior)
 
-        if mpi_comm != None:
-            from ..util.mpi import divide_data
-            N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
-            self.N_range = (N_start, N_end)
-            self.N_list = np.array(N_list)
-            self.Y_local = self.Y[N_start:N_end]
-            print 'MPI RANK: '+str(self.mpi_comm.rank)+' with datasize: '+str(self.N_range)
-            mpi_comm.Bcast(self.param_array, root=0)
-
-        if self.group_spike:
-            [self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together
 
     def set_X_gradients(self, X, X_grad):
         """Set the gradients of the posterior distribution of X in its specific form."""
         X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
 
-    def get_X_gradients(self, X):
-        """Get the gradients of the posterior distribution of X in its specific form."""
-        return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
-
     def parameters_changed(self):
         if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
-            update_gradients(self, mpi_comm=self.mpi_comm)
+            update_gradients(self)
             return
 
         super(SSGPLVM, self).parameters_changed()
         self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
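After this change the constructor no longer accepts Gamma, mpi_comm, pi or learnPi; the spike probabilities gamma are initialised internally around 0.5 (and tied across data points when group_spike is set). A minimal construction sketch (not from the commit; assumes SSGPLVM is exposed via GPy.models):

    import numpy as np
    from GPy.models import SSGPLVM

    Y = np.random.randn(100, 12)
    m = SSGPLVM(Y, input_dim=5, group_spike=True)   # RBF-ARD kernel and Gaussian likelihood by default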
@@ -108,7 +91,7 @@ class SSGPLVM(SparseGP):
         # update for the KL divergence
         self.variational_prior.update_gradients_KL(self.X)
 
-    def input_sensitivity(self):
+    def input_sensitivity(self, summarize=True):
         if self.kern.ARD:
             return self.kern.input_sensitivity()
         else:
@@ -121,47 +104,235 @@
 
         return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
 
-    def __getstate__(self):
-        dc = super(SSGPLVM, self).__getstate__()
-        dc['mpi_comm'] = None
-        if self.mpi_comm != None:
-            del dc['N_range']
-            del dc['N_list']
-            del dc['Y_local']
-        return dc
-
-    def __setstate__(self, state):
-        return super(SSGPLVM, self).__setstate__(state)
-
-    #=====================================================
-    # The MPI parallelization
-    # - can move to model at some point
-    #=====================================================
-
-    def _set_params_transformed(self, p):
-        if self.mpi_comm != None:
-            if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
-                self.mpi_comm.Bcast(np.int32(1),root=0)
-                self.mpi_comm.Bcast(p, root=0)
-        super(SSGPLVM, self)._set_params_transformed(p)
-
-    def optimize(self, optimizer=None, start=None, **kwargs):
-        self.__IN_OPTIMIZATION__ = True
-        if self.mpi_comm==None:
-            super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
-        elif self.mpi_comm.rank==0:
-            super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
-            self.mpi_comm.Bcast(np.int32(-1),root=0)
-        elif self.mpi_comm.rank>0:
-            x = self._get_params_transformed().copy()
-            flag = np.empty(1,dtype=np.int32)
-            while True:
-                self.mpi_comm.Bcast(flag,root=0)
-                if flag==1:
-                    self._set_params_transformed(x)
-                elif flag==-1:
-                    break
-                else:
-                    self.__IN_OPTIMIZATION__ = False
-                    raise Exception("Unrecognizable flag for synchronization!")
-        self.__IN_OPTIMIZATION__ = False
+    def do_test_latents(self, Y):
+        """
+        Compute the latent representation for a set of new points Y
+
+        Notes:
+        This will only work with a univariate Gaussian likelihood (for now)
+        """
+        assert not self.likelihood.is_heteroscedastic
+        N_test = Y.shape[0]
+        input_dim = self.Z.shape[1]
+        means = np.zeros((N_test, input_dim))
+        covars = np.zeros((N_test, input_dim))
+
+        dpsi0 = -0.5 * self.output_dim * self.likelihood.precision
+        dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods
+        V = self.likelihood.precision * Y
+
+        #compute CPsi1V
+        if self.Cpsi1V is None:
+            psi1V = np.dot(self.psi1.T, self.likelihood.V)
+            tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0)
+            tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1)
+            self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1)
+
+        dpsi1 = np.dot(self.Cpsi1V, V.T)
+
+        start = np.zeros(self.input_dim * 2)
+
+        for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]):
+            args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2)
+            xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False)
+
+            mu, log_S = xopt.reshape(2, 1, -1)
+            means[n] = mu[0].copy()
+            covars[n] = np.exp(log_S[0]).copy()
+
+        return means, covars
+
+    def dmu_dX(self, Xnew):
+        """
+        Calculate the gradient of the prediction at Xnew w.r.t Xnew.
+        """
+        dmu_dX = np.zeros_like(Xnew)
+        for i in range(self.Z.shape[0]):
+            dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
+        return dmu_dX
+
+    def dmu_dXnew(self, Xnew):
+        """
+        Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
+        """
+        dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
+        ones = np.ones((1, 1))
+        for i in range(self.Z.shape[0]):
+            dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
+        return np.dot(dK_dX, self.Cpsi1Vf)
+
+    def plot_steepest_gradient_map(self, fignum=None, ax=None, which_indices=None, labels=None, data_labels=None, data_marker='o', data_s=40, resolution=20, aspect='auto', updates=False, **kwargs):
+        input_1, input_2 = significant_dims = most_significant_input_dimensions(self, which_indices)
+
+        X = np.zeros((resolution ** 2, self.input_dim))
+        indices = np.r_[:X.shape[0]]
+        if labels is None:
+            labels = range(self.output_dim)
+
+        def plot_function(x):
+            X[:, significant_dims] = x
+            dmu_dX = self.dmu_dXnew(X)
+            argmax = np.argmax(dmu_dX, 1)
+            return dmu_dX[indices, argmax], np.array(labels)[argmax]
+
+        if ax is None:
+            fig = pyplot.figure(num=fignum)
+            ax = fig.add_subplot(111)
+
+        if data_labels is None:
+            data_labels = np.ones(self.num_data)
+        ulabels = []
+        for lab in data_labels:
+            if not lab in ulabels:
+                ulabels.append(lab)
+        marker = itertools.cycle(list(data_marker))
+        from GPy.util import Tango
+        for i, ul in enumerate(ulabels):
+            if type(ul) is np.string_:
+                this_label = ul
+            elif type(ul) is np.int64:
+                this_label = 'class %i' % ul
+            else:
+                this_label = 'class %i' % i
+            m = marker.next()
+            index = np.nonzero(data_labels == ul)[0]
+            x = self.X[index, input_1]
+            y = self.X[index, input_2]
+            ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
+
+        ax.set_xlabel('latent dimension %i' % input_1)
+        ax.set_ylabel('latent dimension %i' % input_2)
+
+        from matplotlib.cm import get_cmap
+        from GPy.util.latent_space_visualizations.controllers.imshow_controller import ImAnnotateController
+        if not 'cmap' in kwargs.keys():
+            kwargs.update(cmap=get_cmap('jet'))
+        controller = ImAnnotateController(ax,
+                                          plot_function,
+                                          tuple(self.X.min(0)[:, significant_dims]) + tuple(self.X.max(0)[:, significant_dims]),
+                                          resolution=resolution,
+                                          aspect=aspect,
+                                          **kwargs)
+        ax.legend()
+        ax.figure.tight_layout()
+        if updates:
+            pyplot.show()
+            clear = raw_input('Enter to continue')
+            if clear.lower() in 'yes' or clear == '':
+                controller.deactivate()
+        return controller.view
+
+    def plot_X_1d(self, fignum=None, ax=None, colors=None):
+        """
+        Plot latent space X in 1D:
+
+        - if fig is given, create input_dim subplots in fig and plot in these
+        - if ax is given plot input_dim 1D latent space plots of X into each `axis`
+        - if neither fig nor ax is given create a figure with fignum and plot in there
+
+        colors:
+            colors of different latent space dimensions input_dim
+        """
+        import pylab
+        if ax is None:
+            fig = pylab.figure(num=fignum, figsize=(8, min(12, (2 * self.X.shape[1]))))
+        if colors is None:
+            colors = pylab.gca()._get_lines.color_cycle
+            pylab.clf()
+        else:
+            colors = iter(colors)
+        plots = []
+        x = np.arange(self.X.shape[0])
+        for i in range(self.X.shape[1]):
+            if ax is None:
+                a = fig.add_subplot(self.X.shape[1], 1, i + 1)
+            elif isinstance(ax, (tuple, list)):
+                a = ax[i]
+            else:
+                raise ValueError("Need one ax per latent dimension input_dim")
+            a.plot(self.X, c='k', alpha=.3)
+            plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
+            a.fill_between(x,
+                           self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
+                           self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
+                           facecolor=plots[-1].get_color(),
+                           alpha=.3)
+            a.legend(borderaxespad=0.)
+            a.set_xlim(x.min(), x.max())
+            if i < self.X.shape[1] - 1:
+                a.set_xticklabels('')
+        pylab.draw()
+        if ax is None:
+            fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
+        return fig
+
+    def getstate(self):
+        """
+        Get the current state of the class,
+        here just all the indices, rest can get recomputed
+        """
+        return SparseGP._getstate(self) + [self.init]
+
+    def setstate(self, state):
+        self._const_jitter = None
+        self.init = state.pop()
+        SparseGP._setstate(self, state)
+
+
+def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+    """
+    objective function for fitting the latent variables for test points
+    (negative log-likelihood: should be minimised!)
+    """
+    mu, log_S = mu_S.reshape(2, 1, -1)
+    S = np.exp(log_S)
+
+    psi0 = kern.psi0(Z, mu, S)
+    psi1 = kern.psi1(Z, mu, S)
+    psi2 = kern.psi2(Z, mu, S)
+
+    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
+
+    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
+    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
+    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
+
+    dmu = mu0 + mu1 + mu2 - mu
+    # dS = S0 + S1 + S2 -0.5 + .5/S
+    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
+    return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
+
+def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+    """
+    objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
+    This is the same as latent_cost_and_grad but only for the objective
+    """
+    mu, log_S = mu_S.reshape(2, 1, -1)
+    S = np.exp(log_S)
+
+    psi0 = kern.psi0(Z, mu, S)
+    psi1 = kern.psi1(Z, mu, S)
+    psi2 = kern.psi2(Z, mu, S)
+
+    lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
+    return -float(lik)
+
+def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
+    """
+    This is the same as latent_cost_and_grad but only for the grad
+    """
+    mu, log_S = mu_S.reshape(2, 1, -1)
+    S = np.exp(log_S)
+
+    mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
+    mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
+    mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
+
+    dmu = mu0 + mu1 + mu2 - mu
+    # dS = S0 + S1 + S2 -0.5 + .5/S
+    dlnS = S * (S0 + S1 + S2 - 0.5) + .5
+    return -np.hstack((dmu.flatten(), dlnS.flatten()))
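latent_cost and latent_grad split latent_cost_and_grad so the SCG optimizer (imported above) can query the objective and the gradient separately. A standard sanity check, not part of the commit, is to compare the analytic gradient against central finite differences of the objective (hypothetical helper; args packs kern, Z and the dL_* terms exactly as in do_test_latents):

    def check_latent_grad(mu_S, args, eps=1e-6):
        g = latent_grad(mu_S, *args)
        g_fd = np.zeros_like(mu_S)
        for i in range(mu_S.size):
            e = np.zeros_like(mu_S)
            e[i] = eps
            # central difference of the objective along coordinate i
            g_fd[i] = (latent_cost(mu_S + e, *args) - latent_cost(mu_S - e, *args)) / (2 * eps)
        return np.allclose(g, g_fd, rtol=1e-4, atol=1e-6)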
@@ -30,18 +30,18 @@ def add_bar_labels(fig, ax, bars, bottom=0):
             c = 'k'
             transform = transOffsetUp
         ax.text(xi, height, "${xi}$".format(xi=int(num)), color=c, rotation=0, ha='center', va=va, transform=transform)
 
     ax.set_xticks([])
 
 
 def plot_bars(fig, ax, x, ard_params, color, name, bottom=0):
     from ...util.misc import param_to_array
     return ax.bar(left=x, height=param_to_array(ard_params), width=.8,
                   bottom=bottom, align='center',
                   color=color, edgecolor='k', linewidth=1.2,
                   label=name.replace("_"," "))
 
-def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
+def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False, filtering=None):
     """
     If an ARD kernel is present, plot a bar representation using matplotlib
@@ -51,6 +51,10 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
     title of the plot,
     pass '' to not print a title
     pass None for a generic title
+    :param filtering: list of names, which to use for plotting ARD parameters.
+                      Only kernels which match names in the list of names in filtering
+                      will be used for plotting.
+    :type filtering: list of names to use for ARD plot
     """
     fig, ax = ax_default(fignum,ax)
@@ -58,19 +62,25 @@ def plot_ARD(kernel, fignum=None, ax=None, title='', legend=False):
         ax.set_title('ARD parameters, %s kernel' % kernel.name)
     else:
         ax.set_title(title)
 
     Tango.reset()
     bars = []
 
-    ard_params = np.atleast_2d(kernel.input_sensitivity())
+    ard_params = np.atleast_2d(kernel.input_sensitivity(summarize=False))
     bottom = 0
    x = np.arange(kernel.input_dim)
 
+    if filtering is None:
+        filtering = kernel.parameter_names(recursive=False)
+
     for i in range(ard_params.shape[0]):
-        c = Tango.nextMedium()
-        bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
-        bottom += ard_params[i,:]
+        if kernel.parameters[i].name in filtering:
+            c = Tango.nextMedium()
+            bars.append(plot_bars(fig, ax, x, ard_params[i,:], c, kernel.parameters[i].name, bottom=bottom))
+            bottom += ard_params[i,:]
+        else:
+            print "filtering out {}".format(kernel.parameters[i].name)
 
     ax.set_xlim(-.5, kernel.input_dim - .5)
     add_bar_labels(fig, ax, [bars[-1]], bottom=bottom-ard_params[i,:])
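Usage sketch for the new filtering argument (not from the commit): the names are the kernel part names, and any part not listed is skipped with a console message.

    k = kern.RBF(4, ARD=True) + kern.Linear(4, ARD=True)
    k.plot_ARD(filtering=['rbf'])    # draws bars for the rbf part only; linear is filtered out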