mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-02 14:45:15 +02:00
merge the changes
This commit is contained in:
commit
fc0e8f3e7e
66 changed files with 55761 additions and 702 deletions
|
|
@ -89,7 +89,6 @@ class GP(Model):
|
|||
assert mean_function.output_dim == self.output_dim
|
||||
self.link_parameter(mean_function)
|
||||
|
||||
|
||||
#find a sensible inference method
|
||||
logger.info("initializing inference method")
|
||||
if inference_method is None:
|
||||
|
|
@ -208,6 +207,7 @@ class GP(Model):
|
|||
Kxx = kern.Kdiag(_Xnew)
|
||||
var = Kxx - np.sum(WiKx*Kx, 0)
|
||||
var = var.reshape(-1, 1)
|
||||
var[var<0.] = 0.
|
||||
|
||||
#force mu to be a column vector
|
||||
if len(mu.shape)==1: mu = mu[:,None]
|
||||
|
|
@ -229,13 +229,14 @@ class GP(Model):
|
|||
:param Y_metadata: metadata about the predicting point to pass to the likelihood
|
||||
:param kern: The kernel to use for prediction (defaults to the model
|
||||
kern). this is useful for examining e.g. subprocesses.
|
||||
:returns: (mean, var, lower_upper):
|
||||
:returns: (mean, var):
|
||||
mean: posterior mean, a Numpy array, Nnew x self.output_dim
|
||||
var: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
||||
lower_upper: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.input_dim
|
||||
|
||||
If full_cov and self.output_dim > 1, the return shape of var is Nnew x Nnew x self.output_dim. If self.output_dim == 1, the return shape is Nnew x Nnew.
|
||||
This is to allow for different normalizations of the output dimensions.
|
||||
|
||||
Note: If you want the predictive quantiles (e.g. 95% confidence interval) use :py:func:`~GPy.core.gp.GP.predict_quantiles`.
|
||||
"""
|
||||
#predict the latent function values
|
||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov, kern=kern)
|
||||
|
|
@ -243,7 +244,7 @@ class GP(Model):
|
|||
mu, var = self.normalizer.inverse_mean(mu), self.normalizer.inverse_variance(var)
|
||||
|
||||
# now push through likelihood
|
||||
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata)
|
||||
mean, var = self.likelihood.predictive_values(mu, var, full_cov, Y_metadata=Y_metadata)
|
||||
return mean, var
|
||||
|
||||
def predict_quantiles(self, X, quantiles=(2.5, 97.5), Y_metadata=None):
|
||||
|
|
@ -255,12 +256,12 @@ class GP(Model):
|
|||
:param quantiles: tuple of quantiles, default is (2.5, 97.5) which is the 95% interval
|
||||
:type quantiles: tuple
|
||||
:returns: list of quantiles for each X and predictive quantiles for interval combination
|
||||
:rtype: [np.ndarray (Xnew x self.input_dim), np.ndarray (Xnew x self.input_dim)]
|
||||
:rtype: [np.ndarray (Xnew x self.output_dim), np.ndarray (Xnew x self.output_dim)]
|
||||
"""
|
||||
m, v = self._raw_predict(X, full_cov=False)
|
||||
if self.normalizer is not None:
|
||||
m, v = self.normalizer.inverse_mean(m), self.normalizer.inverse_variance(v)
|
||||
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata)
|
||||
return self.likelihood.predictive_quantiles(m, v, quantiles, Y_metadata=Y_metadata)
|
||||
|
||||
def predictive_gradients(self, Xnew):
|
||||
"""
|
||||
|
|
@ -330,7 +331,7 @@ class GP(Model):
|
|||
:returns: Ysim: set of simulations, a Numpy array (N x samples).
|
||||
"""
|
||||
fsim = self.posterior_samples_f(X, size, full_cov=full_cov)
|
||||
Ysim = self.likelihood.samples(fsim, Y_metadata)
|
||||
Ysim = self.likelihood.samples(fsim, Y_metadata=Y_metadata)
|
||||
return Ysim
|
||||
|
||||
def plot_f(self, plot_limits=None, which_data_rows='all',
|
||||
|
|
@ -395,7 +396,7 @@ class GP(Model):
|
|||
which_data_ycols='all', fixed_inputs=[],
|
||||
levels=20, samples=0, fignum=None, ax=None, resolution=None,
|
||||
plot_raw=False,
|
||||
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx'):
|
||||
linecol=None,fillcol=None, Y_metadata=None, data_symbol='kx', predict_kw=None):
|
||||
"""
|
||||
Plot the posterior of the GP.
|
||||
- In one dimension, the function is plotted with a shaded region identifying two standard deviations.
|
||||
|
|
@ -444,7 +445,7 @@ class GP(Model):
|
|||
which_data_ycols, fixed_inputs,
|
||||
levels, samples, fignum, ax, resolution,
|
||||
plot_raw=plot_raw, Y_metadata=Y_metadata,
|
||||
data_symbol=data_symbol, **kw)
|
||||
data_symbol=data_symbol, predict_kw=predict_kw, **kw)
|
||||
|
||||
def input_sensitivity(self, summarize=True):
|
||||
"""
|
||||
|
|
@ -472,16 +473,51 @@ class GP(Model):
|
|||
self.inference_method.on_optimization_end()
|
||||
raise
|
||||
|
||||
def infer_newX(self, Y_new, optimize=True, ):
|
||||
def infer_newX(self, Y_new, optimize=True):
|
||||
"""
|
||||
Infer the distribution of X for the new observed data *Y_new*.
|
||||
Infer X for the new observed data *Y_new*.
|
||||
|
||||
:param Y_new: the new observed data for inference
|
||||
:type Y_new: numpy.ndarray
|
||||
:param optimize: whether to optimize the location of new X (True by default)
|
||||
:type optimize: boolean
|
||||
:return: a tuple containing the posterior estimation of X and the model that optimizes X
|
||||
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` or numpy.ndarray, :class:`~GPy.core.model.Model`)
|
||||
:rtype: (:class:`~GPy.core.parameterization.variational.VariationalPosterior` and numpy.ndarray, :class:`~GPy.core.model.Model`)
|
||||
"""
|
||||
from ..inference.latent_function_inference.inferenceX import infer_newX
|
||||
return infer_newX(self, Y_new, optimize=optimize)
|
||||
|
||||
def log_predictive_density(self, x_test, y_test, Y_metadata=None):
|
||||
"""
|
||||
Calculation of the log predictive density
|
||||
|
||||
.. math::
|
||||
p(y_{*}|D) = \int p(y_{*}|f_{*}) p(f_{*}|\mu_{*}, \\sigma^{2}_{*}) df_{*}
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param Y_metadata: metadata associated with the test points
|
||||
"""
|
||||
mu_star, var_star = self._raw_predict(x_test)
|
||||
return self.likelihood.log_predictive_density(y_test, mu_star, var_star, Y_metadata=Y_metadata)
|
||||
|
||||
def log_predictive_density_sampling(self, x_test, y_test, Y_metadata=None, num_samples=1000):
|
||||
"""
|
||||
Calculation of the log predictive density by sampling
|
||||
|
||||
.. math::
|
||||
p(y_{*}|D) = \int p(y_{*}|f_{*}) p(f_{*}|\mu_{*}, \\sigma^{2}_{*}) df_{*}
|
||||
|
||||
:param x_test: test locations (x_{*})
|
||||
:type x_test: (Nx1) array
|
||||
:param y_test: test observations (y_{*})
|
||||
:type y_test: (Nx1) array
|
||||
:param Y_metadata: metadata associated with the test points
|
||||
:param num_samples: number of samples to use in monte carlo integration
|
||||
:type num_samples: int
|
||||
"""
|
||||
mu_star, var_star = self._raw_predict(x_test)
|
||||
return self.likelihood.log_predictive_density_sampling(y_test, mu_star, var_star, Y_metadata=Y_metadata, num_samples=num_samples)
|
||||
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class Model(Parameterized):
|
|||
jobs = []
|
||||
pool = mp.Pool(processes=num_processes)
|
||||
for i in range(num_restarts):
|
||||
self.randomize()
|
||||
if i>0: self.randomize()
|
||||
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
|
||||
jobs.append(job)
|
||||
|
||||
|
|
@ -90,7 +90,7 @@ class Model(Parameterized):
|
|||
for i in range(num_restarts):
|
||||
try:
|
||||
if not parallel:
|
||||
self.randomize()
|
||||
if i>0: self.randomize()
|
||||
self.optimize(**kwargs)
|
||||
else:
|
||||
self.optimization_runs.append(jobs[i].get())
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import numpy
|
|||
from numpy.lib.function_base import vectorize
|
||||
from .lists_and_dicts import IntArrayDict
|
||||
from functools import reduce
|
||||
from .transformations import Transformation
|
||||
|
||||
def extract_properties_to_index(index, props):
|
||||
prop_index = dict()
|
||||
|
|
|
|||
|
|
@ -38,6 +38,11 @@ class Param(Parameterizable, ObsAr):
|
|||
Fixing parameters will fix them to the value they are right now. If you change
|
||||
the fixed value, it will be fixed to the new value!
|
||||
|
||||
Important Note:
|
||||
Multilevel indexing (e.g. self[:2][1:]) is not supported and might lead to unexpected behaviour.
|
||||
Try to index in one go, using boolean indexing or the numpy builtin
|
||||
np.index function.
|
||||
|
||||
See :py:class:`GPy.core.parameterized.Parameterized` for more details on constraining etc.
|
||||
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -430,23 +430,38 @@ class Indexable(Nameable, Updateable):
|
|||
|
||||
def log_prior(self):
|
||||
"""evaluate the prior"""
|
||||
if self.priors.size > 0:
|
||||
x = self.param_array
|
||||
#py3 fix
|
||||
#return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.iteritems()), 0)
|
||||
return reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
|
||||
return 0.
|
||||
if self.priors.size == 0:
|
||||
return 0.
|
||||
x = self.param_array
|
||||
#evaluate the prior log densities
|
||||
log_p = reduce(lambda a, b: a + b, (p.lnpdf(x[ind]).sum() for p, ind in self.priors.items()), 0)
|
||||
|
||||
#account for the transformation by evaluating the log Jacobian (where things are transformed)
|
||||
log_j = 0.
|
||||
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||
for c,j in self.constraints.items():
|
||||
if not isinstance(c, Transformation):continue
|
||||
for jj in j:
|
||||
if jj in priored_indexes:
|
||||
log_j += c.log_jacobian(x[jj])
|
||||
return log_p + log_j
|
||||
|
||||
def _log_prior_gradients(self):
|
||||
"""evaluate the gradients of the priors"""
|
||||
if self.priors.size > 0:
|
||||
x = self.param_array
|
||||
ret = np.zeros(x.size)
|
||||
#py3 fix
|
||||
#[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.iteritems()]
|
||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
|
||||
return ret
|
||||
return 0.
|
||||
if self.priors.size == 0:
|
||||
return 0.
|
||||
x = self.param_array
|
||||
ret = np.zeros(x.size)
|
||||
#compute derivative of prior density
|
||||
[np.put(ret, ind, p.lnpdf_grad(x[ind])) for p, ind in self.priors.items()]
|
||||
#add in jacobian derivatives if transformed
|
||||
priored_indexes = np.hstack([i for p, i in self.priors.items()])
|
||||
for c,j in self.constraints.items():
|
||||
if not isinstance(c, Transformation):continue
|
||||
for jj in j:
|
||||
if jj in priored_indexes:
|
||||
ret[jj] += c.log_jacobian_grad(x[jj])
|
||||
return ret
|
||||
|
||||
#===========================================================================
|
||||
# Tie parameters together
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from .param import ParamConcatenation
|
|||
from .parameter_core import HierarchyError, Parameterizable, adjust_name_for_printing
|
||||
|
||||
import logging
|
||||
from GPy.core.parameterization.index_operations import ParameterIndexOperationsView
|
||||
from .index_operations import ParameterIndexOperationsView
|
||||
logger = logging.getLogger("parameters changed meta")
|
||||
|
||||
class ParametersChangedMeta(type):
|
||||
|
|
|
|||
|
|
@ -522,16 +522,9 @@ class DGPLVM(Prior):
|
|||
|
||||
"""
|
||||
domain = _REAL
|
||||
# _instances = []
|
||||
# def __new__(cls, mu, sigma): # Singleton:
|
||||
# if cls._instances:
|
||||
# cls._instances[:] = [instance for instance in cls._instances if instance()]
|
||||
# for instance in cls._instances:
|
||||
# if instance().mu == mu and instance().sigma == sigma:
|
||||
# return instance()
|
||||
# o = super(Prior, cls).__new__(cls, mu, sigma)
|
||||
# cls._instances.append(weakref.ref(o))
|
||||
# return cls._instances[-1]()
|
||||
|
||||
def __new__(cls, sigma2, lbl, x_shape):
|
||||
return super(Prior, cls).__new__(cls, sigma2, lbl, x_shape)
|
||||
|
||||
def __init__(self, sigma2, lbl, x_shape):
|
||||
self.sigma2 = sigma2
|
||||
|
|
@ -730,7 +723,7 @@ class DGPLVM(Prior):
|
|||
|
||||
# ******************************************
|
||||
|
||||
from parameterized import Parameterized
|
||||
from .. import Parameterized
|
||||
from .. import Param
|
||||
class DGPLVM_Lamda(Prior, Parameterized):
|
||||
"""
|
||||
|
|
@ -758,12 +751,12 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
self.sigma2 = sigma2
|
||||
# self.x = x
|
||||
self.lbl = lbl
|
||||
self.lamda = lamda
|
||||
self.lamda = lamda
|
||||
self.classnum = lbl.shape[1]
|
||||
self.datanum = lbl.shape[0]
|
||||
self.x_shape = x_shape
|
||||
self.dim = x_shape[1]
|
||||
self.lamda = Param('lamda', np.diag(lamda))
|
||||
self.lamda = Param('lamda', np.diag(lamda))
|
||||
self.link_parameter(self.lamda)
|
||||
|
||||
def get_class_label(self, y):
|
||||
|
|
@ -789,7 +782,7 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
M_i = np.zeros((self.classnum, self.dim))
|
||||
for i in cls:
|
||||
# Mean of each class
|
||||
class_i = cls[i]
|
||||
class_i = cls[i]
|
||||
M_i[i] = np.mean(class_i, axis=0)
|
||||
return M_i
|
||||
|
||||
|
|
@ -899,8 +892,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
#!!!!!!!!!!!!!!!!!!!!!!!!!!!
|
||||
#self.lamda.values[:] = self.lamda.values/self.lamda.values.sum()
|
||||
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
|
|
@ -916,8 +909,8 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
xprime = x.dot(np.diagflat(self.lamda))
|
||||
x = xprime
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
|
|
@ -951,14 +944,14 @@ class DGPLVM_Lamda(Prior, Parameterized):
|
|||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dx = DPxprim_Dx.T
|
||||
|
||||
DPxprim_Dlamda = DPx_Dx.dot(x)
|
||||
DPxprim_Dlamda = DPx_Dx.dot(x)
|
||||
|
||||
# Because of the GPy we need to transpose our matrix so that it gets the same shape as out matrix (denominator layout!!!)
|
||||
DPxprim_Dlamda = DPxprim_Dlamda.T
|
||||
DPxprim_Dlamda = DPxprim_Dlamda.T
|
||||
|
||||
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
||||
self.lamda.gradient = np.diag(DPxprim_Dlamda)
|
||||
# print DPxprim_Dx
|
||||
return DPxprim_Dx
|
||||
return DPxprim_Dx
|
||||
|
||||
|
||||
# def frb(self, x):
|
||||
|
|
@ -1139,8 +1132,8 @@ class DGPLVM_T(Prior):
|
|||
# This function calculates log of our prior
|
||||
def lnpdf(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
|
|
@ -1156,11 +1149,11 @@ class DGPLVM_T(Prior):
|
|||
|
||||
# This function calculates derivative of the log of prior function
|
||||
def lnpdf_grad(self, x):
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
x = x.reshape(self.x_shape)
|
||||
xprim = x.dot(self.vec)
|
||||
x = xprim
|
||||
# print x
|
||||
cls = self.compute_cls(x)
|
||||
cls = self.compute_cls(x)
|
||||
M_0 = np.mean(x, axis=0)
|
||||
M_i = self.compute_Mi(cls)
|
||||
Sb = self.compute_Sb(cls, M_i, M_0)
|
||||
|
|
|
|||
|
|
@ -31,6 +31,16 @@ class Transformation(object):
|
|||
raise NotImplementedError
|
||||
def finv(self, model_param):
|
||||
raise NotImplementedError
|
||||
def log_jacobian(self, model_param):
|
||||
"""
|
||||
compute the log of the jacobian of f, evaluated at f(x)= model_param
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def log_jacobian_grad(self, model_param):
|
||||
"""
|
||||
compute the derivative of the log of the jacobian of f, evaluated at f(x)= model_param
|
||||
"""
|
||||
raise NotImplementedError
|
||||
def gradfactor(self, model_param, dL_dmodel_param):
|
||||
""" df(opt_param)_dopt_param evaluated at self.f(opt_param)=model_param, times the gradient dL_dmodel_param,
|
||||
|
||||
|
|
@ -74,9 +84,33 @@ class Logexp(Transformation):
|
|||
if np.any(f < 0.):
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def log_jacobian(self, model_param):
|
||||
return np.where(model_param>_lim_val, model_param, np.log(np.exp(model_param+1e-20) - 1.)) - model_param
|
||||
def log_jacobian_grad(self, model_param):
|
||||
return 1./(np.exp(model_param)-1.)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
class Exponent(Transformation):
|
||||
domain = _POSITIVE
|
||||
def f(self, x):
|
||||
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
||||
def finv(self, x):
|
||||
return np.log(x)
|
||||
def gradfactor(self, f, df):
|
||||
return np.einsum('i,i->i', df, f)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def log_jacobian(self, model_param):
|
||||
return np.log(model_param)
|
||||
def log_jacobian_grad(self, model_param):
|
||||
return 1./model_param
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
|
||||
|
||||
class NormalTheta(Transformation):
|
||||
"Do not use, not officially supported!"
|
||||
|
|
@ -417,22 +451,6 @@ class LogexpClipped(Logexp):
|
|||
def __str__(self):
|
||||
return '+ve_c'
|
||||
|
||||
class Exponent(Transformation):
|
||||
# TODO: can't allow this to go to zero, need to set a lower bound. Similar with negative Exponent below. See old MATLAB code.
|
||||
domain = _POSITIVE
|
||||
def f(self, x):
|
||||
return np.where(x<_lim_val, np.where(x>-_lim_val, np.exp(x), np.exp(-_lim_val)), np.exp(_lim_val))
|
||||
def finv(self, x):
|
||||
return np.log(x)
|
||||
def gradfactor(self, f, df):
|
||||
return np.einsum('i,i->i', df, f)
|
||||
def initialize(self, f):
|
||||
if np.any(f < 0.):
|
||||
print("Warning: changing parameters to satisfy constraints")
|
||||
return np.abs(f)
|
||||
def __str__(self):
|
||||
return '+ve'
|
||||
|
||||
class NegativeExponent(Exponent):
|
||||
domain = _NEGATIVE
|
||||
def f(self, x):
|
||||
|
|
|
|||
|
|
@ -36,8 +36,9 @@ class NormalPrior(VariationalPrior):
|
|||
variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
|
||||
|
||||
class SpikeAndSlabPrior(VariationalPrior):
|
||||
def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
|
||||
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
|
||||
def __init__(self, pi=None, learnPi=False, variance = 1.0, group_spike=False, name='SpikeAndSlabPrior', **kw):
|
||||
super(SpikeAndSlabPrior, self).__init__(name=name, **kw)
|
||||
self.group_spike = group_spike
|
||||
self.variance = Param('variance',variance)
|
||||
self.learnPi = learnPi
|
||||
if learnPi:
|
||||
|
|
@ -50,7 +51,10 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
def KL_divergence(self, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.gamma.values
|
||||
if self.group_spike:
|
||||
gamma = variational_posterior.gamma.values[0]
|
||||
else:
|
||||
gamma = variational_posterior.gamma.values
|
||||
if len(self.pi.shape)==2:
|
||||
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
||||
pi = self.pi[idx]
|
||||
|
|
@ -65,14 +69,21 @@ class SpikeAndSlabPrior(VariationalPrior):
|
|||
def update_gradients_KL(self, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.gamma.values
|
||||
if self.group_spike:
|
||||
gamma = variational_posterior.gamma.values[0]
|
||||
else:
|
||||
gamma = variational_posterior.gamma.values
|
||||
if len(self.pi.shape)==2:
|
||||
idx = np.unique(variational_posterior.gamma._raveled_index()/gamma.shape[-1])
|
||||
pi = self.pi[idx]
|
||||
else:
|
||||
pi = self.pi
|
||||
|
||||
variational_posterior.binary_prob.gradient -= np.log((1-pi)/pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
if self.group_spike:
|
||||
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))/variational_posterior.num_data
|
||||
else:
|
||||
dgamma = np.log((1-pi)/pi*gamma/(1.-gamma))
|
||||
variational_posterior.binary_prob.gradient -= dgamma+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
|
||||
mu.gradient -= gamma*mu/self.variance
|
||||
S.gradient -= (1./self.variance - 1./S) * gamma /2.
|
||||
if self.learnPi:
|
||||
|
|
@ -154,13 +165,31 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
|||
'''
|
||||
The SpikeAndSlab distribution for variational approximations.
|
||||
'''
|
||||
def __init__(self, means, variances, binary_prob, name='latent space'):
|
||||
def __init__(self, means, variances, binary_prob, group_spike=False, sharedX=False, name='latent space'):
|
||||
"""
|
||||
binary_prob : the probability of the distribution on the slab part.
|
||||
"""
|
||||
super(SpikeAndSlabPosterior, self).__init__(means, variances, name)
|
||||
self.gamma = Param("binary_prob",binary_prob,Logistic(0.,1.))
|
||||
self.link_parameter(self.gamma)
|
||||
self.group_spike = group_spike
|
||||
self.sharedX = sharedX
|
||||
if sharedX:
|
||||
self.mean.fix(warning=False)
|
||||
self.variance.fix(warning=False)
|
||||
if group_spike:
|
||||
self.gamma_group = Param("binary_prob_group",binary_prob.mean(axis=0),Logistic(1e-10,1.-1e-10))
|
||||
self.gamma = Param("binary_prob",binary_prob, __fixed__)
|
||||
self.link_parameters(self.gamma_group,self.gamma)
|
||||
else:
|
||||
self.gamma = Param("binary_prob",binary_prob,Logistic(1e-10,1.-1e-10))
|
||||
self.link_parameter(self.gamma)
|
||||
|
||||
def propogate_val(self):
|
||||
if self.group_spike:
|
||||
self.gamma.values[:] = self.gamma_group.values
|
||||
|
||||
def collate_gradient(self):
|
||||
if self.group_spike:
|
||||
self.gamma_group.gradient = self.gamma.gradient.reshape(self.gamma.shape).sum(axis=0)
|
||||
|
||||
def set_gradients(self, grad):
|
||||
self.mean.gradient, self.variance.gradient, self.gamma.gradient = grad
|
||||
|
|
@ -179,15 +208,15 @@ class SpikeAndSlabPosterior(VariationalPosterior):
|
|||
n.parameters[dc['variance']._parent_index_] = dc['variance']
|
||||
n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
|
||||
n._gradient_array_ = None
|
||||
oversize = self.size - self.mean.size - self.variance.size
|
||||
n.size = n.mean.size + n.variance.size + oversize
|
||||
oversize = self.size - self.mean.size - self.variance.size - self.gamma.size
|
||||
n.size = n.mean.size + n.variance.size + n.gamma.size + oversize
|
||||
n.ndim = n.mean.ndim
|
||||
n.shape = n.mean.shape
|
||||
n.num_data = n.mean.shape[0]
|
||||
n.input_dim = n.mean.shape[1] if n.ndim != 1 else 1
|
||||
return n
|
||||
else:
|
||||
return super(VariationalPrior, self).__getitem__(s)
|
||||
return super(SpikeAndSlabPosterior, self).__getitem__(s)
|
||||
|
||||
def plot(self, *args, **kwargs):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -132,14 +132,14 @@ class SparseGP(GP):
|
|||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = Kxx - np.dot(Kx.T, np.dot(self.posterior.woodbury_inv, Kx))
|
||||
elif self.posterior.woodbury_inv.ndim == 3:
|
||||
var = Kxx[:,:,None] - np.tensordot(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx).T, Kx, [1,0]).swapaxes(1,2)
|
||||
var = np.empty((Kxx.shape[0],Kxx.shape[1],self.posterior.woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[2]):
|
||||
var[:, :, i] = (Kxx - mdot(Kx.T, self.posterior.woodbury_inv[:, :, i], Kx))
|
||||
var = var
|
||||
else:
|
||||
Kxx = kern.Kdiag(Xnew)
|
||||
if self.posterior.woodbury_inv.ndim == 2:
|
||||
var = (Kxx - np.sum(np.dot(np.atleast_3d(self.posterior.woodbury_inv).T, Kx) * Kx[None,:,:], 1)).T
|
||||
#For plot_latent, the below code doesn't work!
|
||||
#var = Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0)
|
||||
var = (Kxx - np.sum(np.dot(self.posterior.woodbury_inv.T, Kx) * Kx, 0))[:,None]
|
||||
elif self.posterior.woodbury_inv.ndim == 3:
|
||||
var = np.empty((Kxx.shape[0],self.posterior.woodbury_inv.shape[2]))
|
||||
for i in range(var.shape[1]):
|
||||
|
|
@ -149,9 +149,9 @@ class SparseGP(GP):
|
|||
if self.mean_function is not None:
|
||||
mu += self.mean_function.f(Xnew)
|
||||
else:
|
||||
psi0_star = self.kern.psi0(self.Z, Xnew)
|
||||
psi1_star = self.kern.psi1(self.Z, Xnew)
|
||||
#psi2_star = self.kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
|
||||
psi0_star = kern.psi0(self.Z, Xnew)
|
||||
psi1_star = kern.psi1(self.Z, Xnew)
|
||||
#psi2_star = kern.psi2(self.Z, Xnew) # Only possible if we get NxMxM psi2 out of the code.
|
||||
la = self.posterior.woodbury_vector
|
||||
mu = np.dot(psi1_star, la) # TODO: dimensions?
|
||||
|
||||
|
|
@ -163,7 +163,7 @@ class SparseGP(GP):
|
|||
|
||||
for i in range(Xnew.shape[0]):
|
||||
_mu, _var = Xnew.mean.values[[i]], Xnew.variance.values[[i]]
|
||||
psi2_star = self.kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
psi2_star = kern.psi2(self.Z, NormalPosterior(_mu, _var))
|
||||
tmp = (psi2_star[:, :] - psi1_star[[i]].T.dot(psi1_star[[i]]))
|
||||
|
||||
var_ = mdot(la.T, tmp, la)
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ from ..inference.latent_function_inference import SVGP as svgp_inf
|
|||
|
||||
|
||||
class SVGP(SparseGP):
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None):
|
||||
def __init__(self, X, Y, Z, kernel, likelihood, mean_function=None, name='SVGP', Y_metadata=None, batchsize=None, num_latent_functions=None):
|
||||
"""
|
||||
Stochastic Variational GP.
|
||||
|
||||
|
|
@ -41,8 +41,12 @@ class SVGP(SparseGP):
|
|||
SparseGP.__init__(self, X_batch, Y_batch, Z, kernel, likelihood, mean_function=mean_function, inference_method=inf_method,
|
||||
name=name, Y_metadata=Y_metadata, normalizer=False)
|
||||
|
||||
self.m = Param('q_u_mean', np.zeros((self.num_inducing, Y.shape[1])))
|
||||
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[:,:,None], (1,1,Y.shape[1])))
|
||||
#assume the number of latent functions is one per col of Y unless specified
|
||||
if num_latent_functions is None:
|
||||
num_latent_functions = Y.shape[1]
|
||||
|
||||
self.m = Param('q_u_mean', np.zeros((self.num_inducing, num_latent_functions)))
|
||||
chol = choleskies.triang_to_flat(np.tile(np.eye(self.num_inducing)[None,:,:], (num_latent_functions, 1,1)))
|
||||
self.chol = Param('q_u_chol', chol)
|
||||
self.link_parameter(self.chol)
|
||||
self.link_parameter(self.m)
|
||||
|
|
|
|||
|
|
@ -5,9 +5,10 @@ from __future__ import print_function
|
|||
import numpy as np
|
||||
import sys
|
||||
import time
|
||||
import datetime
|
||||
|
||||
def exponents(fnow, current_grad):
|
||||
exps = [np.abs(np.float(fnow)), current_grad]
|
||||
exps = [np.abs(np.float(fnow)), 1 if current_grad is np.nan else current_grad]
|
||||
return np.sign(exps) * np.log10(exps).astype(int)
|
||||
|
||||
class VerboseOptimization(object):
|
||||
|
|
@ -23,6 +24,7 @@ class VerboseOptimization(object):
|
|||
self.model.add_observer(self, self.print_status)
|
||||
self.status = 'running'
|
||||
self.clear = clear_after_finish
|
||||
self.deltat = .2
|
||||
|
||||
self.update()
|
||||
|
||||
|
|
@ -44,25 +46,25 @@ class VerboseOptimization(object):
|
|||
self.hor_align = FlexBox(children = [left_col, right_col], width='100%', orientation='horizontal')
|
||||
|
||||
display(self.hor_align)
|
||||
|
||||
|
||||
try:
|
||||
self.text.set_css('width', '100%')
|
||||
left_col.set_css({
|
||||
'padding': '2px',
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
|
||||
right_col.set_css({
|
||||
'padding': '2px',
|
||||
})
|
||||
|
||||
|
||||
self.hor_align.set_css({
|
||||
'width': "100%",
|
||||
})
|
||||
|
||||
self.hor_align.remove_class('vbox')
|
||||
self.hor_align.add_class('hbox')
|
||||
|
||||
|
||||
left_col.add_class("box-flex1")
|
||||
right_col.add_class('box-flex0')
|
||||
|
||||
|
|
@ -74,16 +76,31 @@ class VerboseOptimization(object):
|
|||
else:
|
||||
self.exps = exponents(self.fnow, self.current_gradient)
|
||||
print('Running {} Code:'.format(self.opt_name))
|
||||
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "secs", mi=self.len_maxiters))
|
||||
print(' {3:7s} {0:{mi}s} {1:11s} {2:11s}'.format("i", "f", "|g|", "runtime", mi=self.len_maxiters))
|
||||
|
||||
def __enter__(self):
|
||||
self.start = time.time()
|
||||
return self
|
||||
|
||||
def print_out(self):
|
||||
def print_out(self, seconds):
|
||||
if seconds<60:
|
||||
ms = (seconds%1)*100
|
||||
self.timestring = "{s:0>2d}s{ms:0>2d}".format(s=int(seconds), ms=int(ms))
|
||||
else:
|
||||
m, s = divmod(seconds, 60)
|
||||
if m>59:
|
||||
h, m = divmod(m, 60)
|
||||
if h>23:
|
||||
d, h = divmod(h, 24)
|
||||
self.timestring = '{d:0>2d}d{h:0>2d}h{m:0>2d}'.format(m=int(m), h=int(h), d=int(d))
|
||||
else:
|
||||
self.timestring = '{h:0>2d}h{m:0>2d}m{s:0>2d}'.format(m=int(m), s=int(s), h=int(h))
|
||||
else:
|
||||
ms = (seconds%1)*100
|
||||
self.timestring = '{m:0>2d}m{s:0>2d}s{ms:0>2d}'.format(m=int(m), s=int(s), ms=int(ms))
|
||||
if self.ipython_notebook:
|
||||
names_vals = [['optimizer', "{:s}".format(self.opt_name)],
|
||||
['runtime [s]', "{:> g}".format(time.time()-self.start)],
|
||||
['runtime', "{:>s}".format(self.timestring)],
|
||||
['evaluation', "{:>0{l}}".format(self.iteration, l=self.len_maxiters)],
|
||||
['objective', "{: > 12.3E}".format(self.fnow)],
|
||||
['||gradient||', "{: >+12.3E}".format(float(self.current_gradient))],
|
||||
|
|
@ -120,14 +137,18 @@ class VerboseOptimization(object):
|
|||
if b:
|
||||
self.exps = n_exps
|
||||
print('\r', end=' ')
|
||||
print('{3:> 7.2g} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), time.time()-self.start, mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||
print('{3:} {0:>0{mi}g} {1:> 12e} {2:> 12e}'.format(self.iteration, float(self.fnow), float(self.current_gradient), "{:>8s}".format(self.timestring), mi=self.len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||
sys.stdout.flush()
|
||||
|
||||
def print_status(self, me, which=None):
|
||||
self.update()
|
||||
|
||||
seconds = time.time()-self.start
|
||||
#sys.stdout.write(" "*len(self.message))
|
||||
self.print_out()
|
||||
self.deltat += seconds
|
||||
if self.deltat > .2:
|
||||
self.print_out(seconds)
|
||||
self.deltat = 0
|
||||
|
||||
self.iteration += 1
|
||||
|
||||
|
|
@ -141,17 +162,24 @@ class VerboseOptimization(object):
|
|||
|
||||
def finish(self, opt):
|
||||
self.status = opt.status
|
||||
if self.verbose and self.ipython_notebook:
|
||||
if 'conv' in self.status.lower():
|
||||
self.progress.bar_style = 'success'
|
||||
elif self.iteration >= self.maxiters:
|
||||
self.progress.bar_style = 'warning'
|
||||
else:
|
||||
self.progress.bar_style = 'danger'
|
||||
|
||||
def __exit__(self, type, value, traceback):
|
||||
if self.verbose:
|
||||
self.stop = time.time()
|
||||
self.model.remove_observer(self)
|
||||
self.print_out()
|
||||
self.print_out(self.stop - self.start)
|
||||
|
||||
if not self.ipython_notebook:
|
||||
print()
|
||||
print('Optimization finished in {0:.5g} Seconds'.format(self.stop-self.start))
|
||||
print('Optimization status: {0}'.format(self.status))
|
||||
print('Runtime: {}'.format("{:>9s}".format(self.timestring)))
|
||||
print('Optimization status: {0}'.format(self.status))
|
||||
print()
|
||||
elif self.clear:
|
||||
self.hor_align.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue