mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-06-29 15:59:41 +02:00
merge with current GPy devel
This commit is contained in:
commit
c0e6978054
164 changed files with 733 additions and 5931 deletions
|
|
@ -1,3 +1,7 @@
|
|||
from . import latent_function_inference
|
||||
from . import optimization
|
||||
from . import latent_function_inference
|
||||
from . import mcmc
|
||||
|
||||
import sys
|
||||
sys.modules['GPy.inference.optimization'] = optimization
|
||||
sys.modules['GPy.inference.optimization.optimization'] = optimization
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ class ExactGaussianInference(LatentFunctionInference):
|
|||
def __init__(self):
|
||||
pass#self._YYTfactor_cache = caching.cache()
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None):
|
||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, K=None, precision=None, Z_tilde=None):
|
||||
"""
|
||||
Returns a Posterior class containing essential quantities of the posterior
|
||||
"""
|
||||
|
|
@ -49,9 +49,15 @@ class ExactGaussianInference(LatentFunctionInference):
|
|||
|
||||
log_marginal = 0.5*(-Y.size * log_2_pi - Y.shape[1] * W_logdet - np.sum(alpha * YYT_factor))
|
||||
|
||||
if Z_tilde is not None:
|
||||
# This is a correction term for the log marginal likelihood
|
||||
# In EP this is log Z_tilde, which is the difference between the
|
||||
# Gaussian marginal and Z_EP
|
||||
log_marginal += Z_tilde
|
||||
|
||||
dL_dK = 0.5 * (tdot(alpha) - Y.shape[1] * Wi)
|
||||
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK),Y_metadata)
|
||||
dL_dthetaL = likelihood.exact_inference_gradients(np.diag(dL_dK), Y_metadata)
|
||||
|
||||
return Posterior(woodbury_chol=LW, woodbury_vector=alpha, K=K), log_marginal, {'dL_dK':dL_dK, 'dL_dthetaL':dL_dthetaL, 'dL_dm':alpha}
|
||||
|
||||
|
|
|
|||
|
|
@ -2,14 +2,14 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
import numpy as np
|
||||
from ...util.linalg import jitchol, DSYR, dtrtrs, dtrtri
|
||||
from ...core.parameterization.observable_array import ObsAr
|
||||
from paramz import ObsAr
|
||||
from . import ExactGaussianInference, VarDTC
|
||||
from ...util import diag
|
||||
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
class EPBase(object):
|
||||
def __init__(self, epsilon=1e-6, eta=1., delta=1.):
|
||||
def __init__(self, epsilon=1e-6, eta=1., delta=1., always_reset=False):
|
||||
"""
|
||||
The expectation-propagation algorithm.
|
||||
For nomenclature see Rasmussen & Williams 2006.
|
||||
|
|
@ -20,8 +20,12 @@ class EPBase(object):
|
|||
:type eta: float64
|
||||
:param delta: damping EP updates factor.
|
||||
:type delta: float64
|
||||
:param always_reset: setting to always reset the approximation at the beginning of every inference call.
|
||||
:type always_reest: boolean
|
||||
|
||||
"""
|
||||
super(EPBase, self).__init__()
|
||||
self.always_reset = always_reset
|
||||
self.epsilon, self.eta, self.delta = epsilon, eta, delta
|
||||
self.reset()
|
||||
|
||||
|
|
@ -38,37 +42,46 @@ class EPBase(object):
|
|||
|
||||
class EP(EPBase, ExactGaussianInference):
|
||||
def inference(self, kern, X, likelihood, Y, mean_function=None, Y_metadata=None, precision=None, K=None):
|
||||
if self.always_reset:
|
||||
self.reset()
|
||||
|
||||
num_data, output_dim = Y.shape
|
||||
assert output_dim ==1, "ep in 1D only (for now!)"
|
||||
assert output_dim == 1, "ep in 1D only (for now!)"
|
||||
|
||||
if K is None:
|
||||
K = kern.K(X)
|
||||
|
||||
if self._ep_approximation is None:
|
||||
#if we don't yet have the results of runnign EP, run EP and store the computed factors in self._ep_approximation
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(K, Y, likelihood, Y_metadata)
|
||||
else:
|
||||
#if we've already run EP, just use the existing approximation stored in self._ep_approximation
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
|
||||
|
||||
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K)
|
||||
return super(EP, self).inference(kern, X, likelihood, mu_tilde[:,None], mean_function=mean_function, Y_metadata=Y_metadata, precision=1./tau_tilde, K=K, Z_tilde=np.log(Z_tilde).sum())
|
||||
|
||||
def expectation_propagation(self, K, Y, likelihood, Y_metadata):
|
||||
|
||||
num_data, data_dim = Y.shape
|
||||
assert data_dim == 1, "This EP methods only works for 1D outputs"
|
||||
|
||||
|
||||
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
|
||||
mu = np.zeros(num_data)
|
||||
Sigma = K.copy()
|
||||
diag.add(Sigma, 1e-7)
|
||||
|
||||
# Makes computing the sign quicker if we work with numpy arrays rather
|
||||
# than ObsArrays
|
||||
Y = Y.values.copy()
|
||||
|
||||
#Initial values - Marginal moments
|
||||
Z_hat = np.empty(num_data,dtype=np.float64)
|
||||
mu_hat = np.empty(num_data,dtype=np.float64)
|
||||
sigma2_hat = np.empty(num_data,dtype=np.float64)
|
||||
|
||||
tau_cav = np.empty(num_data,dtype=np.float64)
|
||||
v_cav = np.empty(num_data,dtype=np.float64)
|
||||
|
||||
#initial values - Gaussian factors
|
||||
if self.old_mutilde is None:
|
||||
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
|
||||
|
|
@ -80,22 +93,32 @@ class EP(EPBase, ExactGaussianInference):
|
|||
#Approximation
|
||||
tau_diff = self.epsilon + 1.
|
||||
v_diff = self.epsilon + 1.
|
||||
tau_tilde_old = np.nan
|
||||
v_tilde_old = np.nan
|
||||
iterations = 0
|
||||
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
|
||||
update_order = np.random.permutation(num_data)
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
tau_cav = 1./Sigma[i,i] - self.eta*tau_tilde[i]
|
||||
v_cav = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
|
||||
tau_cav[i] = 1./Sigma[i,i] - self.eta*tau_tilde[i]
|
||||
v_cav[i] = mu[i]/Sigma[i,i] - self.eta*v_tilde[i]
|
||||
if Y_metadata is not None:
|
||||
# Pick out the relavent metadata for Yi
|
||||
Y_metadata_i = {}
|
||||
for key in Y_metadata.keys():
|
||||
Y_metadata_i[key] = Y_metadata[key][i, :]
|
||||
else:
|
||||
Y_metadata_i = None
|
||||
#Marginal moments
|
||||
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
|
||||
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i], Y_metadata_i=Y_metadata_i)
|
||||
#Site parameters update
|
||||
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma[i,i])
|
||||
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma[i,i])
|
||||
tau_tilde[i] += delta_tau
|
||||
v_tilde[i] += delta_v
|
||||
#Posterior distribution parameters update
|
||||
DSYR(Sigma, Sigma[:,i].copy(), -delta_tau/(1.+ delta_tau*Sigma[i,i]))
|
||||
ci = delta_tau/(1.+ delta_tau*Sigma[i,i])
|
||||
DSYR(Sigma, Sigma[:,i].copy(), -ci)
|
||||
mu = np.dot(Sigma, v_tilde)
|
||||
|
||||
#(re) compute Sigma and mu using full Cholesky decompy
|
||||
|
|
@ -108,7 +131,7 @@ class EP(EPBase, ExactGaussianInference):
|
|||
mu = np.dot(Sigma,v_tilde)
|
||||
|
||||
#monitor convergence
|
||||
if iterations>0:
|
||||
if iterations > 0:
|
||||
tau_diff = np.mean(np.square(tau_tilde-tau_tilde_old))
|
||||
v_diff = np.mean(np.square(v_tilde-v_tilde_old))
|
||||
tau_tilde_old = tau_tilde.copy()
|
||||
|
|
@ -117,7 +140,11 @@ class EP(EPBase, ExactGaussianInference):
|
|||
iterations += 1
|
||||
|
||||
mu_tilde = v_tilde/tau_tilde
|
||||
return mu, Sigma, mu_tilde, tau_tilde, Z_hat
|
||||
mu_cav = v_cav/tau_cav
|
||||
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
|
||||
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
|
||||
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
|
||||
return mu, Sigma, mu_tilde, tau_tilde, Z_tilde
|
||||
|
||||
class EPDTC(EPBase, VarDTC):
|
||||
def inference(self, kern, X, Z, likelihood, Y, mean_function=None, Y_metadata=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
|
||||
|
|
@ -133,16 +160,16 @@ class EPDTC(EPBase, VarDTC):
|
|||
Kmn = psi1.T
|
||||
|
||||
if self._ep_approximation is None:
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation = self.expectation_propagation(Kmm, Kmn, Y, likelihood, Y_metadata)
|
||||
else:
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_hat = self._ep_approximation
|
||||
mu, Sigma, mu_tilde, tau_tilde, Z_tilde = self._ep_approximation
|
||||
|
||||
return super(EPDTC, self).inference(kern, X, Z, likelihood, mu_tilde,
|
||||
mean_function=mean_function,
|
||||
Y_metadata=Y_metadata,
|
||||
precision=tau_tilde,
|
||||
Lm=Lm, dL_dKmm=dL_dKmm,
|
||||
psi0=psi0, psi1=psi1, psi2=psi2)
|
||||
psi0=psi0, psi1=psi1, psi2=psi2, Z_tilde=np.log(Z_tilde).sum())
|
||||
|
||||
def expectation_propagation(self, Kmm, Kmn, Y, likelihood, Y_metadata):
|
||||
num_data, output_dim = Y.shape
|
||||
|
|
@ -167,6 +194,9 @@ class EPDTC(EPBase, VarDTC):
|
|||
mu_hat = np.zeros(num_data,dtype=np.float64)
|
||||
sigma2_hat = np.zeros(num_data,dtype=np.float64)
|
||||
|
||||
tau_cav = np.empty(num_data,dtype=np.float64)
|
||||
v_cav = np.empty(num_data,dtype=np.float64)
|
||||
|
||||
#initial values - Gaussian factors
|
||||
if self.old_mutilde is None:
|
||||
tau_tilde, mu_tilde, v_tilde = np.zeros((3, num_data))
|
||||
|
|
@ -186,10 +216,10 @@ class EPDTC(EPBase, VarDTC):
|
|||
while (tau_diff > self.epsilon) or (v_diff > self.epsilon):
|
||||
for i in update_order:
|
||||
#Cavity distribution parameters
|
||||
tau_cav = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
|
||||
v_cav = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
|
||||
tau_cav[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i]
|
||||
v_cav[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i]
|
||||
#Marginal moments
|
||||
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav, v_cav)#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
|
||||
Z_hat[i], mu_hat[i], sigma2_hat[i] = likelihood.moments_match_ep(Y[i], tau_cav[i], v_cav[i])#, Y_metadata=None)#=(None if Y_metadata is None else Y_metadata[i]))
|
||||
#Site parameters update
|
||||
delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
|
||||
delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
|
||||
|
|
@ -233,5 +263,8 @@ class EPDTC(EPBase, VarDTC):
|
|||
iterations += 1
|
||||
|
||||
mu_tilde = v_tilde/tau_tilde
|
||||
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_hat
|
||||
|
||||
mu_cav = v_cav/tau_cav
|
||||
sigma2_sigma2tilde = 1./tau_cav + 1./tau_tilde
|
||||
Z_tilde = np.exp(np.log(Z_hat) + 0.5*np.log(2*np.pi) + 0.5*np.log(sigma2_sigma2tilde)
|
||||
+ 0.5*((mu_cav - mu_tilde)**2) / (sigma2_sigma2tilde))
|
||||
return mu, Sigma, ObsAr(mu_tilde[:,None]), tau_tilde, Z_tilde
|
||||
|
|
|
|||
|
|
@ -3,9 +3,8 @@
|
|||
|
||||
import numpy as np
|
||||
from ...core import Model
|
||||
from ...core.parameterization import variational
|
||||
from GPy.core.parameterization import variational
|
||||
from ...util.linalg import tdot
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
|
||||
def infer_newX(model, Y_new, optimize=True, init='L2'):
|
||||
"""
|
||||
|
|
@ -62,14 +61,12 @@ class InferenceX(Model):
|
|||
# self.kern.GPU(True)
|
||||
from copy import deepcopy
|
||||
self.posterior = deepcopy(model.posterior)
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
if isinstance(model.X, variational.VariationalPosterior):
|
||||
self.uncertain_input = True
|
||||
from ...models.ss_gplvm import IBPPrior
|
||||
from ...models.ss_mrd import IBPPrior_SSMRD
|
||||
if isinstance(model.variational_prior, IBPPrior) or isinstance(model.variational_prior, IBPPrior_SSMRD):
|
||||
from ...core.parameterization.variational import SpikeAndSlabPrior
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
|
||||
self.variational_prior = variational.SpikeAndSlabPrior(pi=0.5, learnPi=False, group_spike=False)
|
||||
else:
|
||||
self.variational_prior = model.variational_prior.copy()
|
||||
else:
|
||||
|
|
@ -105,17 +102,16 @@ class InferenceX(Model):
|
|||
idx = dist.argmin(axis=1)
|
||||
|
||||
from ...models import SSGPLVM
|
||||
from ...util.misc import param_to_array
|
||||
if isinstance(model, SSGPLVM):
|
||||
X = variational.SpikeAndSlabPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]), param_to_array(model.X.gamma[idx]))
|
||||
X = variational.SpikeAndSlabPosterior((model.X.mean[idx].values), (model.X.variance[idx].values), (model.X.gamma[idx].values))
|
||||
if model.group_spike:
|
||||
X.gamma.fix()
|
||||
else:
|
||||
if self.uncertain_input and self.sparse_gp:
|
||||
X = variational.NormalPosterior(param_to_array(model.X.mean[idx]), param_to_array(model.X.variance[idx]))
|
||||
X = variational.NormalPosterior((model.X.mean[idx].values), (model.X.variance[idx].values))
|
||||
else:
|
||||
from ...core import Param
|
||||
X = Param('latent mean',param_to_array(model.X[idx]).copy())
|
||||
X = Param('latent mean',(model.X[idx].values).copy())
|
||||
|
||||
return X
|
||||
|
||||
|
|
@ -160,8 +156,7 @@ class InferenceX(Model):
|
|||
self.X.gradient = X_grad
|
||||
|
||||
if self.uncertain_input:
|
||||
from ...core.parameterization.variational import SpikeAndSlabPrior
|
||||
if isinstance(self.variational_prior, SpikeAndSlabPrior):
|
||||
if isinstance(self.variational_prior, variational.SpikeAndSlabPrior):
|
||||
# Update Log-likelihood
|
||||
KL_div = self.variational_prior.KL_divergence(self.X)
|
||||
# update for the KL divergence
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
from .posterior import Posterior
|
||||
from ...util.linalg import mdot, jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri, dpotri, dpotrs, symmetrify
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
|
@ -23,8 +23,7 @@ class VarDTC(LatentFunctionInference):
|
|||
"""
|
||||
const_jitter = 1e-8
|
||||
def __init__(self, limit=1):
|
||||
#self._YYTfactor_cache = caching.cache()
|
||||
from ...util.caching import Cacher
|
||||
from paramz.caching import Cacher
|
||||
self.limit = limit
|
||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||
|
|
@ -45,7 +44,7 @@ class VarDTC(LatentFunctionInference):
|
|||
def __setstate__(self, state):
|
||||
# has to be overridden, as Cacher objects cannot be pickled.
|
||||
self.limit = state
|
||||
from ...util.caching import Cacher
|
||||
from paramz.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
|
||||
|
||||
|
|
@ -64,7 +63,7 @@ class VarDTC(LatentFunctionInference):
|
|||
def get_VVTfactor(self, Y, prec):
|
||||
return Y * prec # TODO chache this, and make it effective
|
||||
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None):
|
||||
def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None, mean_function=None, precision=None, Lm=None, dL_dKmm=None, psi0=None, psi1=None, psi2=None, Z_tilde=None):
|
||||
assert mean_function is None, "inference with a mean function not implemented"
|
||||
|
||||
num_data, output_dim = Y.shape
|
||||
|
|
@ -152,6 +151,12 @@ class VarDTC(LatentFunctionInference):
|
|||
log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, precision, het_noise,
|
||||
psi0, A, LB, trYYT, data_fit, Y)
|
||||
|
||||
if Z_tilde is not None:
|
||||
# This is a correction term for the log marginal likelihood
|
||||
# In EP this is log Z_tilde, which is the difference between the
|
||||
# Gaussian marginal and Z_EP
|
||||
log_marginal += Z_tilde
|
||||
|
||||
#noise derivatives
|
||||
dL_dR = _compute_dL_dR(likelihood,
|
||||
het_noise, uncertain_inputs, LB,
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
from .posterior import Posterior
|
||||
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs, dtrtri,pdinv
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
from GPy.core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from . import LatentFunctionInference
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
|
@ -28,7 +28,7 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
self.limit = limit
|
||||
|
||||
# Cache functions
|
||||
from ...util.caching import Cacher
|
||||
from paramz.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
|
||||
|
||||
|
|
@ -46,7 +46,7 @@ class VarDTC_minibatch(LatentFunctionInference):
|
|||
self.mpi_comm = None
|
||||
self.midRes = {}
|
||||
self.batch_pos = 0
|
||||
from ...util.caching import Cacher
|
||||
from paramz.caching import Cacher
|
||||
self.get_trYYT = Cacher(self._get_trYYT, self.limit)
|
||||
self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,2 +1,5 @@
|
|||
from .scg import SCG
|
||||
from .optimization import *
|
||||
from paramz.optimization import stochastics, Optimizer
|
||||
from paramz.optimization import *
|
||||
import sys
|
||||
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
|
||||
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
|
||||
|
|
@ -1,285 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from .gradient_descent_update_rules import FletcherReeves, \
|
||||
PolakRibiere
|
||||
from Queue import Empty
|
||||
from multiprocessing import Value
|
||||
from multiprocessing.queues import Queue
|
||||
from multiprocessing.synchronize import Event
|
||||
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
|
||||
from threading import Thread
|
||||
import numpy
|
||||
import sys
|
||||
import time
|
||||
|
||||
RUNNING = "running"
|
||||
CONVERGED = "converged"
|
||||
MAXITER = "maximum number of iterations reached"
|
||||
MAX_F_EVAL = "maximum number of function calls reached"
|
||||
LINE_SEARCH = "line search failed"
|
||||
KBINTERRUPT = "interrupted"
|
||||
|
||||
class _Async_Optimization(Thread):
|
||||
|
||||
def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
|
||||
report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
|
||||
gtol=1e-6, outqueue=None, *args, **kw):
|
||||
"""
|
||||
Helper Process class for async optimization
|
||||
|
||||
f_call and df_call are Multiprocessing Values, for synchronized assignment
|
||||
"""
|
||||
self.f_call = Value('i', 0)
|
||||
self.df_call = Value('i', 0)
|
||||
self.f = self.f_wrapper(f, self.f_call)
|
||||
self.df = self.f_wrapper(df, self.df_call)
|
||||
self.x0 = x0
|
||||
self.update_rule = update_rule
|
||||
self.report_every = report_every
|
||||
self.messages = messages
|
||||
self.maxiter = maxiter
|
||||
self.max_f_eval = max_f_eval
|
||||
self.gtol = gtol
|
||||
self.SENTINEL = SENTINEL
|
||||
self.runsignal = runsignal
|
||||
# self.parent = parent
|
||||
# self.result = None
|
||||
self.outq = outqueue
|
||||
super(_Async_Optimization, self).__init__(target=self.run,
|
||||
name="CG Optimization",
|
||||
*args, **kw)
|
||||
|
||||
# def __enter__(self):
|
||||
# return self
|
||||
#
|
||||
# def __exit__(self, type, value, traceback):
|
||||
# return isinstance(value, TypeError)
|
||||
|
||||
def f_wrapper(self, f, counter):
|
||||
def f_w(*a, **kw):
|
||||
counter.value += 1
|
||||
return f(*a, **kw)
|
||||
return f_w
|
||||
|
||||
def callback(self, *a):
|
||||
if self.outq is not None:
|
||||
self.outq.put(a)
|
||||
# self.parent and self.parent.callback(*a, **kw)
|
||||
pass
|
||||
# print "callback done"
|
||||
|
||||
def callback_return(self, *a):
|
||||
self.callback(*a)
|
||||
if self.outq is not None:
|
||||
self.outq.put(self.SENTINEL)
|
||||
if self.messages:
|
||||
print("")
|
||||
self.runsignal.clear()
|
||||
|
||||
def run(self, *args, **kwargs):
|
||||
raise NotImplementedError("Overwrite this with optimization (for async use)")
|
||||
pass
|
||||
|
||||
class _CGDAsync(_Async_Optimization):
|
||||
|
||||
def reset(self, xi, *a, **kw):
|
||||
gi = -self.df(xi, *a, **kw)
|
||||
si = gi
|
||||
ur = self.update_rule(gi)
|
||||
return gi, ur, si
|
||||
|
||||
def run(self, *a, **kw):
|
||||
status = RUNNING
|
||||
|
||||
fi = self.f(self.x0)
|
||||
fi_old = fi + 5000
|
||||
|
||||
gi, ur, si = self.reset(self.x0, *a, **kw)
|
||||
xi = self.x0
|
||||
xi_old = numpy.nan
|
||||
it = 0
|
||||
|
||||
while it < self.maxiter:
|
||||
if not self.runsignal.is_set():
|
||||
break
|
||||
|
||||
if self.f_call.value > self.max_f_eval:
|
||||
status = MAX_F_EVAL
|
||||
|
||||
gi = -self.df(xi, *a, **kw)
|
||||
if numpy.dot(gi.T, gi) <= self.gtol:
|
||||
status = CONVERGED
|
||||
break
|
||||
if numpy.isnan(numpy.dot(gi.T, gi)):
|
||||
if numpy.any(numpy.isnan(xi_old)):
|
||||
status = CONVERGED
|
||||
break
|
||||
self.reset(xi_old)
|
||||
|
||||
gammai = ur(gi)
|
||||
if gammai < 1e-6 or it % xi.shape[0] == 0:
|
||||
gi, ur, si = self.reset(xi, *a, **kw)
|
||||
si = gi + gammai * si
|
||||
alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
|
||||
self.df,
|
||||
xi,
|
||||
si, gi,
|
||||
fi, fi_old)
|
||||
if alphai is None:
|
||||
alphai, _, _, fi2, fi_old2, gfi = \
|
||||
line_search_wolfe2(self.f, self.df,
|
||||
xi, si, gi,
|
||||
fi, fi_old)
|
||||
if alphai is None:
|
||||
# This line search also failed to find a better solution.
|
||||
status = LINE_SEARCH
|
||||
break
|
||||
if fi2 < fi:
|
||||
fi, fi_old = fi2, fi_old2
|
||||
if gfi is not None:
|
||||
gi = gfi
|
||||
|
||||
if numpy.isnan(fi) or fi_old < fi:
|
||||
gi, ur, si = self.reset(xi, *a, **kw)
|
||||
|
||||
else:
|
||||
xi += numpy.dot(alphai, si)
|
||||
if self.messages:
|
||||
sys.stdout.write("\r")
|
||||
sys.stdout.flush()
|
||||
sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))
|
||||
|
||||
if it % self.report_every == 0:
|
||||
self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
|
||||
it += 1
|
||||
else:
|
||||
status = MAXITER
|
||||
self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
|
||||
self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
|
||||
|
||||
class Async_Optimize(object):
|
||||
callback = lambda *x: None
|
||||
runsignal = Event()
|
||||
SENTINEL = "SENTINEL"
|
||||
|
||||
def async_callback_collect(self, q):
|
||||
while self.runsignal.is_set():
|
||||
try:
|
||||
for ret in iter(lambda: q.get(timeout=1), self.SENTINEL):
|
||||
self.callback(*ret)
|
||||
self.runsignal.clear()
|
||||
except Empty:
|
||||
pass
|
||||
|
||||
def opt_async(self, f, df, x0, callback, update_rule=PolakRibiere,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, *args, **kwargs):
|
||||
self.runsignal.set()
|
||||
c = None
|
||||
outqueue = None
|
||||
if callback:
|
||||
outqueue = Queue()
|
||||
self.callback = callback
|
||||
c = Thread(target=self.async_callback_collect, args=(outqueue,))
|
||||
c.start()
|
||||
p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
|
||||
report_every=report_every, messages=messages, maxiter=maxiter,
|
||||
max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
|
||||
p.start()
|
||||
return p, c
|
||||
|
||||
def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, *args, **kwargs):
|
||||
p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
|
||||
maxiter, max_f_eval, gtol,
|
||||
report_every, *args, **kwargs)
|
||||
while self.runsignal.is_set():
|
||||
try:
|
||||
p.join(1)
|
||||
if c: c.join(1)
|
||||
except KeyboardInterrupt:
|
||||
# print "^C"
|
||||
self.runsignal.clear()
|
||||
p.join()
|
||||
if c: c.join()
|
||||
if c and c.is_alive():
|
||||
# self.runsignal.set()
|
||||
# while self.runsignal.is_set():
|
||||
# try:
|
||||
# c.join(.1)
|
||||
# except KeyboardInterrupt:
|
||||
# # print "^C"
|
||||
# self.runsignal.clear()
|
||||
# c.join()
|
||||
print("WARNING: callback still running, optimisation done!")
|
||||
return p.result
|
||||
|
||||
class CGD(Async_Optimize):
|
||||
'''
|
||||
Conjugate gradient descent algorithm to minimize
|
||||
function f with gradients df, starting at x0
|
||||
with update rule update_rule
|
||||
|
||||
if df returns tuple (grad, natgrad) it will optimize according
|
||||
to natural gradient rules
|
||||
'''
|
||||
opt_name = "Conjugate Gradient Descent"
|
||||
|
||||
def opt_async(self, *a, **kw):
|
||||
"""
|
||||
opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, \*args, \*\*kwargs)
|
||||
|
||||
callback gets called every `report_every` iterations
|
||||
|
||||
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
|
||||
|
||||
if df returns tuple (grad, natgrad) it will optimize according
|
||||
to natural gradient rules
|
||||
|
||||
f, and df will be called with
|
||||
|
||||
f(xi, \*args, \*\*kwargs)
|
||||
df(xi, \*args, \*\*kwargs)
|
||||
|
||||
**Returns:**
|
||||
|
||||
Started `Process` object, optimizing asynchronously
|
||||
|
||||
**Calls:**
|
||||
|
||||
callback(x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message)
|
||||
|
||||
at end of optimization!
|
||||
"""
|
||||
return super(CGD, self).opt_async(*a, **kw)
|
||||
|
||||
def opt(self, *a, **kw):
|
||||
"""
|
||||
opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
|
||||
messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
|
||||
report_every=10, \*args, \*\*kwargs)
|
||||
|
||||
Minimize f, calling callback every `report_every` iterations with following syntax:
|
||||
|
||||
callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)
|
||||
|
||||
if df returns tuple (grad, natgrad) it will optimize according
|
||||
to natural gradient rules
|
||||
|
||||
f, and df will be called with
|
||||
|
||||
f(xi, \*args, \*\*kwargs)
|
||||
df(xi, \*args, \*\*kwargs)
|
||||
|
||||
**returns**
|
||||
|
||||
x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
|
||||
|
||||
at end of optimization
|
||||
"""
|
||||
return super(CGD, self).opt(*a, **kw)
|
||||
|
||||
|
|
@ -1,51 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
import numpy
|
||||
|
||||
class GDUpdateRule():
|
||||
_gradnat = None
|
||||
_gradnatold = None
|
||||
def __init__(self, initgrad, initgradnat=None):
|
||||
self.grad = initgrad
|
||||
if initgradnat:
|
||||
self.gradnat = initgradnat
|
||||
else:
|
||||
self.gradnat = initgrad
|
||||
# self.grad, self.gradnat
|
||||
def _gamma(self):
|
||||
raise NotImplemented("""Implement gamma update rule here,
|
||||
you can use self.grad and self.gradold for parameters, as well as
|
||||
self.gradnat and self.gradnatold for natural gradients.""")
|
||||
def __call__(self, grad, gradnat=None, si=None, *args, **kw):
|
||||
"""
|
||||
Return gamma for given gradients and optional natural gradients
|
||||
"""
|
||||
if not gradnat:
|
||||
gradnat = grad
|
||||
self.gradold = self.grad
|
||||
self.gradnatold = self.gradnat
|
||||
self.grad = grad
|
||||
self.gradnat = gradnat
|
||||
self.si = si
|
||||
return self._gamma(*args, **kw)
|
||||
|
||||
class FletcherReeves(GDUpdateRule):
|
||||
'''
|
||||
Fletcher Reeves update rule for gamma
|
||||
'''
|
||||
def _gamma(self, *a, **kw):
|
||||
tmp = numpy.dot(self.grad.T, self.gradnat)
|
||||
if tmp:
|
||||
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
|
||||
return tmp
|
||||
|
||||
class PolakRibiere(GDUpdateRule):
|
||||
'''
|
||||
Fletcher Reeves update rule for gamma
|
||||
'''
|
||||
def _gamma(self, *a, **kw):
|
||||
tmp = numpy.dot((self.grad - self.gradold).T, self.gradnat)
|
||||
if tmp:
|
||||
return tmp / numpy.dot(self.gradold.T, self.gradnatold)
|
||||
return tmp
|
||||
|
|
@ -1,193 +0,0 @@
|
|||
# Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2014)
|
||||
|
||||
# Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
||||
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
# LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
# POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
from __future__ import print_function
|
||||
import numpy as np
|
||||
import sys
|
||||
|
||||
def print_out(len_maxiters, fnow, current_grad, beta, iteration):
|
||||
print('\r', end=' ')
|
||||
print('{0:>0{mi}g} {1:> 12e} {2:< 12.6e} {3:> 12e}'.format(iteration, float(fnow), float(beta), float(current_grad), mi=len_maxiters), end=' ') # print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
|
||||
sys.stdout.flush()
|
||||
|
||||
def exponents(fnow, current_grad):
|
||||
exps = [np.abs(np.float(fnow)), current_grad]
|
||||
return np.sign(exps) * np.log10(exps).astype(int)
|
||||
|
||||
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=np.inf, display=True, xtol=None, ftol=None, gtol=None):
|
||||
"""
|
||||
Optimisation through Scaled Conjugate Gradients (SCG)
|
||||
|
||||
f: the objective function
|
||||
gradf : the gradient function (should return a 1D np.ndarray)
|
||||
x : the initial condition
|
||||
|
||||
Returns
|
||||
x the optimal value for x
|
||||
flog : a list of all the objective values
|
||||
function_eval number of fn evaluations
|
||||
status: string describing convergence status
|
||||
"""
|
||||
if xtol is None:
|
||||
xtol = 1e-6
|
||||
if ftol is None:
|
||||
ftol = 1e-6
|
||||
if gtol is None:
|
||||
gtol = 1e-5
|
||||
|
||||
sigma0 = 1.0e-7
|
||||
fold = f(x, *optargs) # Initial function value.
|
||||
function_eval = 1
|
||||
fnow = fold
|
||||
gradnew = gradf(x, *optargs) # Initial gradient.
|
||||
function_eval += 1
|
||||
#if any(np.isnan(gradnew)):
|
||||
# raise UnexpectedInfOrNan, "Gradient contribution resulted in a NaN value"
|
||||
current_grad = np.dot(gradnew, gradnew)
|
||||
gradold = gradnew.copy()
|
||||
d = -gradnew # Initial search direction.
|
||||
success = True # Force calculation of directional derivs.
|
||||
nsuccess = 0 # nsuccess counts number of successes.
|
||||
beta = 1.0 # Initial scale parameter.
|
||||
betamin = 1.0e-15 # Lower bound on scale.
|
||||
betamax = 1.0e15 # Upper bound on scale.
|
||||
status = "Not converged"
|
||||
|
||||
flog = [fold]
|
||||
|
||||
iteration = 0
|
||||
|
||||
len_maxiters = len(str(maxiters))
|
||||
if display:
|
||||
print(' {0:{mi}s} {1:11s} {2:11s} {3:11s}'.format("I", "F", "Scale", "|g|", mi=len_maxiters))
|
||||
exps = exponents(fnow, current_grad)
|
||||
p_iter = iteration
|
||||
|
||||
# Main optimization loop.
|
||||
while iteration < maxiters:
|
||||
|
||||
# Calculate first and second directional derivatives.
|
||||
if success:
|
||||
mu = np.dot(d, gradnew)
|
||||
if mu >= 0:
|
||||
d = -gradnew
|
||||
mu = np.dot(d, gradnew)
|
||||
kappa = np.dot(d, d)
|
||||
sigma = sigma0 / np.sqrt(kappa)
|
||||
xplus = x + sigma * d
|
||||
gplus = gradf(xplus, *optargs)
|
||||
function_eval += 1
|
||||
theta = np.dot(d, (gplus - gradnew)) / sigma
|
||||
|
||||
# Increase effective curvature and evaluate step size alpha.
|
||||
delta = theta + beta * kappa
|
||||
if delta <= 0:
|
||||
delta = beta * kappa
|
||||
beta = beta - theta / kappa
|
||||
|
||||
alpha = -mu / delta
|
||||
|
||||
# Calculate the comparison ratio.
|
||||
xnew = x + alpha * d
|
||||
fnew = f(xnew, *optargs)
|
||||
function_eval += 1
|
||||
|
||||
if function_eval >= max_f_eval:
|
||||
status = "maximum number of function evaluations exceeded"
|
||||
break
|
||||
return x, flog, function_eval, status
|
||||
|
||||
Delta = 2.*(fnew - fold) / (alpha * mu)
|
||||
if Delta >= 0.:
|
||||
success = True
|
||||
nsuccess += 1
|
||||
x = xnew
|
||||
fnow = fnew
|
||||
else:
|
||||
success = False
|
||||
fnow = fold
|
||||
|
||||
# Store relevant variables
|
||||
flog.append(fnow) # Current function value
|
||||
|
||||
iteration += 1
|
||||
if display:
|
||||
print_out(len_maxiters, fnow, current_grad, beta, iteration)
|
||||
n_exps = exponents(fnow, current_grad)
|
||||
if iteration - p_iter >= 20 * np.random.rand():
|
||||
a = iteration >= p_iter * 2.78
|
||||
b = np.any(n_exps < exps)
|
||||
if a or b:
|
||||
p_iter = iteration
|
||||
print('')
|
||||
if b:
|
||||
exps = n_exps
|
||||
|
||||
if success:
|
||||
# Test for termination
|
||||
|
||||
if (np.abs(fnew - fold) < ftol):
|
||||
status = 'converged - relative reduction in objective'
|
||||
break
|
||||
# return x, flog, function_eval, status
|
||||
elif (np.max(np.abs(alpha * d)) < xtol):
|
||||
status = 'converged - relative stepsize'
|
||||
break
|
||||
else:
|
||||
# Update variables for new position
|
||||
gradold = gradnew
|
||||
gradnew = gradf(x, *optargs)
|
||||
function_eval += 1
|
||||
current_grad = np.dot(gradnew, gradnew)
|
||||
fold = fnew
|
||||
# If the gradient is zero then we are done.
|
||||
if current_grad <= gtol:
|
||||
status = 'converged - relative reduction in gradient'
|
||||
break
|
||||
# return x, flog, function_eval, status
|
||||
|
||||
# Adjust beta according to comparison ratio.
|
||||
if Delta < 0.25:
|
||||
beta = min(4.0 * beta, betamax)
|
||||
if Delta > 0.75:
|
||||
beta = max(0.25 * beta, betamin)
|
||||
|
||||
# Update search direction using Polak-Ribiere formula, or re-start
|
||||
# in direction of negative gradient after nparams steps.
|
||||
if nsuccess == x.size:
|
||||
d = -gradnew
|
||||
beta = 1. # This is not in the original paper
|
||||
nsuccess = 0
|
||||
elif success:
|
||||
Gamma = np.dot(gradold - gradnew, gradnew) / (mu)
|
||||
d = Gamma * d - gradnew
|
||||
else:
|
||||
# If we get here, then we haven't terminated in the given number of
|
||||
# iterations.
|
||||
status = "maxiter exceeded"
|
||||
|
||||
if display:
|
||||
print_out(len_maxiters, fnow, current_grad, beta, iteration)
|
||||
print("")
|
||||
print(status)
|
||||
return x, flog, function_eval, status
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
# Copyright (c) 2012-2014, Max Zwiessele
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
class StochasticStorage(object):
|
||||
'''
|
||||
This is a container for holding the stochastic parameters,
|
||||
such as subset indices or step length and so on.
|
||||
|
||||
self.d has to be a list of lists:
|
||||
[dimension indices, nan indices for those dimensions]
|
||||
so that the minibatches can be used as efficiently as possible.10
|
||||
'''
|
||||
def __init__(self, model):
|
||||
"""
|
||||
Initialize this stochastic container using the given model
|
||||
"""
|
||||
|
||||
def do_stochastics(self):
|
||||
"""
|
||||
Update the internal state to the next batch of the stochastic
|
||||
descent algorithm.
|
||||
"""
|
||||
pass
|
||||
|
||||
def reset(self):
|
||||
"""
|
||||
Reset the state of this stochastics generator.
|
||||
"""
|
||||
|
||||
class SparseGPMissing(StochasticStorage):
|
||||
def __init__(self, model, batchsize=1):
|
||||
"""
|
||||
Here we want to loop over all dimensions everytime.
|
||||
Thus, we can just make sure the loop goes over self.d every
|
||||
time. We will try to get batches which look the same together
|
||||
which speeds up calculations significantly.
|
||||
"""
|
||||
import numpy as np
|
||||
self.Y = model.Y_normalized
|
||||
bdict = {}
|
||||
#For N > 1000 array2string default crops
|
||||
opt = np.get_printoptions()
|
||||
np.set_printoptions(threshold=np.inf)
|
||||
for d in range(self.Y.shape[1]):
|
||||
inan = np.isnan(self.Y)[:, d]
|
||||
arr_str = np.array2string(inan, np.inf, 0, True, '', formatter={'bool':lambda x: '1' if x else '0'})
|
||||
try:
|
||||
bdict[arr_str][0].append(d)
|
||||
except:
|
||||
bdict[arr_str] = [[d], ~inan]
|
||||
np.set_printoptions(**opt)
|
||||
self.d = bdict.values()
|
||||
|
||||
class SparseGPStochastics(StochasticStorage):
|
||||
"""
|
||||
For the sparse gp we need to store the dimension we are in,
|
||||
and the indices corresponding to those
|
||||
"""
|
||||
def __init__(self, model, batchsize=1, missing_data=True):
|
||||
self.batchsize = batchsize
|
||||
self.output_dim = model.Y.shape[1]
|
||||
self.Y = model.Y_normalized
|
||||
self.missing_data = missing_data
|
||||
self.reset()
|
||||
self.do_stochastics()
|
||||
|
||||
def do_stochastics(self):
|
||||
import numpy as np
|
||||
if self.batchsize == 1:
|
||||
self.current_dim = (self.current_dim+1)%self.output_dim
|
||||
self.d = [[[self.current_dim], np.isnan(self.Y[:, self.current_dim]) if self.missing_data else None]]
|
||||
else:
|
||||
self.d = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
|
||||
bdict = {}
|
||||
if self.missing_data:
|
||||
opt = np.get_printoptions()
|
||||
np.set_printoptions(threshold=np.inf)
|
||||
for d in self.d:
|
||||
inan = np.isnan(self.Y[:, d])
|
||||
arr_str = np.array2string(inan,np.inf, 0,True, '',formatter={'bool':lambda x: '1' if x else '0'})
|
||||
try:
|
||||
bdict[arr_str][0].append(d)
|
||||
except:
|
||||
bdict[arr_str] = [[d], ~inan]
|
||||
np.set_printoptions(**opt)
|
||||
self.d = bdict.values()
|
||||
else:
|
||||
self.d = [[self.d, None]]
|
||||
|
||||
def reset(self):
|
||||
self.current_dim = -1
|
||||
self.d = None
|
||||
Loading…
Add table
Add a link
Reference in a new issue