mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-27 14:25:16 +02:00
Merge pull request #323 from SheffieldML/stochastics
[stochastics] update for new stochastic optimizers in gpy
This commit is contained in:
commit
af76126ef1
7 changed files with 188 additions and 24 deletions
|
|
@ -30,6 +30,8 @@ install:
|
||||||
- source install_retry.sh
|
- source install_retry.sh
|
||||||
- pip install codecov
|
- pip install codecov
|
||||||
- pip install pypandoc
|
- pip install pypandoc
|
||||||
|
- pip install git+git://github.com/BRML/climin.git
|
||||||
|
- pip install autograd
|
||||||
- python setup.py develop
|
- python setup.py develop
|
||||||
|
|
||||||
script:
|
script:
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,8 @@
|
||||||
from paramz.optimization import stochastics, Optimizer
|
from paramz.optimization import Optimizer
|
||||||
|
from . import stochastics
|
||||||
|
|
||||||
from paramz.optimization import *
|
from paramz.optimization import *
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
|
sys.modules['GPy.inference.optimization.stochastics'] = stochastics
|
||||||
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
|
sys.modules['GPy.inference.optimization.Optimizer'] = Optimizer
|
||||||
|
|
|
||||||
119
GPy/inference/optimization/stochastics.py
Normal file
119
GPy/inference/optimization/stochastics.py
Normal file
|
|
@ -0,0 +1,119 @@
|
||||||
|
#===============================================================================
|
||||||
|
# Copyright (c) 2015, Max Zwiessele
|
||||||
|
# All rights reserved.
|
||||||
|
#
|
||||||
|
# Redistribution and use in source and binary forms, with or without
|
||||||
|
# modification, are permitted provided that the following conditions are met:
|
||||||
|
#
|
||||||
|
# * Redistributions of source code must retain the above copyright notice, this
|
||||||
|
# list of conditions and the following disclaimer.
|
||||||
|
#
|
||||||
|
# * Redistributions in binary form must reproduce the above copyright notice,
|
||||||
|
# this list of conditions and the following disclaimer in the documentation
|
||||||
|
# and/or other materials provided with the distribution.
|
||||||
|
#
|
||||||
|
# * Neither the name of paramax nor the names of its
|
||||||
|
# contributors may be used to endorse or promote products derived from
|
||||||
|
# this software without specific prior written permission.
|
||||||
|
#
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||||
|
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||||
|
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||||
|
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||||
|
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||||
|
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
#===============================================================================
|
||||||
|
|
||||||
|
def _group_dims_by_missing_pattern(Y, dims):
    """
    Group output dimensions that share the same missing-value pattern.

    :param Y: 2d array of observations; NaN marks a missing value
    :param dims: iterable of column indices of Y to group
    :returns: list of ``[dimension indices, mask]`` pairs, in order of first
        occurrence of each pattern. ``mask`` is a boolean array over the rows
        of Y which is True where the grouped columns are *not* NaN.
    """
    import numpy as np

    groups = {}
    for d in dims:
        inan = np.isnan(Y[:, d])
        # The raw bytes of the boolean mask identify the pattern exactly, so
        # there is no need to render it as a string (which required changing
        # numpy's global print options to avoid cropping for N > 1000).
        key = inan.tobytes()
        group = groups.get(key)
        if group is None:
            groups[key] = [[d], ~inan]
        else:
            group[0].append(d)
    return list(groups.values())


class StochasticStorage(object):
    '''
    This is a container for holding the stochastic parameters,
    such as subset indices or step length and so on.

    self.d has to be a list of lists:
        [dimension indices, mask for those dimensions]
    where the mask is a boolean array which is True for the rows that are
    *not* NaN in those dimensions (or None if missing data is not handled),
    so that the minibatches can be used as efficiently as possible.
    '''
    def __init__(self, model):
        """
        Initialize this stochastic container using the given model
        """
        pass

    def do_stochastics(self):
        """
        Update the internal state to the next batch of the stochastic
        descent algorithm.
        """
        pass

    def reset(self):
        """
        Reset the state of this stochastics generator.
        """
        pass


class SparseGPMissing(StochasticStorage):
    def __init__(self, model, batchsize=1):
        """
        Here we want to loop over all dimensions every time.
        Thus, we can just make sure the loop goes over self.d every
        time. We will try to get batches which look the same together
        which speeds up calculations significantly.

        :param model: object providing the observations as ``Y_normalized``
        :param batchsize: not used by this class; kept so the signature
            matches the other stochastics containers
        """
        self.Y = model.Y_normalized
        self.d = _group_dims_by_missing_pattern(self.Y, range(self.Y.shape[1]))


class SparseGPStochastics(StochasticStorage):
    """
    For the sparse gp we need to store the dimension we are in,
    and the indices corresponding to those
    """
    def __init__(self, model, batchsize=1, missing_data=True):
        """
        :param model: object providing ``Y`` and ``Y_normalized``
        :param batchsize: number of output dimensions per batch
        :param missing_data: whether to compute non-NaN masks for each batch
        """
        self.batchsize = batchsize
        self.output_dim = model.Y.shape[1]
        self.Y = model.Y_normalized
        self.missing_data = missing_data
        self.reset()
        self.do_stochastics()

    def do_stochastics(self):
        """
        Advance to the next batch and store it in self.d.

        With a batchsize of 1 the dimensions are visited in order, wrapping
        around after the last one. Otherwise ``batchsize`` distinct dimensions
        are drawn at random and, if missing data is handled, grouped by their
        missing-value pattern.
        """
        import numpy as np

        if self.batchsize == 1:
            self.current_dim = (self.current_dim + 1) % self.output_dim
            if self.missing_data:
                # Store the non-NaN mask, as every other code path does. The
                # NaN mask itself would select exactly the missing rows.
                mask = ~np.isnan(self.Y[:, self.current_dim])
            else:
                mask = None
            self.d = [[[self.current_dim], mask]]
        else:
            dims = np.random.choice(self.output_dim, size=self.batchsize, replace=False)
            if self.missing_data:
                self.d = _group_dims_by_missing_pattern(self.Y, dims)
            else:
                self.d = [[dims, None]]

    def reset(self):
        """
        Forget the current batch; the next do_stochastics starts at dimension 0.
        """
        self.current_dim = -1
        self.d = None
|
||||||
|
|
@ -99,6 +99,9 @@ class Stationary(Kern):
|
||||||
|
|
||||||
@Cache_this(limit=3, ignore_args=())
|
@Cache_this(limit=3, ignore_args=())
|
||||||
def dK_dr_via_X(self, X, X2):
|
def dK_dr_via_X(self, X, X2):
|
||||||
|
"""
|
||||||
|
compute the derivative of K wrt r (the scaled distance), evaluated via the inputs X and X2
|
||||||
|
"""
|
||||||
#a convenience function, so we can cache dK_dr
|
#a convenience function, so we can cache dK_dr
|
||||||
return self.dK_dr(self._scaled_dist(X, X2))
|
return self.dK_dr(self._scaled_dist(X, X2))
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -40,12 +40,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||||
assert Z.shape[1] == X.shape[1]
|
assert Z.shape[1] == X.shape[1]
|
||||||
|
|
||||||
if X_variance == False:
|
if X_variance is False:
|
||||||
self.logger.info('no variance on X, activating sparse GPLVM')
|
self.logger.info('no variance on X, activating sparse GPLVM')
|
||||||
X = Param("latent space", X)
|
X = Param("latent space", X)
|
||||||
elif X_variance is None:
|
else:
|
||||||
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
|
if X_variance is None:
|
||||||
X_variance = np.random.uniform(0,.1,X.shape)
|
self.logger.info("initializing latent space variance ~ uniform(0,.1)")
|
||||||
|
X_variance = np.random.uniform(0,.1,X.shape)
|
||||||
self.variational_prior = NormalPrior()
|
self.variational_prior = NormalPrior()
|
||||||
X = NormalPosterior(X, X_variance)
|
X = NormalPosterior(X, X_variance)
|
||||||
|
|
||||||
|
|
@ -71,13 +72,13 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
self.X = X
|
self.X = X
|
||||||
self.link_parameter(self.X, 0)
|
self.link_parameter(self.X, 0)
|
||||||
|
|
||||||
def set_X_gradients(self, X, X_grad):
|
#def set_X_gradients(self, X, X_grad):
|
||||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
# """Set the gradients of the posterior distribution of X in its specific form."""
|
||||||
X.mean.gradient, X.variance.gradient = X_grad
|
# X.mean.gradient, X.variance.gradient = X_grad
|
||||||
|
|
||||||
def get_X_gradients(self, X):
|
#def get_X_gradients(self, X):
|
||||||
"""Get the gradients of the posterior distribution of X in its specific form."""
|
# """Get the gradients of the posterior distribution of X in its specific form."""
|
||||||
return X.mean.gradient, X.variance.gradient
|
# return X.mean.gradient, X.variance.gradient
|
||||||
|
|
||||||
def _outer_values_update(self, full_values):
|
def _outer_values_update(self, full_values):
|
||||||
"""
|
"""
|
||||||
|
|
@ -122,7 +123,7 @@ class BayesianGPLVMMiniBatch(SparseGPMiniBatch):
|
||||||
|
|
||||||
if self.missing_data or not self.stochastics:
|
if self.missing_data or not self.stochastics:
|
||||||
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
|
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)
|
||||||
elif self.stochastics:
|
else: #self.stochastics is given:
|
||||||
d = self.output_dim
|
d = self.output_dim
|
||||||
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
|
self._log_marginal_likelihood -= kl_fctr*self.variational_prior.KL_divergence(self.X)*self.stochastics.batchsize/d
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
def __init__(self, X, Y, Z, kernel, likelihood, inference_method=None,
|
||||||
name='sparse gp', Y_metadata=None, normalizer=False,
|
name='sparse gp', Y_metadata=None, normalizer=False,
|
||||||
missing_data=False, stochastic=False, batchsize=1):
|
missing_data=False, stochastic=False, batchsize=1):
|
||||||
|
self._update_stochastics = False
|
||||||
|
|
||||||
# pick a sensible inference method
|
# pick a sensible inference method
|
||||||
if inference_method is None:
|
if inference_method is None:
|
||||||
|
|
@ -74,6 +75,13 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
self.link_parameter(self.Z, index=0)
|
self.link_parameter(self.Z, index=0)
|
||||||
self.posterior = None
|
self.posterior = None
|
||||||
|
|
||||||
|
def optimize(self, optimizer=None, start=None, **kwargs):
    """
    Optimize the model, letting the stochastics advance during the run.

    ``self._update_stochastics`` is set to True for the duration of the call
    to ``SparseGP.optimize`` and is reset to False afterwards, even if the
    optimization raises.

    :param optimizer: forwarded unchanged to ``SparseGP.optimize``
    :param start: forwarded unchanged to ``SparseGP.optimize``
    :param kwargs: forwarded unchanged to ``SparseGP.optimize``
    :returns: whatever ``SparseGP.optimize`` returns (previously discarded)
    """
    self._update_stochastics = True
    try:
        return SparseGP.optimize(self, optimizer=optimizer, start=start, **kwargs)
    finally:
        # Always leave the flag cleared so evaluations outside of an
        # optimization run do not move on to the next batch.
        self._update_stochastics = False
|
||||||
|
|
||||||
def has_uncertain_inputs(self):
|
def has_uncertain_inputs(self):
|
||||||
return isinstance(self.X, VariationalPosterior)
|
return isinstance(self.X, VariationalPosterior)
|
||||||
|
|
||||||
|
|
@ -226,16 +234,16 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
woodbury_inv = self.posterior._woodbury_inv
|
woodbury_inv = self.posterior._woodbury_inv
|
||||||
woodbury_vector = self.posterior._woodbury_vector
|
woodbury_vector = self.posterior._woodbury_vector
|
||||||
|
|
||||||
if not self.stochastics:
|
#if not self.stochastics:
|
||||||
m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
|
# m_f = lambda i: "Inference with missing_data: {: >7.2%}".format(float(i+1)/self.output_dim)
|
||||||
message = m_f(-1)
|
# message = m_f(-1)
|
||||||
print(message, end=' ')
|
# print(message, end=' ')
|
||||||
|
|
||||||
for d, ninan in self.stochastics.d:
|
for d, ninan in self.stochastics.d:
|
||||||
if not self.stochastics:
|
#if not self.stochastics:
|
||||||
print(' '*(len(message)) + '\r', end=' ')
|
# print(' '*(len(message)) + '\r', end=' ')
|
||||||
message = m_f(d)
|
# message = m_f(d)
|
||||||
print(message, end=' ')
|
# print(message, end=' ')
|
||||||
|
|
||||||
psi0ni = self.psi0[ninan]
|
psi0ni = self.psi0[ninan]
|
||||||
psi1ni = self.psi1[ninan]
|
psi1ni = self.psi1[ninan]
|
||||||
|
|
@ -262,8 +270,8 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
woodbury_vector[:, d] = posterior.woodbury_vector
|
woodbury_vector[:, d] = posterior.woodbury_vector
|
||||||
self._log_marginal_likelihood += log_marginal_likelihood
|
self._log_marginal_likelihood += log_marginal_likelihood
|
||||||
|
|
||||||
if not self.stochastics:
|
#if not self.stochastics:
|
||||||
print('')
|
# print('')
|
||||||
|
|
||||||
if self.posterior is None:
|
if self.posterior is None:
|
||||||
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
self.posterior = Posterior(woodbury_inv=woodbury_inv, woodbury_vector=woodbury_vector,
|
||||||
|
|
@ -314,6 +322,8 @@ class SparseGPMiniBatch(SparseGP):
|
||||||
if self.missing_data:
|
if self.missing_data:
|
||||||
self._outer_loop_for_missing_data()
|
self._outer_loop_for_missing_data()
|
||||||
elif self.stochastics:
|
elif self.stochastics:
|
||||||
|
if self._update_stochastics:
|
||||||
|
self.stochastics.do_stochastics()
|
||||||
self._outer_loop_without_missing_data()
|
self._outer_loop_without_missing_data()
|
||||||
else:
|
else:
|
||||||
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
self.posterior, self._log_marginal_likelihood, self.grad_dict = self._inner_parameters_changed(self.kern, self.X, self.Z, self.likelihood, self.Y_normalized, self.Y_metadata)
|
||||||
|
|
|
||||||
|
|
@ -54,7 +54,7 @@ class BGPLVMTest(unittest.TestCase):
|
||||||
def test_lik_comparisons_m0_s0(self):
|
def test_lik_comparisons_m0_s0(self):
|
||||||
# Test if the different implementations give the exact same likelihood as the full model.
|
# Test if the different implementations give the exact same likelihood as the full model.
|
||||||
# All of the following settings should give the same likelihood and gradients as the full model:
|
# All of the following settings should give the same likelihood and gradients as the full model:
|
||||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, missing_data=False, stochastic=False)
|
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=self.m_full.X.variance.values, missing_data=False, stochastic=False)
|
||||||
m[:] = self.m_full[:]
|
m[:] = self.m_full[:]
|
||||||
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
np.testing.assert_almost_equal(m.log_likelihood(), self.m_full.log_likelihood(), 7)
|
||||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||||
|
|
@ -124,6 +124,32 @@ class SparseGPMinibatchTest(unittest.TestCase):
|
||||||
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
np.testing.assert_allclose(m.gradient, self.m_full.gradient)
|
||||||
assert(m.checkgrad())
|
assert(m.checkgrad())
|
||||||
|
|
||||||
|
def test_sparsegp_init(self):
    # Build a SparseGPMiniBatch for every missing_data/stochastic combination
    # and check the gradients both before and after a short optimization run.
    np.random.seed(1234)
    inducing_rows = np.random.choice(self.X.shape[0], replace=False, size=10)
    Z = self.X[inducing_rows].copy()
    Q = Z.shape[1]

    # (missing_data, stochastic, optimizer), in the order they are exercised
    settings = (
        (True, False, 'adadelta'),
        (True, True, 'rprop'),
        (False, False, 'rprop'),
        (False, True, 'adadelta'),
    )
    for missing_data, stochastic, optimizer in settings:
        kernel = GPy.kern.RBF(Q)+GPy.kern.Matern32(Q)+GPy.kern.Bias(Q)
        m = GPy.models.sparse_gp_minibatch.SparseGPMiniBatch(
            self.X, self.Y, Z, kernel, GPy.likelihoods.Gaussian(),
            missing_data=missing_data, stochastic=stochastic)
        assert(m.checkgrad())
        m.optimize(optimizer, max_iters=10)
        assert(m.checkgrad())
|
||||||
|
|
||||||
def test_predict_missing_data(self):
|
def test_predict_missing_data(self):
|
||||||
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
|
m = GPy.models.bayesian_gplvm_minibatch.BayesianGPLVMMiniBatch(self.Y, self.Q, X_variance=False, missing_data=True, stochastic=True, batchsize=self.Y.shape[1])
|
||||||
m[:] = self.m_full[:]
|
m[:] = self.m_full[:]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue