mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-14 14:32:37 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
5dd343e89d
8 changed files with 617 additions and 264 deletions
|
|
@ -84,31 +84,6 @@ class model(parameterised):
|
||||||
for w in which:
|
for w in which:
|
||||||
self.priors[w] = what
|
self.priors[w] = what
|
||||||
|
|
||||||
def get(self,name, return_names=False):
|
|
||||||
"""
|
|
||||||
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(name)
|
|
||||||
if len(matches):
|
|
||||||
if return_names:
|
|
||||||
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
|
|
||||||
else:
|
|
||||||
return self._get_params()[matches]
|
|
||||||
else:
|
|
||||||
raise AttributeError, "no parameter matches %s"%name
|
|
||||||
|
|
||||||
def set(self,name,val):
|
|
||||||
"""
|
|
||||||
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(name)
|
|
||||||
if len(matches):
|
|
||||||
x = self._get_params()
|
|
||||||
x[matches] = val
|
|
||||||
self._set_params(x)
|
|
||||||
else:
|
|
||||||
raise AttributeError, "no parameter matches %s"%name
|
|
||||||
|
|
||||||
def get_gradient(self, name, return_names=False):
|
def get_gradient(self, name, return_names=False):
|
||||||
"""
|
"""
|
||||||
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ import copy
|
||||||
import cPickle
|
import cPickle
|
||||||
import os
|
import os
|
||||||
from ..util.squashers import sigmoid
|
from ..util.squashers import sigmoid
|
||||||
|
import warnings
|
||||||
|
|
||||||
def truncate_pad(string, width, align='m'):
|
def truncate_pad(string, width, align='m'):
|
||||||
"""
|
"""
|
||||||
|
|
@ -55,6 +56,73 @@ class parameterised(object):
|
||||||
|
|
||||||
return copy.deepcopy(self)
|
return copy.deepcopy(self)
|
||||||
|
|
||||||
|
@property
def params(self):
    """
    Parameters of the model in non transformed space.

    The getter simply returns the result of ``self._get_params()``.  The
    original documentation describes that result as a **copy**, so changes
    made to the returned value only reach the model once it is assigned
    back through the setter.

    :see_also: :py:func:`GPy.core.parameterised.params_transformed`
    """
    return self._get_params()

@params.setter
def params(self, params):
    # Assignment is forwarded unchanged to the private setter.
    self._set_params(params)
|
||||||
|
|
||||||
|
@property
def params_transformed(self):
    """
    Parameters of the model in transformed space.

    The getter simply returns the result of
    ``self._get_params_transformed()``.  The original documentation
    describes that result as a **copy**, so changes made to the returned
    value only reach the model once it is assigned back through the setter.

    :see_also: :py:func:`GPy.core.parameterised.params`
    """
    return self._get_params_transformed()

@params_transformed.setter
def params_transformed(self, params):
    # Assignment is forwarded unchanged to the private transformed setter.
    self._set_params_transformed(params)
|
||||||
|
|
||||||
|
# Text passed to warnings.warn() by the deprecated get()/set() accessors; it
# points users at the item-access syntax (m['var']) that replaces them.
_get_set_deprecation = """get and set methods wont be available at next minor release
in the next releases you will get and set with following syntax:
Assume m is a model class:
print m['var'] # > prints all parameters matching 'var'
m['var'] = 2. # > sets all parameters matching 'var' to 2.
m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>
"""
|
||||||
|
def get(self, name):
    """
    Deprecated accessor: return the parameter(s) matching ``name``.

    Issues a ``FutureWarning`` carrying ``self._get_set_deprecation`` and
    then returns ``self[name]``.

    :param name: parameter name, forwarded unchanged to ``self[name]``
    """
    # stacklevel=2 makes the warning point at the caller of get(), not here.
    warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
    return self[name]
|
||||||
|
|
||||||
|
def set(self, name, val):
    """
    Deprecated mutator: assign ``val`` to the parameter(s) matching ``name``.

    Issues a ``FutureWarning`` carrying ``self._get_set_deprecation`` and
    then performs ``self[name] = val``.

    :param name: parameter name, forwarded unchanged to ``self[name] = val``
    :param val: value to assign
    """
    # stacklevel=2 makes the warning point at the caller of set(), not here.
    warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
    self[name] = val
|
||||||
|
|
||||||
|
def __getitem__(self, name, return_names=False):
    """
    Get model parameter(s) by name.

    ``name`` is handed to ``self.grep_param_names``; the original
    documentation states that it is applied as a regular expression and that
    all parameters matching it are returned.

    :param name: pattern selecting the parameters
    :param return_names: if True, also return the names of the matched
        parameters as a list
    :returns: ``self._get_params()[matches]``, or a
        ``(values, names)`` tuple when ``return_names`` is True
    :raises AttributeError: if no parameter matches ``name``
    """
    matches = self.grep_param_names(name)
    if not len(matches):
        # Call form of raise: valid on Python 2 and Python 3 alike.
        raise AttributeError("no parameter matches %s" % name)
    values = self._get_params()[matches]
    if return_names:
        names = np.asarray(self._get_param_names())[matches].tolist()
        return values, names
    return values
|
||||||
|
|
||||||
|
def __setitem__(self, name, val):
    """
    Set model parameter(s) by name.

    ``name`` is handed to ``self.grep_param_names``; the original
    documentation states that it is applied as a regular expression and that
    all parameters matching it are set to the given value.

    :param name: pattern selecting the parameters
    :param val: a single value (applied to every match) or an array-like
        with exactly one entry per matched parameter
    :raises AttributeError: if no parameter matches ``name``
    :raises AssertionError: if ``val`` has neither one element nor one
        element per match
    """
    matches = self.grep_param_names(name)
    if not len(matches):
        # Call form of raise: valid on Python 2 and Python 3 alike.
        raise AttributeError("no parameter matches %s" % name)
    val = np.array(val)
    assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
    # Read-modify-write through the `params` property: the value is fetched,
    # updated, and assigned back so the setter is always invoked.
    x = self.params
    x[matches] = val
    self.params = x
|
||||||
|
|
||||||
def tie_params(self, which):
|
def tie_params(self, which):
|
||||||
matches = self.grep_param_names(which)
|
matches = self.grep_param_names(which)
|
||||||
|
|
|
||||||
|
|
@ -112,14 +112,14 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
|
||||||
s3 = s3(x)
|
s3 = s3(x)
|
||||||
sS = sS(x)
|
sS = sS(x)
|
||||||
|
|
||||||
s1 -= s1.mean()
|
# s1 -= s1.mean()
|
||||||
s2 -= s2.mean()
|
# s2 -= s2.mean()
|
||||||
s3 -= s3.mean()
|
# s3 -= s3.mean()
|
||||||
sS -= sS.mean()
|
# sS -= sS.mean()
|
||||||
s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
|
# s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
|
||||||
s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
|
# s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
|
||||||
s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
|
# s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
|
||||||
sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
|
# sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
|
||||||
|
|
||||||
S1 = np.hstack([s1, sS])
|
S1 = np.hstack([s1, sS])
|
||||||
S2 = np.hstack([s2, sS])
|
S2 = np.hstack([s2, sS])
|
||||||
|
|
@ -129,9 +129,9 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
|
||||||
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
|
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
|
||||||
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
|
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
|
||||||
|
|
||||||
Y1 += .5 * np.random.randn(*Y1.shape)
|
Y1 += .3 * np.random.randn(*Y1.shape)
|
||||||
Y2 += .5 * np.random.randn(*Y2.shape)
|
Y2 += .3 * np.random.randn(*Y2.shape)
|
||||||
Y3 += .5 * np.random.randn(*Y3.shape)
|
Y3 += .3 * np.random.randn(*Y3.shape)
|
||||||
|
|
||||||
Y1 -= Y1.mean(0)
|
Y1 -= Y1.mean(0)
|
||||||
Y2 -= Y2.mean(0)
|
Y2 -= Y2.mean(0)
|
||||||
|
|
@ -162,8 +162,11 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
|
||||||
|
|
||||||
return slist, [S1, S2, S3], Ylist
|
return slist, [S1, S2, S3], Ylist
|
||||||
|
|
||||||
def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
|
def bgplvm_simulation(burnin='scg', plot_sim=False,
|
||||||
D1, D2, D3, N, M, Q = 2000, 8, 8, 500, 2, 6
|
max_burnin=100, true_X=False,
|
||||||
|
do_opt=True,
|
||||||
|
max_f_eval=1000):
|
||||||
|
D1, D2, D3, N, M, Q = 10, 8, 8, 50, 30, 5
|
||||||
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
|
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
|
||||||
|
|
||||||
from GPy.models import mrd
|
from GPy.models import mrd
|
||||||
|
|
@ -171,53 +174,73 @@ def bgplvm_simulation(burnin='scg', plot_sim=False, max_f_eval=12):
|
||||||
reload(mrd); reload(kern)
|
reload(mrd); reload(kern)
|
||||||
|
|
||||||
|
|
||||||
Y = Ylist[1]
|
Y = Ylist[0]
|
||||||
|
|
||||||
k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
|
k = kern.linear(Q, ARD=True) + kern.white(Q, .00001) # + kern.bias(Q)
|
||||||
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k)
|
# k = kern.white(Q, .00001) + kern.bias(Q)
|
||||||
|
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
|
||||||
# m.set('noise',)
|
# m.set('noise',)
|
||||||
|
m.ensure_default_constraints()
|
||||||
# m.auto_scale_factor = True
|
# m.auto_scale_factor = True
|
||||||
# m.scale_factor = 1.
|
# m.scale_factor = 1.
|
||||||
m.ensure_default_constraints()
|
|
||||||
|
|
||||||
|
|
||||||
if burnin:
|
if burnin:
|
||||||
print "initializing beta"
|
print "initializing beta"
|
||||||
cstr = "noise"
|
cstr = "noise"
|
||||||
m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 100.)
|
m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 70.)
|
||||||
m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
|
m.optimize(burnin, messages=1, max_f_eval=max_burnin)
|
||||||
|
|
||||||
print "releasing beta"
|
print "releasing beta"
|
||||||
cstr = "noise"
|
cstr = "noise"
|
||||||
m.unconstrain(cstr); m.constrain_positive(cstr)
|
m.unconstrain(cstr); m.constrain_positive(cstr)
|
||||||
|
|
||||||
true_X = np.hstack((slist[1], slist[3], 0. * np.ones((N, Q - 2))))
|
if true_X:
|
||||||
|
true_X = np.hstack((slist[0], slist[3], 0. * np.ones((N, Q - 2))))
|
||||||
m.set('X_\d', true_X)
|
m.set('X_\d', true_X)
|
||||||
m.constrain_fixed("X_\d")
|
m.constrain_fixed("X_\d")
|
||||||
|
|
||||||
# # cstr = 'variance'
|
cstr = 'X_variance'
|
||||||
# # m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
|
# m.unconstrain(cstr), m.constrain_fixed(cstr, .0001)
|
||||||
|
m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-7, .1)
|
||||||
|
|
||||||
|
# cstr = 'X_variance'
|
||||||
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
|
||||||
|
|
||||||
|
m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
|
||||||
|
|
||||||
|
# cstr = "iip"
|
||||||
|
# m.unconstrain(cstr); m.constrain_fixed(cstr)
|
||||||
|
|
||||||
|
# cstr = 'variance'
|
||||||
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
|
||||||
# cstr = 'X_\d'
|
# cstr = 'X_\d'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, -100., 100.)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, -10., 10.)
|
||||||
#
|
#
|
||||||
# cstr = 'noise'
|
# cstr = 'noise'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-5, 1.)
|
||||||
#
|
#
|
||||||
# cstr = 'white'
|
# cstr = 'white'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
|
||||||
#
|
#
|
||||||
# cstr = 'linear_variance'
|
# cstr = 'linear_variance'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.) # m.constrain_positive(cstr)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
|
||||||
#
|
|
||||||
# cstr = 'X_variance'
|
# cstr = 'variance'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.) # m.constrain_positive(cstr)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
|
||||||
|
|
||||||
# np.seterr(all='call')
|
# np.seterr(all='call')
|
||||||
# def ipdbonerr(errtype, flags):
|
# def ipdbonerr(errtype, flags):
|
||||||
# import ipdb; ipdb.set_trace()
|
# import ipdb; ipdb.set_trace()
|
||||||
# np.seterrcall(ipdbonerr)
|
# np.seterrcall(ipdbonerr)
|
||||||
|
|
||||||
|
if do_opt and burnin:
|
||||||
|
try:
|
||||||
|
m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
return m
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def mrd_simulation(plot_sim=False):
|
def mrd_simulation(plot_sim=False):
|
||||||
|
|
@ -261,6 +284,7 @@ def mrd_simulation(plot_sim=False):
|
||||||
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
|
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
|
||||||
|
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
m.auto_scale_factor = True
|
||||||
|
|
||||||
# cstr = 'variance'
|
# cstr = 'variance'
|
||||||
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
|
||||||
|
|
|
||||||
146
GPy/inference/natural_gradient_scg.py
Normal file
146
GPy/inference/natural_gradient_scg.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
|
||||||
|
|
||||||
|
#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python by N. Lawrence and given a pythonic interface by James Hensman
|
||||||
|
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
||||||
|
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||||
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||||
|
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||||
|
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
# HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
# LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)

    f: the objective function, called as f(x, *optargs)
    gradf : the gradient function (should return a 1D np.ndarray), called
            as gradf(x, *optargs)
    x : the initial condition
    optargs : extra positional arguments passed to f and gradf
    maxiters : maximum number of iterations of the main loop
    max_f_eval : maximum number of evaluations of f
    display : if True, a progress line is written to stdout each iteration
    xtol : a successful step whose largest component is below this
           terminates the optimisation
    ftol : a successful step changing the objective by less than this
           terminates the optimisation

    Returns
    x : the best point found
    flog : a list of the objective values, one per iteration plus the
           initial value
    function_eval : the number of evaluations of f
    status : a string describing why the optimisation stopped

    """

    sigma0 = 1.0e-4
    fold = f(x, *optargs)  # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs)  # Initial gradient.
    gradold = gradnew.copy()
    d = -gradnew  # Initial search direction.
    success = True  # Force calculation of directional derivs.
    nsuccess = 0  # nsuccess counts number of successes.
    beta = 1.0  # Initial scale parameter.
    betamin = 1.0e-15  # Lower bound on scale.
    betamax = 1.0e100  # Upper bound on scale.
    status = "Not converged"

    flog = [fold]

    # A zero gradient at the starting point gives kappa == 0 below, and
    # sigma0/sqrt(kappa) would divide by zero and fill every later quantity
    # with NaN.  The start is already a stationary point, so stop here.
    if np.dot(gradnew, gradnew) == 0:
        status = 'converged'
        return x, flog, function_eval, status

    iteration = 0

    # Main optimization loop.
    while iteration < maxiters:

        # Calculate first and second directional derivatives.
        if success:
            mu = np.dot(d, gradnew)
            if mu >= 0:
                # Not a descent direction: fall back to steepest descent.
                d = -gradnew
                mu = np.dot(d, gradnew)
            kappa = np.dot(d, d)
            sigma = sigma0 / np.sqrt(kappa)
            xplus = x + sigma * d
            gplus = gradf(xplus, *optargs)
            # Finite-difference estimate of the curvature along d.
            theta = np.dot(d, (gplus - gradnew)) / sigma

        # Increase effective curvature and evaluate step size alpha.
        delta = theta + beta * kappa
        if delta <= 0:
            delta = beta * kappa
            beta = beta - theta / kappa

        alpha = -mu / delta

        # Calculate the comparison ratio.
        xnew = x + alpha * d
        fnew = f(xnew, *optargs)
        function_eval += 1

        if function_eval >= max_f_eval:
            status = "Maximum number of function evaluations exceeded"
            return x, flog, function_eval, status

        Delta = 2. * (fnew - fold) / (alpha * mu)
        if Delta >= 0.:
            success = True
            nsuccess += 1
            x = xnew
            fnow = fnew
        else:
            success = False
            fnow = fold

        # Store relevant variables
        flog.append(fnow)  # Current function value

        iteration += 1
        if display:
            # sys.stdout.write works on Python 2 and 3; the leading '\r'
            # redraws the progress line in place.
            sys.stdout.write('\r' + 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta))
            sys.stdout.flush()

        if success:
            # Test for termination
            if (np.max(np.abs(alpha * d)) < xtol) or (np.abs(fnew - fold) < ftol):
                status = 'converged'
                return x, flog, function_eval, status

            else:
                # Update variables for new position
                fold = fnew
                gradold = gradnew
                gradnew = gradf(x, *optargs)
                # If the gradient is zero then we are done.
                if np.dot(gradnew, gradnew) == 0:
                    status = 'converged'
                    return x, flog, function_eval, status

        # Adjust beta according to comparison ratio.
        if Delta < 0.25:
            beta = min(4.0 * beta, betamax)
        if Delta > 0.75:
            beta = max(0.5 * beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
            nsuccess = 0
        elif success:
            gamma = np.dot(gradold - gradnew, gradnew) / (mu)
            d = gamma * d - gradnew

    # If we get here, then we haven't terminated in the given number of
    # iterations.
    status = "maxiter exceeded"

    return x, flog, function_eval, status
|
||||||
|
|
@ -70,8 +70,8 @@ class kern(parameterised):
|
||||||
ard_params = 1. / p.lengthscale
|
ard_params = 1. / p.lengthscale
|
||||||
|
|
||||||
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
|
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
|
||||||
ax.set_xticks(np.arange(len(ard_params)),
|
ax.set_xticks(np.arange(len(ard_params)))
|
||||||
["${}$".format(i + 1) for i in range(len(ard_params))])
|
ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
|
||||||
return ax
|
return ax
|
||||||
|
|
||||||
def _transform_gradients(self, g):
|
def _transform_gradients(self, g):
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ from GPy.util.linalg import pdinv
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from numpy.linalg.linalg import LinAlgError
|
from numpy.linalg.linalg import LinAlgError
|
||||||
|
import itertools
|
||||||
|
|
||||||
class Bayesian_GPLVM(sparse_GP, GPLVM):
|
class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
"""
|
"""
|
||||||
|
|
@ -23,7 +24,9 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
:type init: 'PCA'|'random'
|
:type init: 'PCA'|'random'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, oldpsave=5, **kwargs):
|
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
|
||||||
|
Z=None, kernel=None, oldpsave=5, _debug=False,
|
||||||
|
**kwargs):
|
||||||
if X == None:
|
if X == None:
|
||||||
X = self.initialise_latent(init, Q, Y)
|
X = self.initialise_latent(init, Q, Y)
|
||||||
|
|
||||||
|
|
@ -39,6 +42,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
|
|
||||||
self.oldpsave = oldpsave
|
self.oldpsave = oldpsave
|
||||||
self._oldps = []
|
self._oldps = []
|
||||||
|
self._debug = _debug
|
||||||
|
|
||||||
|
if self._debug:
|
||||||
|
self._count = itertools.count()
|
||||||
|
self._savedklll = []
|
||||||
|
self._savedparams = []
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
|
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
|
||||||
|
|
||||||
|
|
@ -70,16 +79,18 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
|
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
|
||||||
return x
|
return x
|
||||||
|
|
||||||
def _set_params(self, x, save_old=True, save_count=0):
    """
    Unpack the flat parameter vector ``x`` into the model.

    The first ``N * Q`` entries become ``self.X``, the next ``N * Q``
    entries become ``self.X_variance`` (both reshaped to ``(N, Q)`` and
    copied), and the remainder is passed to ``sparse_GP._set_params``.
    On success ``x`` is stored in ``self.oldps``.

    If a ``LinAlgError``, ``FloatingPointError`` or ``ZeroDivisionError``
    is raised along the way, a warning naming the caught exception is
    printed and the method returns without restoring any earlier state.

    :param x: flat parameter vector
    :param save_old: not used by this implementation
    :param save_count: not used by this implementation
    """
    try:
        N, Q = self.N, self.Q
        self.X = x[:self.X.size].reshape(N, Q).copy()
        self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
        sparse_GP._set_params(self, x[(2 * N * Q):])
        self.oldps = x
    except (LinAlgError, FloatingPointError, ZeroDivisionError) as err:
        # Name the exception actually caught; the message used to say
        # "LinAlgError" for all three.  Single-argument print(...) behaves
        # the same on Python 2 and 3.
        print("\rWARNING: Caught %s, continuing without setting " % type(err).__name__)
|
||||||
|
|
||||||
def dKL_dmuS(self):
|
def dKL_dmuS(self):
|
||||||
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
|
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
|
||||||
|
|
@ -103,15 +114,29 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
def log_likelihood(self):
    """
    Return ``sparse_GP.log_likelihood(self) - self.KL_divergence()``.

    When ``self._debug`` is set, every call also appends
    ``[call_index, ll, kl]`` to ``self._savedklll`` and
    ``[call_index, self._get_params()]`` to ``self._savedparams``, where
    ``call_index`` is drawn from the ``self._count`` counter.
    """
    ll = sparse_GP.log_likelihood(self)
    kl = self.KL_divergence()

    if self._debug:
        # next(it) works on Python 2.6+ and Python 3; it.next() is 2-only.
        f_call = next(self._count)
        self._savedklll.append([f_call, ll, kl])
        # Parameters are recorded on every call.
        self._savedparams.append([f_call, self._get_params()])

    return ll - kl
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
dKL_dmu, dKL_dS = self.dKL_dmuS()
|
dKL_dmu, dKL_dS = self.dKL_dmuS()
|
||||||
dL_dmu, dL_dS = self.dL_dmuS()
|
dL_dmu, dL_dS = self.dL_dmuS()
|
||||||
# TODO: find way to make faster
|
# TODO: find way to make faster
|
||||||
|
|
||||||
d_dmu = (dL_dmu + dKL_dmu).flatten()
|
d_dmu = (dL_dmu - dKL_dmu).flatten()
|
||||||
d_dS = (dL_dS + dKL_dS).flatten()
|
d_dS = (dL_dS - dKL_dS).flatten()
|
||||||
# TEST KL: ====================
|
# TEST KL: ====================
|
||||||
# d_dmu = (dKL_dmu).flatten()
|
# d_dmu = (dKL_dmu).flatten()
|
||||||
# d_dS = (dKL_dS).flatten()
|
# d_dS = (dKL_dS).flatten()
|
||||||
|
|
@ -135,3 +160,140 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
|
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
|
||||||
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
|
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
|
||||||
return ax
|
return ax
|
||||||
|
|
||||||
|
def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
    """
    Plot every latent dimension of ``self.X`` in its own axes.

    Each axes shows all columns of ``self.X`` in faint black, the current
    dimension highlighted in colour, and a band of plus/minus two times
    ``sqrt(self.X_variance)`` around it.

    :param fig_num: passed as ``num`` to ``pylab.figure``
    :param axes: optional sequence of axes, one per latent dimension; when
        None, subplots are created on the figure
    :param colors: optional iterator yielding one colour per dimension;
        defaults to the colour cycle of the current axes
    :returns: the figure
    """
    import pylab

    n_dims = self.X.shape[1]
    # NOTE(review): this method reads `self.bgplvms`, which is not set in the
    # Bayesian_GPLVM code visible here (the removed MRD version used it).
    # Fall back to a fixed width when it is missing rather than raising
    # AttributeError - confirm the preferred width.
    if hasattr(self, 'bgplvms'):
        width = min(8, (3 * len(self.bgplvms)))
    else:
        width = 8
    fig = pylab.figure(num=fig_num, figsize=(width, min(12, (2 * n_dims))))
    if colors is None:
        colors = pylab.gca()._get_lines.color_cycle
    pylab.clf()
    plots = []
    index = np.arange(self.X.shape[0])
    for i in range(n_dims):
        if axes is None:
            ax = fig.add_subplot(n_dims, 1, i + 1)
        else:
            ax = axes[i]
        ax.plot(self.X, c='k', alpha=.3)
        mean = self.X.T[i]
        # next(it) works on Python 2.6+ and Python 3; it.next() is 2-only.
        plots.extend(ax.plot(mean, c=next(colors), label=r"$\mathbf{{X_{}}}$".format(i)))
        two_sd = 2 * np.sqrt(self.X_variance.T[i])
        ax.fill_between(index,
                        mean - two_sd,
                        mean + two_sd,
                        facecolor=plots[-1].get_color(),
                        alpha=.3)
        ax.legend(borderaxespad=0.)
        if i < n_dims - 1:
            # Only the last axes keeps its x tick labels.
            ax.set_xticklabels('')
    pylab.draw()
    fig.tight_layout(h_pad=.01)
    return fig
|
||||||
|
|
||||||
|
def _debug_filter_params(self, x):
    """
    Split a flat parameter vector into its four consecutive parts.

    :param x: flat parameter vector
    :returns: tuple ``(X, X_v, Z, theta)`` where ``X`` and ``X_v`` are the
        first two chunks (of ``self.X.size`` and ``self.X_variance.size``
        entries) reshaped to ``(N, Q)``, ``Z`` is the next ``M * Q`` entries
        reshaped to ``(M, Q)``, and ``theta`` is everything that remains
    """
    stop_X = self.X.size
    stop_S = stop_X + self.X_variance.size
    stop_Z = stop_S + (self.M * self.Q)
    X = x[:stop_X].reshape(self.N, self.Q)
    X_v = x[stop_X:stop_S].reshape(self.N, self.Q)
    Z = x[stop_S:stop_Z].reshape(self.M, self.Q)
    # Everything after Z is returned as theta.
    theta = x[stop_Z:]
    return X, X_v, Z, theta
|
||||||
|
|
||||||
|
def _debug_plot(self):
    """
    Open an interactive figure showing the recorded optimisation history.

    Requires the model to have been created with ``_debug`` enabled, so
    that ``self._savedklll`` and ``self._savedparams`` have been filled.
    The top axes plots the three recorded curves; the other axes show the
    parameters returned by ``_debug_filter_params`` for the currently
    selected call.  Left-clicking in the top axes selects the nearest
    recorded call and redraws the parameter plots.

    :returns: the five axes ``(ax1, ax2, ax3, ax4, ax5)``
    """
    assert self._debug, "must enable _debug, to debug-plot"
    import pylab
    fig = pylab.figure('BGPLVM DEBUG', figsize=(12, 10))
    fig.clf()

    # log like
    splotshape = (6, 4)
    ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
    ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
             ha='center', va='center')
    # Columns of kllls: call index, ll, kl (as appended to _savedklll).
    kllls = np.array(self._savedklll)
    LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], label=r'$\log p(\mathbf{Y})$', mew=1.5)
    KL, = ax1.plot(kllls[:, 0], kllls[:, 2], label=r'$\mathcal{KL}(p||q)$', mew=1.5)
    L, = ax1.plot(kllls[:, 0], kllls[:, 1], label=r'$L$', mew=1.5)

    # Map call index -> flat parameter vector recorded at that call.
    drawn = dict(self._savedparams)
    # list(...) so that a Python 3 keys view also becomes a 1-D array.
    iters = np.array(list(drawn.keys()))
    self.showing = 0

    ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
    ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
             ha='center', va='center')
    ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
    ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
             ha='center', va='center')
    ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
    ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
             ha='center', va='center')
    ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
    ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
             ha='center', va='center')

    X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
    Xlatentplts = ax2.plot(X, ls="-", marker="x")
    Slatentplts = ax3.plot(S, ls="-", marker="x")
    Zplts = ax4.plot(Z, ls="-", marker="x")
    thetaplts = ax5.bar(np.arange(len(theta)) - .4, theta)
    ax5.set_xticks(np.arange(len(theta)))
    ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)

    # Two legends on ax1: one naming the latent dimensions, one for the
    # curves.  The first is re-added as an artist because the second
    # ax1.legend() call replaces it.
    Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
                      loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
                      borderaxespad=0, mode="expand")
    Lleg = ax1.legend()
    Lleg.draggable()
    ax1.add_artist(Qleg)

    # Markers highlighting the currently selected call on each curve.
    indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
    indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
    indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())

    try:
        pylab.draw()
        # tight_layout takes `rect`; the former `box=` keyword raised a
        # TypeError that the bare except silently discarded.
        pylab.tight_layout(rect=(0, .1, 1, .9))
    except Exception:
        # Layout is best effort only; the figure stays usable without it.
        pass

    # parameter changes
    def onclick(event):
        if event.inaxes is ax1 and event.button == 1:
            # Select the recorded call closest to the clicked x position.
            tmp = np.abs(iters - event.xdata)
            closest_hit = iters[tmp == tmp.min()][0]

            if closest_hit != self.showing:
                self.showing = closest_hit

                indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
                indicatorKL.set_data(self.showing, kllls[self.showing, 2])
                indicatorL.set_data(self.showing, kllls[self.showing, 1])

                X, S, Z, theta = self._debug_filter_params(drawn[self.showing])
                for i, Xlatent in enumerate(Xlatentplts):
                    Xlatent.set_ydata(X[:, i])
                for i, Slatent in enumerate(Slatentplts):
                    Slatent.set_ydata(S[:, i])
                for i, Zlatent in enumerate(Zplts):
                    Zlatent.set_ydata(Z[:, i])
                for p, t in zip(thetaplts, theta):
                    p.set_height(t)

                for ax in (ax2, ax3, ax4, ax5):
                    ax.relim()
                for ax in (ax2, ax3, ax4, ax5):
                    ax.autoscale()
                fig.canvas.draw()

    fig.canvas.mpl_connect('button_press_event', onclick)

    return ax1, ax2, ax3, ax4, ax5
|
||||||
|
|
|
||||||
|
|
@ -287,29 +287,6 @@ class MRD(model):
|
||||||
else:
|
else:
|
||||||
return pylab.gcf()
|
return pylab.gcf()
|
||||||
|
|
||||||
def plot_X_1d(self, fig_num="MRD X 1d", axes=None, colors=None):
|
|
||||||
fig = pylab.figure(num=fig_num, figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
|
|
||||||
if colors is None:
|
|
||||||
colors = pylab.gca()._get_lines.color_cycle
|
|
||||||
pylab.clf()
|
|
||||||
plots = []
|
|
||||||
for i in range(self.X.shape[1]):
|
|
||||||
if axes is None:
|
|
||||||
ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
|
|
||||||
ax.plot(self.X, c='k', alpha=.3)
|
|
||||||
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
|
|
||||||
ax.fill_between(numpy.arange(self.X.shape[0]),
|
|
||||||
self.X.T[i] - 2 * numpy.sqrt(self.gref.X_variance.T[i]),
|
|
||||||
self.X.T[i] + 2 * numpy.sqrt(self.gref.X_variance.T[i]),
|
|
||||||
facecolor=plots[-1].get_color(),
|
|
||||||
alpha=.3)
|
|
||||||
ax.legend(borderaxespad=0.)
|
|
||||||
if i < self.X.shape[1] - 1:
|
|
||||||
ax.set_xticklabels('')
|
|
||||||
pylab.draw()
|
|
||||||
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
|
||||||
return fig
|
|
||||||
|
|
||||||
def plot_X(self, fig_num="MRD Predictions", axes=None):
|
def plot_X(self, fig_num="MRD Predictions", axes=None):
|
||||||
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
|
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
|
||||||
return fig
|
return fig
|
||||||
|
|
|
||||||
|
|
@ -57,6 +57,7 @@ class Test(unittest.TestCase):
|
||||||
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
|
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
|
||||||
Z = numpy.random.permutation(X)[:M]
|
Z = numpy.random.permutation(X)[:M]
|
||||||
Y = X.dot(numpy.random.randn(Q, D))
|
Y = X.dot(numpy.random.randn(Q, D))
|
||||||
|
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q)]
|
||||||
|
|
||||||
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
|
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
|
||||||
GPy.kern.linear(Q) + GPy.kern.bias(Q),
|
GPy.kern.linear(Q) + GPy.kern.bias(Q),
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue