swiss_roll example added, BGPLVM_oil now working

This commit is contained in:
Max Zwiessele 2013-05-16 13:47:55 +01:00
parent 61a79c5041
commit 93d517f24e
5 changed files with 137 additions and 173 deletions

View file

@ -6,6 +6,7 @@ from matplotlib import pyplot as plt
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import swiss_roll_generated
default_seed = np.random.seed(123344)
@ -61,15 +62,18 @@ def GPLVM_oil_100(optimize=True):
m.plot_latent(labels=m.data_labels)
return m
def swiss_roll(optimize=True, N=1000, M=15, Q=4):
def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
from GPy.util.datasets import swiss_roll
from GPy.core.transformations import logexp_clipped
data = swiss_roll(N=N)
data = swiss_roll_generated(N=N, sigma=sigma)
Y = data['Y']
Y -= Y.mean(0)
Y /= Y.std(0)
t = data['t']
c = data['colors']
try:
from sklearn.manifold.isomap import Isomap
iso = Isomap().fit(Y)
@ -79,16 +83,33 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4):
except ImportError:
X = np.random.randn(N, Q)
if plot:
from mpl_toolkits import mplot3d
import pylab
fig = pylab.figure("Swiss Roll Data")
ax = fig.add_subplot(121, projection='3d')
ax.scatter(*Y.T, c=c)
ax.set_title("Swiss Roll")
ax = fig.add_subplot(122)
ax.scatter(*X.T[:2], c=c)
ax.set_title("Initialization")
var = .5
S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2, -(1 - var), (1 - var))) + .001
S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
- (1 - var),
(1 - var))) + .001
Z = np.random.permutation(X)[:M]
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, 2)
m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
m.data_colors = c
m.data_t = t
# m.constrain('variance|length', logexp_clipped())
m['lengthscale'] = X.var(0) / X.var(0).max()
m.constrain('variance|length', logexp_clipped())
m['lengthscale'] = X.var(0).max() / X.var(0)
m['noise'] = Y.var() / 100.
m.ensure_default_constraints()
@ -96,36 +117,33 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4):
m.optimize('scg', messages=1)
return m
def BGPLVM_oil(optimize=True, N=100, Q=10, M=10, max_f_eval=1e3, plot=False, **k):
def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
data = GPy.util.datasets.oil()
from GPy.core.transformations import logexp_clipped
np.random.seed(0)
# create simple GP model
kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
Y = data['X'][:N]
Y -= Y.mean(0)
Yn = Y - Y.mean(0)
Yn /= Yn.std(0)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M, **k)
m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
m.constrain('variance', logexp_clipped())
m.constrain('length', logexp_clipped())
m['lengt'] = 1.
m['noise'] = Y.var() / 100.
# m.constrain('variance', logexp_clipped())
# m.constrain('length', logexp_clipped())
m['lengt'] = m.X.var(0).max() / m.X.var(0)
m['noise'] = Yn.var() / 100.
m.ensure_default_constraints()
# optimize
if optimize:
m.unconstrain('X'); m.constrain_fixed('X')
m.optimize('scg', messages=1, max_f_eval=10)
m.unconstrain('X'); m.constrain('X_var', logexp_clipped())
m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
m.optimize('scg', messages=1, max_f_eval=150)
m.unconstrain('noise')
m.constrain('noise', logexp_clipped())
# m.unconstrain('noise'); m.constrain_fixed('noise')
# m.optimize('scg', messages=1, max_f_eval=200)
# m.unconstrain('noise')
# m.constrain('noise', logexp_clipped())
m.optimize('scg', messages=1, max_f_eval=max_f_eval)
if plot:

View file

@ -1,146 +0,0 @@
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to Python by N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)

    f: the objective function, called as f(x, *optargs)
    gradf : the gradient function, called as gradf(x, *optargs)
            (should return a 1D np.ndarray)
    x : the initial condition (np.ndarray; x.size sets the restart period)
    optargs : extra positional arguments forwarded to f and gradf
    maxiters : maximum number of passes through the main loop
    max_f_eval : maximum number of evaluations of f
    display : if True, write a one-line progress report to stdout each iteration
    xtol : stop when the largest component of an accepted step is below this
    ftol : stop when an accepted step changes the objective by less than this

    Returns
    x : the final (last accepted) value for x
    flog : a list of all the objective values
    function_eval : the number of evaluations of f that were made
    status : a string describing why the optimisation stopped
    """
    sigma0 = 1.0e-4
    fold = f(x, *optargs)  # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs)  # Initial gradient.
    gradold = gradnew.copy()
    d = -gradnew  # Initial search direction.
    success = True  # Force calculation of directional derivs.
    nsuccess = 0  # nsuccess counts number of successes.
    beta = 1.0  # Initial scale parameter.
    betamin = 1.0e-15  # Lower bound on scale.
    betamax = 1.0e100  # Upper bound on scale.
    status = "Not converged"

    flog = [fold]

    # A zero gradient at the starting point means there is no search
    # direction: kappa below would be 0 and sigma0/sqrt(kappa) would divide
    # by zero, turning x into NaNs. The start is already stationary.
    if np.dot(gradnew, gradnew) == 0:
        status = 'converged'
        return x, flog, function_eval, status

    iteration = 0

    # Main optimization loop.
    while iteration < maxiters:

        # Calculate first and second directional derivatives.
        if success:
            mu = np.dot(d, gradnew)
            if mu >= 0:
                # d is not a descent direction: fall back to steepest descent.
                d = -gradnew
                mu = np.dot(d, gradnew)
            kappa = np.dot(d, d)
            sigma = sigma0/np.sqrt(kappa)
            xplus = x + sigma*d
            gplus = gradf(xplus, *optargs)
            # Finite-difference estimate of the curvature along d.
            theta = np.dot(d, (gplus - gradnew))/sigma

        # Increase effective curvature and evaluate step size alpha.
        delta = theta + beta*kappa
        if delta <= 0:
            delta = beta*kappa
            beta = beta - theta/kappa

        alpha = - mu/delta

        # Calculate the comparison ratio.
        xnew = x + alpha*d
        fnew = f(xnew, *optargs)
        function_eval += 1

        if function_eval >= max_f_eval:
            status = "Maximum number of function evaluations exceeded"
            return x, flog, function_eval, status

        Delta = 2.*(fnew - fold)/(alpha*mu)
        if Delta >= 0.:
            success = True
            nsuccess += 1
            x = xnew
            fnow = fnew
        else:
            success = False
            fnow = fold

        # Store relevant variables
        flog.append(fnow)  # Current function value

        iteration += 1
        if display:
            # Written with sys.stdout.write rather than the print statement so
            # the routine runs unchanged on Python 2 and Python 3.
            sys.stdout.write('\r')
            sys.stdout.write('Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta))
            sys.stdout.flush()

        if success:
            # Test for termination
            if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
                status = 'converged'
                return x, flog, function_eval, status

            else:
                # Update variables for new position
                fold = fnew
                gradold = gradnew
                gradnew = gradf(x, *optargs)
                # If the gradient is zero then we are done.
                if np.dot(gradnew, gradnew) == 0:
                    # A stationary point is a converged result, so say so
                    # instead of returning the initial "Not converged".
                    status = 'converged'
                    return x, flog, function_eval, status

        # Adjust beta according to comparison ratio.
        if Delta < 0.25:
            beta = min(4.0*beta, betamax)
        if Delta > 0.75:
            beta = max(0.5*beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
            nsuccess = 0
        elif success:
            gamma = np.dot(gradold - gradnew, gradnew)/(mu)
            d = gamma*d - gradnew

    # If we get here, then we haven't terminated in the given number of
    # iterations.
    status = "maxiter exceeded"

    return x, flog, function_eval, status

View file

@ -27,7 +27,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=5, _debug=False,
Z=None, kernel=None, oldpsave=10, _debug=False,
**kwargs):
if X == None:
X = self.initialise_latent(init, Q, Y)
@ -167,8 +167,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
dbound_dmuS = np.hstack((d_dmu, d_dS))
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
self.dbound_dmuS = np.hstack((d_dmu, d_dS))
self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
def _log_likelihood_normal_gradients(self):
Si, _, _, _ = pdinv(self.X_variance)
def plot_latent(self, which_indices=None, *args, **kwargs):
@ -263,7 +267,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
kmm_dict = dict(self._savedpsiKmm)
# kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
ABCD_dict = np.array(self._savedABCD)
self.showing = 0

View file

@ -4,6 +4,7 @@ import numpy as np
import GPy
import scipy.sparse
import scipy.io
import cPickle as pickle
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
@ -96,6 +97,19 @@ def stick():
lbls = 'connect'
return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."}
def swiss_roll_generated(N=1000, sigma=0.0):
    """
    Load a random subset of the pre-generated swiss roll data, ordered by t.

    Reads 'swiss_roll.pickle' from data_path, draws up to N rows at random
    (without replacement, using the global numpy random state) and returns
    them sorted by increasing 't'.

    N : number of points to draw; if the file holds fewer rows, all are used
    sigma : accepted for interface compatibility; not used by this function

    Returns a dict with the keys 'Y', 't' and 'colors', whose rows correspond
    to one another.
    """
    # Pickle streams are binary data, so the file must be opened with 'rb';
    # text mode can corrupt the stream or fail outright.
    # NOTE: pickle.load executes arbitrary code from the file -- only use it
    # on the dataset file shipped alongside this module.
    with open(os.path.join(data_path, 'swiss_roll.pickle'), 'rb') as f:
        data = pickle.load(f)
    Na = data['Y'].shape[0]
    # permutation(Na) shuffles np.arange(Na); keep the first N indices.
    perm = np.random.permutation(Na)[:N]
    Y = data['Y'][perm, :]
    t = data['t'][perm]
    c = data['colors'][perm, :]
    # Re-order the sampled rows so that t is increasing.
    so = np.argsort(t)
    return {'Y': Y[so, :], 't': t[so], 'colors': c[so, :]}
def swiss_roll_1000():
mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data'))
@ -105,8 +119,7 @@ def swiss_roll_1000():
def swiss_roll(N=3000):
mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat'))
Y = mat_data['X_data'][:, 0:N].transpose()
import ipdb;ipdb.set_trace()
return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
def toy_rbf_1d(seed=default_seed):
np.random.seed(seed=seed)

File diff suppressed because one or more lines are too long