swiss_roll example added, BGPLVM_oil now working

2026-07-20 16:51:05 +02:00 · 2013-05-16 13:47:55 +01:00 · 2013-05-16 13:47:55 +01:00 · 93d517f24e
commit 93d517f24e
parent 61a79c5041
5 changed files with 137 additions and 173 deletions
--- a/GPy/examples/dimensionality_reduction.py
+++ b/GPy/examples/dimensionality_reduction.py
@ -6,6 +6,7 @@ from matplotlib import pyplot as plt

 import GPy
 from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
+from GPy.util.datasets import swiss_roll_generated

 default_seed = np.random.seed(123344)

@ -61,15 +62,18 @@ def GPLVM_oil_100(optimize=True):
    m.plot_latent(labels=m.data_labels)
    return m

-def swiss_roll(optimize=True, N=1000, M=15, Q=4):
+def swiss_roll(optimize=True, N=1000, M=15, Q=4, sigma=.2, plot=False):
    from GPy.util.datasets import swiss_roll
    from GPy.core.transformations import logexp_clipped

-    data = swiss_roll(N=N)
+    data = swiss_roll_generated(N=N, sigma=sigma)
    Y = data['Y']
    Y -= Y.mean(0)
    Y /= Y.std(0)

+    t = data['t']
+    c = data['colors']
+
    try:
        from sklearn.manifold.isomap import Isomap
        iso = Isomap().fit(Y)
@ -79,16 +83,33 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4):
    except ImportError:
        X = np.random.randn(N, Q)

+    if plot:
+        from mpl_toolkits import mplot3d
+        import pylab
+        fig = pylab.figure("Swiss Roll Data")
+        ax = fig.add_subplot(121, projection='3d')
+        ax.scatter(*Y.T, c=c)
+        ax.set_title("Swiss Roll")
+
+        ax = fig.add_subplot(122)
+        ax.scatter(*X.T[:2], c=c)
+        ax.set_title("Initialization")
+
+
    var = .5
-    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2, -(1 - var), (1 - var))) + .001
+    S = (var * np.ones_like(X) + np.clip(np.random.randn(N, Q) * var ** 2,
+                                         - (1 - var),
+                                         (1 - var))) + .001
    Z = np.random.permutation(X)[:M]

    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, 2)

    m = Bayesian_GPLVM(Y, Q, X=X, X_variance=S, M=M, Z=Z, kernel=kernel)
+    m.data_colors = c
+    m.data_t = t

-#     m.constrain('variance|length', logexp_clipped())
-    m['lengthscale'] = X.var(0) / X.var(0).max()
+    m.constrain('variance|length', logexp_clipped())
+    m['lengthscale'] = X.var(0).max() / X.var(0)
    m['noise'] = Y.var() / 100.
    m.ensure_default_constraints()

@ -96,36 +117,33 @@ def swiss_roll(optimize=True, N=1000, M=15, Q=4):
        m.optimize('scg', messages=1)
    return m

-def BGPLVM_oil(optimize=True, N=100, Q=10, M=10, max_f_eval=1e3, plot=False, **k):
+def BGPLVM_oil(optimize=True, N=100, Q=5, M=25, max_f_eval=4e3, plot=False, **k):
    data = GPy.util.datasets.oil()
    from GPy.core.transformations import logexp_clipped
+    np.random.seed(0)

    # create simple GP model
    kernel = GPy.kern.rbf(Q, ARD=True) + GPy.kern.bias(Q, np.exp(-2)) + GPy.kern.white(Q, np.exp(-2))
    Y = data['X'][:N]
-    Y -= Y.mean(0)
+    Yn = Y - Y.mean(0)
+    Yn /= Yn.std(0)

-    m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=kernel, M=M, **k)
+    m = GPy.models.Bayesian_GPLVM(Yn, Q, kernel=kernel, M=M, **k)
    m.data_labels = data['Y'][:N].argmax(axis=1)

-    m.constrain('variance', logexp_clipped())
-    m.constrain('length', logexp_clipped())
-    m['lengt'] = 1.
-    m['noise'] = Y.var() / 100.
+#     m.constrain('variance', logexp_clipped())
+#     m.constrain('length', logexp_clipped())
+    m['lengt'] = m.X.var(0).max() / m.X.var(0)
+    m['noise'] = Yn.var() / 100.

    m.ensure_default_constraints()

    # optimize
    if optimize:
-        m.unconstrain('X'); m.constrain_fixed('X')
-        m.optimize('scg', messages=1, max_f_eval=10)
-        m.unconstrain('X'); m.constrain('X_var', logexp_clipped())
-
-        m.unconstrain('noise'); m.constrain_fixed('noise', Y.var() / 100.)
-        m.optimize('scg', messages=1, max_f_eval=150)
-
-        m.unconstrain('noise')
-        m.constrain('noise', logexp_clipped())
+#         m.unconstrain('noise'); m.constrain_fixed('noise')
+#         m.optimize('scg', messages=1, max_f_eval=200)
+#         m.unconstrain('noise')
+#         m.constrain('noise', logexp_clipped())
        m.optimize('scg', messages=1, max_f_eval=max_f_eval)

    if plot:
--- a/GPy/inference/natural_gradient_scg.py
+++ b/GPy/inference/natural_gradient_scg.py
@ -1,146 +0,0 @@
-#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
-
-#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
-
-#      THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
-#      HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
-#      EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
-#      NOT LIMITED TO, THE IMPLIED WARRANTIES OF
-#      MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-#      PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
-#      REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
-#      DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-#      EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-#      (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
-#      OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-#      DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-#      HOWEVER CAUSED AND ON ANY THEORY OF
-#      LIABILITY, WHETHER IN CONTRACT, STRICT
-#      LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
-#      OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-#      OF THIS SOFTWARE, EVEN IF ADVISED OF THE
-#      POSSIBILITY OF SUCH DAMAGE.
-
-
-import numpy as np
-import sys
-
-def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
-    """
-    Optimisation through Scaled Conjugate Gradients (SCG)
-
-    f: the objective function
-    gradf : the gradient function (should return a 1D np.ndarray)
-    x : the initial condition
-
-    Returns
-    x the optimal value for x
-    flog : a list of all the objective values
-
-    """
-
-    sigma0 = 1.0e-4
-    fold = f(x, *optargs)	# Initial function value.
-    function_eval = 1
-    fnow = fold
-    gradnew = gradf(x, *optargs)	# Initial gradient.
-    gradold = gradnew.copy()
-    d = -gradnew				# Initial search direction.
-    success = True				# Force calculation of directional derivs.
-    nsuccess = 0				# nsuccess counts number of successes.
-    beta = 1.0				# Initial scale parameter.
-    betamin = 1.0e-15 			# Lower bound on scale.
-    betamax = 1.0e100			# Upper bound on scale.
-    status = "Not converged"
-
-    flog = [fold]
-
-    iteration = 0
-
-    # Main optimization loop.
-    while iteration < maxiters:
-
-        # Calculate first and second directional derivatives.
-        if success:
-            mu = np.dot(d, gradnew)
-            if mu >= 0:
-                d = -gradnew
-                mu = np.dot(d, gradnew)
-            kappa = np.dot(d, d)
-            sigma = sigma0/np.sqrt(kappa)
-            xplus = x + sigma*d
-            gplus = gradf(xplus, *optargs)
-            theta = np.dot(d, (gplus - gradnew))/sigma
-
-        # Increase effective curvature and evaluate step size alpha.
-        delta = theta + beta*kappa
-        if delta <= 0:
-            delta = beta*kappa
-            beta = beta - theta/kappa
-
-        alpha = - mu/delta
-
-        # Calculate the comparison ratio.
-        xnew = x + alpha*d
-        fnew = f(xnew, *optargs)
-        function_eval += 1
-
-        if function_eval >= max_f_eval:
-            status = "Maximum number of function evaluations exceeded"
-            return x, flog, function_eval, status
-
-        Delta = 2.*(fnew - fold)/(alpha*mu)
-        if Delta  >= 0.:
-            success = True
-            nsuccess += 1
-            x = xnew
-            fnow = fnew
-        else:
-            success = False
-            fnow = fold
-
-        # Store relevant variables
-        flog.append(fnow)		# Current function value
-
-        iteration += 1
-        if display:
-            print '\r',
-            print 'Iteration: {0:>5g}  Objective:{1:> 12e}  Scale:{2:> 12e}'.format(iteration, fnow, beta),
-            # print 'Iteration:', iteration, ' Objective:', fnow, '  Scale:', beta, '\r',
-            sys.stdout.flush()
-
-        if success:
-            # Test for termination
-            if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
-                status='converged'
-                return x, flog, function_eval, status
-
-            else:
-                # Update variables for new position
-                fold = fnew
-                gradold = gradnew
-                gradnew = gradf(x, *optargs)
-                # If the gradient is zero then we are done.
-                if np.dot(gradnew,gradnew) == 0:
-                    return x, flog, function_eval, status
-
-        # Adjust beta according to comparison ratio.
-        if Delta < 0.25:
-            beta = min(4.0*beta, betamax)
-        if Delta > 0.75:
-            beta = max(0.5*beta, betamin)
-
-        # Update search direction using Polak-Ribiere formula, or re-start
-        # in direction of negative gradient after nparams steps.
-        if nsuccess == x.size:
-            d = -gradnew
-            nsuccess = 0
-        elif success:
-            gamma = np.dot(gradold - gradnew,gradnew)/(mu)
-            d = gamma*d - gradnew
-
-    # If we get here, then we haven't terminated in the given number of
-    # iterations.
-    status = "maxiter exceeded"
-
-    return x, flog, function_eval, status
--- a/GPy/models/Bayesian_GPLVM.py
+++ b/GPy/models/Bayesian_GPLVM.py
@ -27,7 +27,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):

    """
    def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
-                 Z=None, kernel=None, oldpsave=5, _debug=False,
+                 Z=None, kernel=None, oldpsave=10, _debug=False,
                 **kwargs):
        if X == None:
            X = self.initialise_latent(init, Q, Y)
@ -167,8 +167,12 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
 #         d_dmu = (dL_dmu).flatten()
 #         d_dS = (dL_dS).flatten()
        # ========================
-        dbound_dmuS = np.hstack((d_dmu, d_dS))
-        return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
+        self.dbound_dmuS = np.hstack((d_dmu, d_dS))
+        self.dbound_dZtheta = sparse_GP._log_likelihood_gradients(self)
+        return np.hstack((self.dbound_dmuS.flatten(), self.dbound_dZtheta))
+
+    def _log_likelihood_normal_gradients(self):
+        Si, _, _, _ = pdinv(self.X_variance)

    def plot_latent(self, which_indices=None, *args, **kwargs):

@ -263,7 +267,7 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):

        param_dict = dict(self._savedparams)
        gradient_dict = dict(self._savedgradients)
-        kmm_dict = dict(self._savedpsiKmm)
+#         kmm_dict = dict(self._savedpsiKmm)
        iters = np.array(param_dict.keys())
        ABCD_dict = np.array(self._savedABCD)
        self.showing = 0
--- a/GPy/util/datasets.py
+++ b/GPy/util/datasets.py
@ -4,6 +4,7 @@ import numpy as np
 import GPy
 import scipy.sparse
 import scipy.io
+import cPickle as pickle
 data_path = os.path.join(os.path.dirname(__file__), 'datasets')
 default_seed = 10000

@ -96,6 +97,19 @@ def stick():
    lbls = 'connect'
    return {'Y': Y, 'connect' : connect, 'info': "Stick man data from Ohio."}

+def swiss_roll_generated(N=1000, sigma=0.0):
+    """
+    Load a random subset of the pre-generated swiss roll data set.
+
+    Reads 'swiss_roll.pickle' from ``data_path``, picks at most ``N`` rows
+    at random without replacement (using ``np.random``) and returns them
+    sorted by ascending ``t``.
+
+    :param N: number of points to draw; if it exceeds the number of stored
+        points, all stored points are returned.
+    :param sigma: accepted but currently unused by this function
+        (NOTE(review): presumably the noise level of the data -- confirm).
+    :returns: dict with keys 'Y' (the selected data rows), 't' (the values
+        the rows are sorted by) and 'colors' (one row per selected point).
+    """
+    # Pickled data is binary: open with 'rb' so loading does not depend on
+    # the platform's text-mode newline translation (and works on Python 3).
+    with open(os.path.join(data_path, 'swiss_roll.pickle'), 'rb') as f:
+        data = pickle.load(f)
+    Na = data['Y'].shape[0]
+    # Random subset of row indices, truncated to the requested size.
+    perm = np.random.permutation(np.r_[:Na])[:N]
+    Y = data['Y'][perm, :]
+    t = data['t'][perm]
+    c = data['colors'][perm, :]
+    # Reorder all three arrays by t so they keep a common ordering.
+    so = np.argsort(t)
+    Y = Y[so, :]
+    t = t[so]
+    c = c[so, :]
+    return {'Y':Y, 't':t, 'colors':c}

 def swiss_roll_1000():
    mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data'))
@ -105,8 +119,7 @@ def swiss_roll_1000():
 def swiss_roll(N=3000):
    mat_data = scipy.io.loadmat(os.path.join(data_path, 'swiss_roll_data.mat'))
    Y = mat_data['X_data'][:, 0:N].transpose()
-    import ipdb;ipdb.set_trace()
-    return {'Y': Y, 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}
+    return {'Y': Y, 'X': mat_data['X_data'], 'info': "The first 3,000 points from the swiss roll data of Tennenbaum, de Silva and Langford (2001)."}

 def toy_rbf_1d(seed=default_seed):
    np.random.seed(seed=seed)
--- a/GPy/util/datasets/swiss_roll.pickle
+++ b/GPy/util/datasets/swiss_roll.pickle