Merge branch 'devel' of github.com:SheffieldML/GPy into devel

This commit is contained in:
Nicolas 2013-06-04 15:57:47 +01:00
commit bce4c8723c
4 changed files with 23 additions and 80 deletions

View file

@ -7,6 +7,7 @@ from matplotlib import pyplot as plt
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import swiss_roll_generated
from GPy.core.transformations import logexp
default_seed = np.random.seed(123344)
@ -17,11 +18,11 @@ def BGPLVM(seed=default_seed):
D = 4
# generate GPLVM-like data
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N), K, D).T
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
k = GPy.kern.rbf(Q, ARD=True) + GPy.kern.linear(Q, ARD=True) + GPy.kern.rbf(Q, ARD=True) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
# k = GPy.kern.rbf(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
@ -187,8 +188,8 @@ def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .1 * np.random.randn(*Y1.shape)
Y2 += .1 * np.random.randn(*Y2.shape)
Y1 += .3 * np.random.randn(*Y1.shape)
Y2 += .2 * np.random.randn(*Y2.shape)
Y3 += .1 * np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
@ -262,13 +263,13 @@ def bgplvm_simulation(optimize='scg',
# m.constrain('variance|noise', logexp_clipped())
m.ensure_default_constraints()
m['noise'] = Y.var() / 100.
m['linear_variance'] = .01
m['linear_variance'] = .001
if optimize:
print "Optimizing model:"
m.optimize('bfgs', max_iters=max_f_eval,
m.optimize('scg', max_iters=max_f_eval,
max_f_eval=max_f_eval,
messages=True, gtol=1e-2)
messages=True, gtol=1e-6)
if plot:
import pylab
m.plot_X_1d()
@ -277,23 +278,21 @@ def bgplvm_simulation(optimize='scg',
m.kern.plot_ARD()
return m
def mrd_simulation(optimize=True, plot_sim=False, **kw):
D1, D2, D3, N, M, Q = 150, 200, 400, 300, 3, 7
def mrd_simulation(optimize=True, plot=True, plot_sim=True, **kw):
D1, D2, D3, N, M, Q = 150, 200, 400, 500, 3, 7
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd
from GPy import kern
from GPy.core.transformations import logexp_clipped
reload(mrd); reload(kern)
k = kern.linear(Q, [0.05] * Q, True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="concat", initz='permute', **kw)
k = kern.linear(Q, [.05] * Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = mrd.MRD(Ylist, Q=Q, M=M, kernels=k, initx="", initz='permute', **kw)
for i, Y in enumerate(Ylist):
m['{}_noise'.format(i + 1)] = Y.var() / 100.
# m.constrain('variance|noise', logexp_clipped(1e-6))
m.ensure_default_constraints()
# DEBUG
@ -301,8 +300,10 @@ def mrd_simulation(optimize=True, plot_sim=False, **kw):
if optimize:
print "Optimizing Model:"
m.optimize('bfgs', messages=1, max_iters=3e3)
m.optimize('scg', messages=1, max_iters=5e4, max_f_eval=5e4)
if plot:
m.plot_X_1d()
m.plot_scales()
return m
def brendan_faces():
@ -323,7 +324,7 @@ def brendan_faces():
m.ensure_default_constraints()
m.optimize('scg', messages=1, max_f_eval=10000)
ax = m.plot_latent(which_indices=(0,1))
ax = m.plot_latent(which_indices=(0, 1))
y = m.likelihood.Y[0, :]
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)

View file

@ -52,7 +52,7 @@ def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xto
ftol = 1e-6
if gtol is None:
gtol = 1e-5
sigma0 = 1.0e-4
sigma0 = 1.0e-8
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold

View file

@ -33,8 +33,11 @@ class MRD(model):
Initial latent space
:param X_variance:
Initial latent space variance
:param init: [PCA|random]
initialization method to use
:param init: [concat|single|random]
initialization method to use:
*concat: PCA on concatenated outputs
*single: PCA on each output
*random: random
:param M:
number of inducing inputs to use
:param Z:

View file

@ -1,61 +0,0 @@
the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute; it's also confusing for first time users. Should only be returned if the user explicitly requests it.
FIXED
When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X); you have to do rbf.K(X, X)
FIXED
Change Youter to YYT (Youter doesn't mean anything for matrices).
FIXED
Change get_param and set_param to get_params and set_params
FIXED
Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
FIXED
The Matern kernels (at least the 52) are still working in the ARD manner, which means they wouldn't run for very large input dimension. They need to be fixed to match the RBF.
FIXED
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
TODO, a priority for this release
Missing kernels: polynomial, rational quadratic.
TODO, should be straightforward when the above is fixed.
Need an implementation of scaled conjugate gradients for the optimizers.
UPSTREAM: SciPy is tidying up the optimize module. Let's wait for their next release.
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
As above.
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
Any word from Carl yet?
Get constrain param by default inside model creation.
Well, we have ensure_default_constraints. There are some technical difficulties in doing it inside model creation, so perhaps this is something for a later release.
Bug when running classification.crescent_data()
TODO.
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
Upstream: Waiting for the new scipy, where the optimisers have been unified. Obviously it'd be much better to be able to specify a unified set of args.
Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
Upstream, as above
A dictionary for parameter storage? So we can go through names easily?
Wontfix. Dictionaries bring up all kinds of problems since they're not ordered. It's easy enough to do:
for val, name in zip(m._get_params(), m._get_param_names()): foobar
A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
TODO, agree this would be helpful.
Diagonal noise covariance function
TODO this is now straightforward using the likelihood framework, or as a kern. NF also requires a similar kind of kern function (a fixed form kernel)
Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
Excellent proposal, but lots of work: suggest leaving for the next release?