rebased from master in order to get all the goodies

2026-05-24 14:15:14 +02:00 · 2013-02-25 11:49:15 +00:00 · 2013-02-25 11:49:15 +00:00 · bc80c0b62d
commit bc80c0b62d
parent d82763be39 1705ecce91
109 changed files with 18225 additions and 1854 deletions
--- a/GPy/examples/BGPLVM_demo.py
+++ b/GPy/examples/BGPLVM_demo.py
@ -0,0 +1,37 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+import numpy as np
+import pylab as pb
+import GPy
+np.random.seed(123344)
+
+N = 10
+M = 3
+Q = 2
+D = 4
+#generate GPLVM-like data
+X = np.random.rand(N, Q)
+k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+K = k.K(X)
+Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+
+k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+# k = GPy.kern.rbf(Q) + GPy.kern.rbf(Q) + GPy.kern.white(Q)
+# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+# k = GPy.kern.rbf(Q, ARD = False)  + GPy.kern.white(Q, 0.00001)
+
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k,  M=M)
+m.constrain_positive('(rbf|bias|noise|white|S)')
+# m.constrain_fixed('S', 1)
+
+# pb.figure()
+# m.plot()
+# pb.title('PCA initialisation')
+# pb.figure()
+# m.optimize(messages = 1)
+# m.plot()
+# pb.title('After optimisation')
+m.ensure_default_constraints()
+m.randomize()
+m.checkgrad(verbose = 1)
--- a/GPy/examples/init.py
+++ b/GPy/examples/init.py
@ -0,0 +1,8 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
+# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
+import classification
+import regression
+import unsupervised
--- a/GPy/examples/classification.py
+++ b/GPy/examples/classification.py
@ -3,16 +3,15 @@


 """
-Simple Gaussian Processes classification
+Gaussian Processes classification
 """
 import pylab as pb
 import numpy as np
 import GPy

 default_seed=10000
-######################################
-## 2 dimensional example
-def crescent_data(model_type='Full', inducing=10, seed=default_seed):
+
+def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
    """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.

    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@ -21,20 +20,28 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
    :type inducing: int
    """
+
    data = GPy.util.datasets.crescent_data(seed=seed)
-    likelihood = GPy.inference.likelihoods.probit(data['Y'])
+
+    # Kernel object
+    kernel = GPy.kern.rbf(data['X'].shape[1])
+
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'],distribution)
+

    if model_type=='Full':
-        m = GPy.models.GP_EP(data['X'],likelihood)
+        m = GPy.models.GP(data['X'],likelihood,kernel)
    else:
        # create sparse GP EP model
        m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)

-    m.approximate_likelihood()
+    m.update_likelihood_approximation()
    print(m)

    # optimize
-    m.em()
+    m.optimize()
    print(m)

    # plot
@ -42,54 +49,67 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
    return m

 def oil():
-    """Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood."""
+    """
+    Run a Gaussian process classification on the oil data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
+    """
    data = GPy.util.datasets.oil()
-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
+    # Kernel object
+    kernel = GPy.kern.rbf(12)

-    # create simple GP model
-    m = GPy.models.GP_EP(data['X'],likelihood)
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(data['Y'][:, 0:1],distribution)

-    # contrain all parameters to be positive
+    # Create GP model
+    m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
+
+    # Constrain all parameters to be positive
    m.constrain_positive('')
    m.tie_param('lengthscale')
-    m.approximate_likelihood()
+    m.update_likelihood_approximation()

-    # optimize
+    # Optimize
    m.optimize()

-    # plot
-    #m.plot()
    print(m)
    return m

-def toy_linear_1d_classification(model_type='Full', inducing=4, seed=default_seed):
-    """Simple 1D classification example.
-    :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
+def toy_linear_1d_classification(seed=default_seed):
+    """
+    Simple 1D classification example
    :param seed : seed value for data generation (default is 4).
    :type seed: int
-    :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
-    :type inducing: int
    """
+
    data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
-    likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])
-    assert model_type in ('Full','DTC','FITC')
+    Y = data['Y'][:, 0:1]
+    Y[Y == -1] = 0

-    # create simple GP model
-    if model_type=='Full':
-        m = GPy.models.simple_GP_EP(data['X'],likelihood)
-    else:
-        # create sparse GP EP model
-        m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
-            
+    # Kernel object
+    kernel = GPy.kern.rbf(1)

-    m.constrain_positive('var')
-    m.constrain_positive('len')
-    m.tie_param('lengthscale')
-    m.approximate_likelihood()
+    # Likelihood object
+    distribution = GPy.likelihoods.likelihood_functions.probit()
+    likelihood = GPy.likelihoods.EP(Y,distribution)

-    # Optimize and plot
-    m.em(plot_all=False) # EM algorithm
+    # Model definition
+    m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
+
+    # Optimize
+    """
+    EPEM runs a loop that consists of two steps:
+    1) EP likelihood approximation:
+        m.update_likelihood_approximation()
+    2) Parameters optimization:
+        m.optimize()
+    """
+    m.EPEM()
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f()
+    pb.subplot(212)
    m.plot()
-
    print(m)
+
    return m
--- a/GPy/examples/oil_flow_demo.py
+++ b/GPy/examples/oil_flow_demo.py
@ -0,0 +1,57 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import cPickle as pickle
+import numpy as np
+import pylab as pb
+import GPy
+import pylab as plt
+np.random.seed(3)
+
+def plot_oil(X, theta, labels, label):
+    plt.figure()
+    X = X[:,np.argsort(theta)[:2]]
+    flow_type = (X[labels[:,0]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'rx')
+    flow_type = (X[labels[:,1]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'gx')
+    flow_type = (X[labels[:,2]==1])
+    plt.plot(flow_type[:,0], flow_type[:,1], 'bx')
+    plt.title(label)
+
+data = pickle.load(open('../../../GPy_assembla/datasets/oil_flow_3classes.pickle', 'r'))
+
+Y = data['DataTrn']
+N, D = Y.shape
+selected = np.random.permutation(N)[:350]
+labels = data['DataTrnLbls'][selected]
+Y = Y[selected]
+N, D = Y.shape
+Y -= Y.mean(axis=0)
+# Y /= Y.std(axis=0)
+
+Q = 5
+k = GPy.kern.linear(Q, ARD = True) + GPy.kern.white(Q)
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M = 20)
+m.constrain_positive('(rbf|bias|S|linear|white|noise)')
+
+# m.unconstrain('noise')
+# m.constrain_fixed('noise_precision', 50.0)
+# m.unconstrain('white')
+# m.constrain_bounded('white', 1e-6, 10.0)
+# plot_oil(m.X, np.array([1,1]), labels, 'PCA initialization')
+m.optimize(messages = True)
+# m.optimize('tnc', messages = True)
+# plot_oil(m.X, m.kern.parts[0].lengthscale, labels, 'B-GPLVM')
+# # pb.figure()
+# m.plot()
+# pb.title('PCA initialisation')
+# pb.figure()
+# m.optimize(messages = 1)
+# m.plot()
+# pb.title('After optimisation')
+# m = GPy.models.GPLVM(Y, Q)
+# m.constrain_positive('(white|rbf|bias|noise)')
+# m.optimize()
+# plot_oil(m.X, np.array([1,1]), labels, 'GPLVM')
--- a/GPy/examples/poisson.py
+++ b/GPy/examples/poisson.py
@ -0,0 +1,47 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+"""
+Gaussian Processes + Expectation Propagation - Poisson Likelihood
+"""
+import pylab as pb
+import numpy as np
+import GPy
+
+default_seed=10000
+
+def  toy_1d(seed=default_seed):
+    """
+    Simple 1D example with a Poisson likelihood
+    :param seed : seed value for data generation (default is 10000).
+    :type seed: int
+    """
+
+    X = np.arange(0,100,5)[:,None]
+    F = np.round(np.sin(X/18.) + .1*X) + np.arange(5,25)[:,None]
+    E = np.random.randint(-5,5,20)[:,None]
+    Y = F + E
+
+    kernel = GPy.kern.rbf(1)
+    distribution = GPy.likelihoods.likelihood_functions.Poisson()
+    likelihood = GPy.likelihoods.EP(Y,distribution)
+
+    m = GPy.models.GP(X,likelihood,kernel)
+    m.ensure_default_constraints()
+
+    # Approximate likelihood
+    m.update_likelihood_approximation()
+
+    # Optimize and plot
+    m.optimize()
+    #m.EPEM FIXME
+    print m
+
+    # Plot
+    pb.subplot(211)
+    m.plot_f() #GP plot
+    pb.subplot(212)
+    m.plot() #Output plot
+
+    return m
--- a/GPy/examples/regression.py
+++ b/GPy/examples/regression.py
@ -20,7 +20,6 @@ def toy_rbf_1d():
    # optimize
    m.ensure_default_constraints()
    m.optimize()
-
    # plot
    m.plot()
    print(m)
--- a/GPy/examples/sparse_GPLVM_demo.py
+++ b/GPy/examples/sparse_GPLVM_demo.py
@ -9,19 +9,17 @@ np.random.seed(1)
 print "sparse GPLVM with RBF kernel"

 N = 100
-M = 4
-Q = 2
+M = 8
+Q = 1
 D = 2
 #generate GPLVM-like data
 X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q,1.,2*np.ones((1,))) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
 K = k.K(X)
 Y = np.random.multivariate_normal(np.zeros(N),K,D).T

 m = GPy.models.sparse_GPLVM(Y, Q, M=M)
-m.constrain_positive('(rbf|bias|noise)')
-m.constrain_bounded('white', 1e-3, 0.1)
-# m.plot()
+m.constrain_positive('(rbf|bias|noise|white)')

 pb.figure()
 m.plot()
--- a/GPy/examples/sparse_GP_regression_demo.py
+++ b/GPy/examples/sparse_GP_regression_demo.py
@ -11,7 +11,7 @@ import numpy as np
 import GPy
 np.random.seed(2)
 pb.ion()
-N = 500
+N = 400
 M = 5

 ######################################
@ -27,20 +27,13 @@ noise = GPy.kern.white(1)
 kernel = rbf + noise

 # create simple GP model
-m1 = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)

-# contrain all parameters to be positive
-m1.constrain_positive('(variance|lengthscale|precision)')
-#m1.constrain_positive('(variance|lengthscale)')
-#m1.constrain_fixed('prec',10.)
+m.constrain_positive('(variance|lengthscale|precision)')

-
-#check gradient FIXME unit test please
-m1.checkgrad()
-# optimize and plot
-m1.optimize('tnc', messages = 1)
-m1.plot()
-# print(m1)
+m.checkgrad(verbose=1)
+m.optimize('tnc', messages = 1)
+m.plot()

 ######################################
 ## 2 dimensional example
--- a/GPy/examples/sparse_ep_fix.py
+++ b/GPy/examples/sparse_ep_fix.py
@ -0,0 +1,60 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+import numpy as np
+"""
+Sparse Gaussian Processes regression with an RBF kernel
+"""
+import pylab as pb
+import numpy as np
+import GPy
+np.random.seed(2)
+pb.ion()
+N = 500
+M = 5
+
+pb.close('all')
+######################################
+## 1 dimensional example
+
+# sample inputs and outputs
+X = np.random.uniform(-3.,3.,(N,1))
+#Y = np.sin(X)+np.random.randn(N,1)*0.05
+F = np.sin(X)+np.random.randn(N,1)*0.05
+Y = np.ones([F.shape[0],1])
+Y[F<0] = -1
+likelihood = GPy.inference.likelihoods.probit(Y)
+
+# construct kernel
+rbf =  GPy.kern.rbf(1)
+noise = GPy.kern.white(1)
+kernel = rbf + noise
+
+# create simple GP model
+#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+
+# constrain all parameters to be positive
+#m.constrain_fixed('prec',100.)
+m = GPy.models.sparse_GP(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
+#    m.approximate_likelihood()
+print m.checkgrad()
+m.optimize('tnc', messages = 1)
+m.plot(samples=3)
+print m
+
+n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
+n.ensure_default_constraints()
+if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian):
+    n.approximate_likelihood()
+print n.checkgrad()
+pb.figure()
+n.plot()
+
+"""
+m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
+m.ensure_default_constraints()
+print m.checkgrad()
+"""
--- a/GPy/examples/warped_GP_demo.py
+++ b/GPy/examples/warped_GP_demo.py
@ -7,7 +7,7 @@ import scipy as sp
 import pdb, sys, pickle
 import matplotlib.pylab as plt
 import GPy
-np.random.seed(1)
+np.random.seed(3)

 N = 100
 # sample inputs and outputs
@ -22,14 +22,14 @@ Zmin = Z.min()
 Z = (Z-Zmin)/(Zmax-Zmin) - 0.5

 m = GPy.models.warpedGP(X, Z, warping_terms = 2)
-m.constrain_positive('(tanh_a|tanh_b|tanh_d|rbf|white|bias)')
+m.constrain_positive('(tanh_a|tanh_b|tanh_d|rbf|noise|bias)')
 m.randomize()
 plt.figure()
 plt.xlabel('predicted f(Z)')
 plt.ylabel('actual f(Z)')
-plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'before training')
+plt.plot(m.likelihood.Y, Y, 'o', alpha = 0.5, label = 'before training')
 m.optimize(messages = True)
-plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'after training')
+plt.plot(m.likelihood.Y, Y, 'o', alpha = 0.5, label = 'after training')
 plt.legend(loc = 0)
 m.plot_warping()
 plt.figure()
@ -37,7 +37,7 @@ plt.title('warped GP fit')
 m.plot()

 m1 = GPy.models.GP_regression(X, Z)
-m1.constrain_positive('(rbf|white|bias)')
+m1.constrain_positive('(rbf|noise|bias)')
 m1.randomize()
 m1.optimize(messages = True)
 plt.figure()