Merge branch 'master' of github.com:SheffieldML/GPy

James Hensman 2012-12-17 11:17:10 +00:00
commit 3c708b3bd6
10 changed files with 387 additions and 26 deletions

.gitignore vendored

@@ -36,3 +36,6 @@ nosetests.xml
 #vim
 *.swp
+#bfgs optimiser leaves this lying around
+iterate.dat


@@ -227,8 +227,8 @@ class model(parameterised):
         start = self.extract_param()
         optimizer = optimization.get_optimizer(optimizer)
-        opt = optimizer(start, f_fp=f_fp, f=f, fp=fp, **kwargs)
-        opt.run()
+        opt = optimizer(start, model=self, **kwargs)
+        opt.run(f_fp=f_fp, f=f, fp=fp)
         self.optimization_runs.append(opt)
         self.expand_param(opt.x_opt)
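The change above moves the objective callables out of the optimizer constructor: the optimizer is now built with a reference to the model (model=self), and f, fp and f_fp are supplied to run() instead. For orientation, the three callables are related as follows (a generic sketch, not code from this commit):

# f(x)    -> scalar objective (for GPy, e.g. the negative log marginal likelihood)
# fp(x)   -> gradient of the objective with respect to the parameter vector x
# f_fp(x) -> (f(x), fp(x)) computed together, the form scipy's fmin_tnc and fmin_l_bfgs_b expect

def make_f_fp(f, fp):
    """Combine separate objective and gradient callables into the f_fp form (illustrative helper)."""
    return lambda x: (f(x), fp(x))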


@@ -19,15 +19,12 @@ class Optimizer():
     :param messages: print messages from the optimizer?
     :type messages: (True | False)
     :param max_f_eval: maximum number of function evaluations
     :rtype: optimizer object.
     """
-    def __init__(self, x_init, f_fp, f, fp, messages=False, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
+    def __init__(self, x_init, messages=False, model=None, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
         self.opt_name = None
-        self.f_fp = f_fp
-        self.f = f
-        self.fp = fp
         self.x_init = x_init
         self.messages = messages
         self.f_opt = None
@@ -40,14 +37,15 @@ class Optimizer():
         self.xtol = xtol
         self.gtol = gtol
         self.ftol = ftol
+        self.model = model

-    def run(self):
+    def run(self, **kwargs):
         start = dt.datetime.now()
-        self.opt()
+        self.opt(**kwargs)
         end = dt.datetime.now()
         self.time = str(end-start)

-    def opt(self):
+    def opt(self, f_fp=None, f=None, fp=None):
         raise NotImplementedError, "this needs to be implemented to use the optimizer class"

     def plot(self):
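With the new interface, a custom optimizer only has to override opt() and accept the callables as keyword arguments, mirroring the scipy wrappers below. A sketch of what such a subclass could look like (the gradient-descent logic, fixed step size and status string are invented for illustration; only the Optimizer interface comes from this diff, and the notes file at the end of this commit asks for a gradient-descent optimizer of roughly this shape):

class opt_gradient_descent(Optimizer):
    """Toy gradient-descent optimizer written against the new opt() signature (illustrative only)."""
    def __init__(self, *args, **kwargs):
        Optimizer.__init__(self, *args, **kwargs)
        self.opt_name = "naive gradient descent (illustration)"

    def opt(self, f_fp=None, f=None, fp=None):
        assert f_fp is not None, "gradient descent requires f_fp"
        x = self.x_init.copy()
        step = 1e-3  # assumed fixed step size
        for i in range(int(self.max_f_eval)):
            obj, grad = f_fp(x)
            x = x - step * grad
        self.x_opt = x
        self.f_opt = f_fp(x)[0]
        self.funct_eval = int(self.max_f_eval)
        self.status = 'maximum number of function evaluations reached'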
@@ -71,7 +69,7 @@ class opt_tnc(Optimizer):
         Optimizer.__init__(self, *args, **kwargs)
         self.opt_name = "TNC (Scipy implementation)"

-    def opt(self):
+    def opt(self, f_fp=None, f=None, fp=None):
         """
         Run the TNC optimizer
@@ -79,7 +77,7 @@ class opt_tnc(Optimizer):
         tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
                          'Line search failed', 'Function is constant']

-        assert self.f_fp != None, "TNC requires f_fp"
+        assert f_fp != None, "TNC requires f_fp"

         opt_dict = {}
         if self.xtol is not None:
@@ -89,10 +87,10 @@ class opt_tnc(Optimizer):
         if self.gtol is not None:
             opt_dict['pgtol'] = self.gtol

-        opt_result = optimize.fmin_tnc(self.f_fp, self.x_init, messages = self.messages,
+        opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages,
                                        maxfun = self.max_f_eval, **opt_dict)
         self.x_opt = opt_result[0]
-        self.f_opt = self.f_fp(self.x_opt)[0]
+        self.f_opt = f_fp(self.x_opt)[0]
         self.funct_eval = opt_result[1]
         self.status = tnc_rcstrings[opt_result[2]]
@@ -101,14 +99,14 @@ class opt_lbfgsb(Optimizer):
         Optimizer.__init__(self, *args, **kwargs)
         self.opt_name = "L-BFGS-B (Scipy implementation)"

-    def opt(self):
+    def opt(self, f_fp=None, f=None, fp=None):
         """
         Run the optimizer
         """
         rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']

-        assert self.f_fp != None, "BFGS requires f_fp"
+        assert f_fp != None, "BFGS requires f_fp"

         if self.messages:
             iprint = 1
@@ -123,10 +121,10 @@ class opt_lbfgsb(Optimizer):
         if self.gtol is not None:
             opt_dict['pgtol'] = self.gtol

-        opt_result = optimize.fmin_l_bfgs_b(self.f_fp, self.x_init, iprint = iprint,
+        opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint,
                                             maxfun = self.max_f_eval, **opt_dict)
         self.x_opt = opt_result[0]
-        self.f_opt = self.f_fp(self.x_opt)[0]
+        self.f_opt = f_fp(self.x_opt)[0]
         self.funct_eval = opt_result[2]['funcalls']
         self.status = rcstrings[opt_result[2]['warnflag']]
@@ -135,7 +133,7 @@ class opt_simplex(Optimizer):
         Optimizer.__init__(self, *args, **kwargs)
         self.opt_name = "Nelder-Mead simplex routine (via Scipy)"

-    def opt(self):
+    def opt(self, f_fp=None, f=None, fp=None):
         """
         The simplex optimizer does not require gradients.
         """
@@ -150,7 +148,7 @@ class opt_simplex(Optimizer):
         if self.gtol is not None:
             print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"

-        opt_result = optimize.fmin(self.f, self.x_init, (), disp = self.messages,
+        opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages,
                                    maxfun = self.max_f_eval, full_output=True, **opt_dict)
         self.x_opt = opt_result[0]


@@ -2,5 +2,5 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)

-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy
 from kern import kern


@@ -21,6 +21,7 @@ from Brownian import Brownian as Brownianpart
 #TODO these constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly without them.

 def rbf(D, variance=1., lengthscale=1.):
     """
     Construct an RBF kernel
@@ -170,3 +171,20 @@ def Brownian(D,variance=1.):
     """
     part = Brownianpart(D,variance)
     return kern(D, [part])
+
+import sympy as sp
+from sympykern import spkern
+from sympy.parsing.sympy_parser import parse_expr
+
+def rbf_sympy(D, variance=1., lengthscale=1.):
+    """
+    Radial Basis Function covariance.
+    """
+    X = [sp.var('x%i'%i) for i in range(D)]
+    Z = [sp.var('z%i'%i) for i in range(D)]
+    rbf_variance = sp.var('rbf_variance', positive=True)
+    rbf_lengthscale = sp.var('rbf_lengthscale', positive=True)
+    dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+    dist = parse_expr(dist_string)
+    f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+    return kern(D, [spkern(D, f, np.array([variance, lengthscale]))])
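The rbf_sympy constructor shows the general recipe for sympy-backed kernels: declare x%i / z%i symbols, declare the hyperparameters as sympy variables, build the covariance expression, and wrap it in spkern inside a kern. As a sketch of how another kernel could be written the same way (this linear_sympy helper is not part of the commit, purely an illustration of the pattern):

def linear_sympy(D, variance=1.):
    """
    Linear covariance k(x,z) = variance * x'z, built through sympy (illustrative).
    """
    X = [sp.var('x%i'%i) for i in range(D)]
    Z = [sp.var('z%i'%i) for i in range(D)]
    lin_variance = sp.var('lin_variance', positive=True)
    dot_string = ' + '.join(['x%i*z%i'%(i,i) for i in range(D)])
    f = lin_variance*parse_expr(dot_string)
    return kern(D, [spkern(D, f, np.array([variance]))])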

GPy/kern/sympy_helpers.cpp Normal file

@@ -0,0 +1,10 @@
#include <math.h>

double DiracDelta(double x){
    if((x<0.000001) & (x>-0.000001)) //go on, laugh at my c++ skills
        return 1.0;
    else
        return 0.0;
};

double DiracDelta(double x, int foo){
    return 0.0;
};

GPy/kern/sympy_helpers.h Normal file

@@ -0,0 +1,3 @@
#include <math.h>
double DiracDelta(double x);
double DiracDelta(double x, int foo);

GPy/kern/sympykern.py Normal file

@@ -0,0 +1,286 @@
import numpy as np
import sympy as sp
from sympy.utilities.codegen import codegen
from sympy.core.cache import clear_cache
from scipy import weave
import re
import os
import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb

from kernpart import kernpart

class spkern(kernpart):
    """
    A kernel object, where all the hard work is done by sympy.

    :param k: the covariance function
    :type k: a positive definite sympy function of x1, z1, x2, z2...

    To construct a new sympy kernel, you'll need to define:
      - a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
      - that's it! we'll extract the variables from the function k.

    Note:
      - to handle multiple inputs, call them x1, z1, etc.
      - to handle multiple correlated outputs, you'll need to define each covariance function and 'cross' covariance function. TODO
    """
    def __init__(self, D, k, param=None):
        self.name = 'sympykern'
        self._sp_k = k
        sp_vars = [e for e in k.atoms() if e.is_Symbol]
        self._sp_x = sorted([e for e in sp_vars if e.name[0]=='x'], key=lambda x: int(x.name[1:]))
        self._sp_z = sorted([e for e in sp_vars if e.name[0]=='z'], key=lambda z: int(z.name[1:]))
        assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
        assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
        assert len(self._sp_x)==len(self._sp_z)
        self.D = len(self._sp_x)
        assert self.D == D
        self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')], key=lambda e: e.name)
        self.Nparam = len(self._sp_theta)

        #deal with param
        if param is None:
            param = np.ones(self.Nparam)
        assert param.size==self.Nparam
        self.set_param(param)

        #Differentiate!
        self._sp_dk_dtheta = [sp.diff(k,theta) for theta in self._sp_theta]
        self._sp_dk_dx = [sp.diff(k,xi) for xi in self._sp_x]
        #self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]

        #self.compute_psi_stats()
        self._gen_code()

        self.weave_kwargs = {
            'support_code': self._function_code,
            'include_dirs': [tempfile.gettempdir(), os.path.join(current_dir, 'kern/')],
            'headers': ['"sympy_helpers.h"'],
            'sources': [os.path.join(current_dir, "kern/sympy_helpers.cpp")],
            #'extra_compile_args': ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],
            'extra_compile_args': [],
            'extra_link_args': ['-lgomp'],
            'verbose': True}

    def __add__(self, other):
        return spkern(self._sp_k + other._sp_k)
    def compute_psi_stats(self):
        #define some normal distributions
        mus = [sp.var('mu%i'%i, real=True) for i in range(self.D)]
        Ss = [sp.var('S%i'%i, positive=True) for i in range(self.D)]
        normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]

        #do some integration!
        #self._sp_psi0 = ??
        self._sp_psi1 = self._sp_k
        for i in range(self.D):
            print 'performing integrals %i of %i'%(i+1, 2*self.D)
            sys.stdout.flush()
            self._sp_psi1 *= normals[i]
            self._sp_psi1 = sp.integrate(self._sp_psi1, (self._sp_x[i], -sp.oo, sp.oo))
            clear_cache()
        self._sp_psi1 = self._sp_psi1.simplify()

        #and here's psi2 (eek!)
        zprime = [sp.Symbol('zp%i'%i) for i in range(self.D)]
        self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z, zprime))
        for i in range(self.D):
            print 'performing integrals %i of %i'%(self.D+i+1, 2*self.D)
            sys.stdout.flush()
            self._sp_psi2 *= normals[i]
            self._sp_psi2 = sp.integrate(self._sp_psi2, (self._sp_x[i], -sp.oo, sp.oo))
            clear_cache()
        self._sp_psi2 = self._sp_psi2.simplify()
    def _gen_code(self):
        #generate c functions from sympy objects
        (foo_c, self._function_code), (foo_h, self._function_header) = \
            codegen([('k', self._sp_k)] \
                + [('dk_d%s'%x.name, dx) for x, dx in zip(self._sp_x, self._sp_dk_dx)]\
                #+ [('dk_d%s'%z.name, dz) for z, dz in zip(self._sp_z, self._sp_dk_dz)]\
                + [('dk_d%s'%theta.name, dtheta) for theta, dtheta in zip(self._sp_theta, self._sp_dk_dtheta)]\
                , "C", 'foobar', argument_sequence=self._sp_x+self._sp_z+self._sp_theta)

        #put the header file where we can find it
        f = file(os.path.join(tempfile.gettempdir(), 'foobar.h'), 'w')
        f.write(self._function_header)
        f.close()

        #get rid of derivatives of DiracDelta
        self._function_code = re.sub('DiracDelta\(.+?,.+?\)', '0.0', self._function_code)

        #Here's some code to do the looping for K
        arglist = ", ".join(["X[i*D+%s]"%x.name[1:] for x in self._sp_x]\
                + ["Z[j*D+%s]"%z.name[1:] for z in self._sp_z]\
                + ["param[%i]"%i for i in range(self.Nparam)])
        self._K_code =\
        """
        int i;
        int j;
        int N = target_array->dimensions[0];
        int M = target_array->dimensions[1];
        int D = X_array->dimensions[1];
        #pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<M;j++){
                target[i*M+j] = k(%s);
            }
        }
        %s
        """%(arglist, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        diag_arglist = re.sub('Z', 'X', arglist)
        diag_arglist = re.sub('j', 'i', diag_arglist)
        #Here's some code to do the looping for Kdiag
        self._Kdiag_code =\
        """
        int i;
        int N = target_array->dimensions[0];
        int D = X_array->dimensions[1];
        #pragma omp parallel for
        for (i=0;i<N;i++){
            target[i] = k(%s);
        }
        %s
        """%(diag_arglist, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #here's some code to compute gradients
        funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*M+j]*dk_d%s(%s);'%(i, theta.name, arglist) for i, theta in enumerate(self._sp_theta)])
        self._dK_dtheta_code =\
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int M = partial_array->dimensions[1];
        int D = X_array->dimensions[1];
        #pragma omp parallel for private(j)
        for (i=0;i<N;i++){
            for (j=0;j<M;j++){
                %s
            }
        }
        %s
        """%(funclist, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #here's some code to compute gradients for Kdiag TODO: this is yucky.
        diag_funclist = re.sub('Z', 'X', funclist, count=0)
        diag_funclist = re.sub('j', 'i', diag_funclist)
        diag_funclist = re.sub('partial\[i\*M\+i\]', 'partial[i]', diag_funclist)
        self._dKdiag_dtheta_code =\
        """
        int i;
        int N = partial_array->dimensions[0];
        int D = X_array->dimensions[1];
        for (i=0;i<N;i++){
            %s
        }
        %s
        """%(diag_funclist, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #Here's some code to do gradients wrt x
        gradient_funcs = "\n".join(["target[i*D+%i] += partial[i*M+j]*dk_dx%i(%s);"%(q, q, arglist) for q in range(self.D)])
        self._dK_dX_code = \
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int M = partial_array->dimensions[1];
        int D = X_array->dimensions[1];
        #pragma omp parallel for private(j)
        for (i=0;i<N; i++){
            for (j=0; j<M; j++){
                %s
            }
        }
        %s
        """%(gradient_funcs, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #Here's some code to do gradients wrt z (should be the same as for X, but this is easier)
        gradient_funcs_Z = "\n".join(["target[j*D+%i] += partial[i*M+j]*dk_dz%i(%s);"%(q, q, arglist) for q in range(self.D)])
        self._dK_dZ_code = \
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int M = partial_array->dimensions[1];
        int D = X_array->dimensions[1];
        for (i=0;i<N; i++){
            for (j=0; j<M; j++){
                %s
            }
        }
        %s
        """%(gradient_funcs_Z, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #now for gradients of Kdiag wrt X
        self._dKdiag_dX_code = \
        """
        int i;
        int j;
        int N = partial_array->dimensions[0];
        int M = 0;
        int D = X_array->dimensions[1];
        for (i=0;i<N; i++){
            j = i;
            %s
        }
        %s
        """%(gradient_funcs, "/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed

        #TODO: insert multiple functions here via string manipulation
        #TODO: similar functions for psi_stats
    def K(self, X, Z, target):
        param = self._param
        weave.inline(self._K_code, arg_names=['target','X','Z','param'], **self.weave_kwargs)
        return target

    def Kdiag(self, X, target):
        param = self._param
        weave.inline(self._Kdiag_code, arg_names=['target','X','param'], **self.weave_kwargs)
        return target

    def dK_dtheta(self, partial, X, Z, target):
        param = self._param
        weave.inline(self._dK_dtheta_code, arg_names=['target','X','Z','param','partial'], **self.weave_kwargs)
        return target

    def dKdiag_dtheta(self, partial, X, target):
        param = self._param
        Z = X
        weave.inline(self._dKdiag_dtheta_code, arg_names=['target','X','Z','param','partial'], **self.weave_kwargs)
        return target

    def dK_dX(self, partial, X, Z, target):
        target = np.zeros_like(X)
        param = self._param
        weave.inline(self._dK_dX_code, arg_names=['target','X','Z','param','partial'], **self.weave_kwargs)
        return target

    #def dK_dZ(self, X, Z, partial=None):
        ##TODO: this function might not be necessary
        #target = np.zeros_like(Z)
        #param = self._param
        #weave.inline(self._dK_dZ_code, arg_names=['target','X','Z','param','partial'], **self.weave_kwargs)
        #return target

    def dKdiag_dX(self, partial, X, target):
        param = self._param
        Z = X
        weave.inline(self._dKdiag_dX_code, arg_names=['target','X','Z','param','partial'], **self.weave_kwargs)
        return target

    def set_param(self, param):
        #print param.flags['C_CONTIGUOUS']
        self._param = param.copy()

    def get_param(self):
        return self._param

    def get_param_names(self):
        return [x.name for x in self._sp_theta]
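For orientation, this is roughly how the new sympy kernel is meant to be exercised end to end; a sketch that assumes a working scipy.weave / C compiler setup and uses the rbf_sympy constructor added above (the kern.K(X, X) call pattern is the one described in the notes file below):

import numpy as np
import GPy

X = np.random.randn(10, 2)                                # 10 points in 2 input dimensions
k = GPy.kern.rbf_sympy(2, variance=1.0, lengthscale=0.5)

# the kern wrapper allocates the target arrays and loops over its parts;
# the heavy lifting is the weave-compiled k(...) generated by _gen_code()
K = k.K(X, X)                                             # 10x10 covariance matrix, compiled on first call
print(K.shape)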


@@ -22,8 +22,9 @@ class GPLVM(GP_regression):
     :type init: 'PCA'|'random'
     """
-    def __init__(self, Y, Q, init='PCA', **kwargs):
-        X = self.initialise_latent(init, Q, Y)
+    def __init__(self, Y, Q, init='PCA', X=None, **kwargs):
+        if X is None:
+            X = self.initialise_latent(init, Q, Y)
         GP_regression.__init__(self, X, Y, **kwargs)

     def initialise_latent(self, init, Q, Y):
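With the extra keyword argument, a user can now pass their own latent initialisation instead of relying on PCA or random starts. A small usage sketch (the data shapes are made up, and the import of GPLVM from GPy's models is assumed):

import numpy as np

Y = np.random.randn(50, 12)         # 50 observations, 12 output dimensions
Q = 2                               # latent dimensionality
X0 = np.random.randn(50, Q) * 0.1   # user-supplied latent initialisation

# previously X was always computed by initialise_latent(init, Q, Y);
# now it is only computed when X is not given:
m = GPLVM(Y, Q, X=X0)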

GPy/notes.txt Normal file

@@ -0,0 +1,42 @@
Fails in weird ways if you pass an integer as the input instead of a double to the kernel.
The Matern kernels (at least the 52) still work in the ARD manner, which means they wouldn't run for very large input dimensions. Needs to be fixed to match the RBF.
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
Missing kernels: polynomial, rational quadratic.
Kernel implementations are far too obscure. They need to be easily readable for a first-time user.
Need an implementation of scaled conjugate gradients for the optimizers.
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations).
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
Change get_param and set_param to get_params and set_params
Get constrain param by default inside model creation.
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
Tolerances for optimizers: do we need to introduce some standardization? At the moment, does each have its own defaults?
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
Change Youter to YYT (Youter doesn't mean anything for matrices).
Bug when running classification.crescent_data()
A dictionary for parameter storage? So we can go through names easily?
A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
Diagonal noise covariance function
Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
When computing kernel.K for kernels like rbf, you can't call rbf.K(X); you have to call rbf.K(X, X).
The predict method for GP_regression returns a covariance matrix, which is a bad idea as this takes a lot to compute; it's also confusing for first-time users. It should only be returned if the user explicitly requests it.