mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-08 19:42:39 +02:00
Merge branch 'master' of github.com:SheffieldML/GPy
This commit is contained in:
commit
3c708b3bd6
10 changed files with 387 additions and 26 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
|
@ -36,3 +36,6 @@ nosetests.xml
|
||||||
|
|
||||||
#vim
|
#vim
|
||||||
*.swp
|
*.swp
|
||||||
|
|
||||||
|
#bfgs optimiser leaves this lying around
|
||||||
|
iterate.dat
|
||||||
|
|
|
||||||
|
|
@ -227,8 +227,8 @@ class model(parameterised):
|
||||||
start = self.extract_param()
|
start = self.extract_param()
|
||||||
|
|
||||||
optimizer = optimization.get_optimizer(optimizer)
|
optimizer = optimization.get_optimizer(optimizer)
|
||||||
opt = optimizer(start, f_fp=f_fp,f=f,fp=fp, **kwargs)
|
opt = optimizer(start, model = self, **kwargs)
|
||||||
opt.run()
|
opt.run(f_fp=f_fp, f=f, fp=fp)
|
||||||
self.optimization_runs.append(opt)
|
self.optimization_runs.append(opt)
|
||||||
|
|
||||||
self.expand_param(opt.x_opt)
|
self.expand_param(opt.x_opt)
|
||||||
|
|
|
||||||
|
|
@ -19,15 +19,12 @@ class Optimizer():
|
||||||
:param messages: print messages from the optimizer?
|
:param messages: print messages from the optimizer?
|
||||||
:type messages: (True | False)
|
:type messages: (True | False)
|
||||||
:param max_f_eval: maximum number of function evaluations
|
:param max_f_eval: maximum number of function evaluations
|
||||||
|
|
||||||
:rtype: optimizer object.
|
:rtype: optimizer object.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, x_init, f_fp, f, fp , messages=False, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
|
def __init__(self, x_init, messages=False, model = None, max_f_eval=1e4, ftol=None, gtol=None, xtol=None):
|
||||||
self.opt_name = None
|
self.opt_name = None
|
||||||
self.f_fp = f_fp
|
|
||||||
self.f = f
|
|
||||||
self.fp = fp
|
|
||||||
self.x_init = x_init
|
self.x_init = x_init
|
||||||
self.messages = messages
|
self.messages = messages
|
||||||
self.f_opt = None
|
self.f_opt = None
|
||||||
|
|
@ -40,14 +37,15 @@ class Optimizer():
|
||||||
self.xtol = xtol
|
self.xtol = xtol
|
||||||
self.gtol = gtol
|
self.gtol = gtol
|
||||||
self.ftol = ftol
|
self.ftol = ftol
|
||||||
|
self.model = model
|
||||||
def run(self):
|
|
||||||
|
def run(self, **kwargs):
|
||||||
start = dt.datetime.now()
|
start = dt.datetime.now()
|
||||||
self.opt()
|
self.opt(**kwargs)
|
||||||
end = dt.datetime.now()
|
end = dt.datetime.now()
|
||||||
self.time = str(end-start)
|
self.time = str(end-start)
|
||||||
|
|
||||||
def opt(self):
|
def opt(self, f_fp = None, f = None, fp = None):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the optimizer class"
|
raise NotImplementedError, "this needs to be implemented to use the optimizer class"
|
||||||
|
|
||||||
def plot(self):
|
def plot(self):
|
||||||
|
|
@ -71,7 +69,7 @@ class opt_tnc(Optimizer):
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
Optimizer.__init__(self, *args, **kwargs)
|
||||||
self.opt_name = "TNC (Scipy implementation)"
|
self.opt_name = "TNC (Scipy implementation)"
|
||||||
|
|
||||||
def opt(self):
|
def opt(self, f_fp = None, f = None, fp = None):
|
||||||
"""
|
"""
|
||||||
Run the TNC optimizer
|
Run the TNC optimizer
|
||||||
|
|
||||||
|
|
@ -79,7 +77,7 @@ class opt_tnc(Optimizer):
|
||||||
tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
|
tnc_rcstrings = ['Local minimum', 'Converged', 'XConverged', 'Maximum number of f evaluations reached',
|
||||||
'Line search failed', 'Function is constant']
|
'Line search failed', 'Function is constant']
|
||||||
|
|
||||||
assert self.f_fp != None, "TNC requires f_fp"
|
assert f_fp != None, "TNC requires f_fp"
|
||||||
|
|
||||||
opt_dict = {}
|
opt_dict = {}
|
||||||
if self.xtol is not None:
|
if self.xtol is not None:
|
||||||
|
|
@ -89,10 +87,10 @@ class opt_tnc(Optimizer):
|
||||||
if self.gtol is not None:
|
if self.gtol is not None:
|
||||||
opt_dict['pgtol'] = self.gtol
|
opt_dict['pgtol'] = self.gtol
|
||||||
|
|
||||||
opt_result = optimize.fmin_tnc(self.f_fp, self.x_init, messages = self.messages,
|
opt_result = optimize.fmin_tnc(f_fp, self.x_init, messages = self.messages,
|
||||||
maxfun = self.max_f_eval, **opt_dict)
|
maxfun = self.max_f_eval, **opt_dict)
|
||||||
self.x_opt = opt_result[0]
|
self.x_opt = opt_result[0]
|
||||||
self.f_opt = self.f_fp(self.x_opt)[0]
|
self.f_opt = f_fp(self.x_opt)[0]
|
||||||
self.funct_eval = opt_result[1]
|
self.funct_eval = opt_result[1]
|
||||||
self.status = tnc_rcstrings[opt_result[2]]
|
self.status = tnc_rcstrings[opt_result[2]]
|
||||||
|
|
||||||
|
|
@ -101,14 +99,14 @@ class opt_lbfgsb(Optimizer):
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
Optimizer.__init__(self, *args, **kwargs)
|
||||||
self.opt_name = "L-BFGS-B (Scipy implementation)"
|
self.opt_name = "L-BFGS-B (Scipy implementation)"
|
||||||
|
|
||||||
def opt(self):
|
def opt(self, f_fp = None, f = None, fp = None):
|
||||||
"""
|
"""
|
||||||
Run the optimizer
|
Run the optimizer
|
||||||
|
|
||||||
"""
|
"""
|
||||||
rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']
|
rcstrings = ['Converged', 'Maximum number of f evaluations reached', 'Error']
|
||||||
|
|
||||||
assert self.f_fp != None, "BFGS requires f_fp"
|
assert f_fp != None, "BFGS requires f_fp"
|
||||||
|
|
||||||
if self.messages:
|
if self.messages:
|
||||||
iprint = 1
|
iprint = 1
|
||||||
|
|
@ -123,10 +121,10 @@ class opt_lbfgsb(Optimizer):
|
||||||
if self.gtol is not None:
|
if self.gtol is not None:
|
||||||
opt_dict['pgtol'] = self.gtol
|
opt_dict['pgtol'] = self.gtol
|
||||||
|
|
||||||
opt_result = optimize.fmin_l_bfgs_b(self.f_fp, self.x_init, iprint = iprint,
|
opt_result = optimize.fmin_l_bfgs_b(f_fp, self.x_init, iprint = iprint,
|
||||||
maxfun = self.max_f_eval, **opt_dict)
|
maxfun = self.max_f_eval, **opt_dict)
|
||||||
self.x_opt = opt_result[0]
|
self.x_opt = opt_result[0]
|
||||||
self.f_opt = self.f_fp(self.x_opt)[0]
|
self.f_opt = f_fp(self.x_opt)[0]
|
||||||
self.funct_eval = opt_result[2]['funcalls']
|
self.funct_eval = opt_result[2]['funcalls']
|
||||||
self.status = rcstrings[opt_result[2]['warnflag']]
|
self.status = rcstrings[opt_result[2]['warnflag']]
|
||||||
|
|
||||||
|
|
@ -135,7 +133,7 @@ class opt_simplex(Optimizer):
|
||||||
Optimizer.__init__(self, *args, **kwargs)
|
Optimizer.__init__(self, *args, **kwargs)
|
||||||
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
|
self.opt_name = "Nelder-Mead simplex routine (via Scipy)"
|
||||||
|
|
||||||
def opt(self):
|
def opt(self, f_fp = None, f = None, fp = None):
|
||||||
"""
|
"""
|
||||||
The simplex optimizer does not require gradients.
|
The simplex optimizer does not require gradients.
|
||||||
"""
|
"""
|
||||||
|
|
@ -150,7 +148,7 @@ class opt_simplex(Optimizer):
|
||||||
if self.gtol is not None:
|
if self.gtol is not None:
|
||||||
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
|
print "WARNING: simplex doesn't have an gtol arg, so I'm going to ignore it"
|
||||||
|
|
||||||
opt_result = optimize.fmin(self.f, self.x_init, (), disp = self.messages,
|
opt_result = optimize.fmin(f, self.x_init, (), disp = self.messages,
|
||||||
maxfun = self.max_f_eval, full_output=True, **opt_dict)
|
maxfun = self.max_f_eval, full_output=True, **opt_dict)
|
||||||
|
|
||||||
self.x_opt = opt_result[0]
|
self.x_opt = opt_result[0]
|
||||||
|
|
|
||||||
|
|
@ -2,5 +2,5 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD
|
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy
|
||||||
from kern import kern
|
from kern import kern
|
||||||
|
|
|
||||||
|
|
@ -21,6 +21,7 @@ from Brownian import Brownian as Brownianpart
|
||||||
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
||||||
#using meta-classes to make the objects construct properly wthout them.
|
#using meta-classes to make the objects construct properly wthout them.
|
||||||
|
|
||||||
|
|
||||||
def rbf(D,variance=1., lengthscale=1.):
|
def rbf(D,variance=1., lengthscale=1.):
|
||||||
"""
|
"""
|
||||||
Construct an RBF kernel
|
Construct an RBF kernel
|
||||||
|
|
@ -170,3 +171,20 @@ def Brownian(D,variance=1.):
|
||||||
"""
|
"""
|
||||||
part = Brownianpart(D,variance)
|
part = Brownianpart(D,variance)
|
||||||
return kern(D, [part])
|
return kern(D, [part])
|
||||||
|
|
||||||
|
import sympy as sp
|
||||||
|
from sympykern import spkern
|
||||||
|
from sympy.parsing.sympy_parser import parse_expr
|
||||||
|
|
||||||
|
def rbf_sympy(D,variance=1., lengthscale=1.):
|
||||||
|
"""
|
||||||
|
Radial Basis Function covariance.
|
||||||
|
"""
|
||||||
|
X = [sp.var('x%i'%i) for i in range(D)]
|
||||||
|
Z = [sp.var('z%i'%i) for i in range(D)]
|
||||||
|
rbf_variance = sp.var('rbf_variance',positive=True)
|
||||||
|
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
|
||||||
|
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
|
||||||
|
dist = parse_expr(dist_string)
|
||||||
|
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
|
||||||
|
return kern(D,[spkern(D,f,np.array([variance,lengthscale]))])
|
||||||
|
|
|
||||||
10
GPy/kern/sympy_helpers.cpp
Normal file
10
GPy/kern/sympy_helpers.cpp
Normal file
|
|
@ -0,0 +1,10 @@
|
||||||
|
#include <math.h>
|
||||||
|
double DiracDelta(double x){
|
||||||
|
if((x<0.000001) & (x>-0.000001))//go on, laught at my c++ skills
|
||||||
|
return 1.0;
|
||||||
|
else
|
||||||
|
return 0.0;
|
||||||
|
};
|
||||||
|
double DiracDelta(double x,int foo){
|
||||||
|
return 0.0;
|
||||||
|
};
|
||||||
3
GPy/kern/sympy_helpers.h
Normal file
3
GPy/kern/sympy_helpers.h
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
#include <math.h>
|
||||||
|
double DiracDelta(double x);
|
||||||
|
double DiracDelta(double x, int foo);
|
||||||
286
GPy/kern/sympykern.py
Normal file
286
GPy/kern/sympykern.py
Normal file
|
|
@ -0,0 +1,286 @@
|
||||||
|
import numpy as np
|
||||||
|
import sympy as sp
|
||||||
|
from sympy.utilities.codegen import codegen
|
||||||
|
from sympy.core.cache import clear_cache
|
||||||
|
from scipy import weave
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||||
|
import tempfile
|
||||||
|
import pdb
|
||||||
|
from kernpart import kernpart
|
||||||
|
|
||||||
|
class spkern(kernpart):
|
||||||
|
"""
|
||||||
|
A kernel object, where all the hard work in done by sympy.
|
||||||
|
|
||||||
|
:param k: the covariance function
|
||||||
|
:type k: a positive definite sympy function of x1, z1, x2, z2...
|
||||||
|
|
||||||
|
To construct a new sympy kernel, you'll need to define:
|
||||||
|
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||||
|
- that's it! we'll extract the variables from the function k.
|
||||||
|
|
||||||
|
Note:
|
||||||
|
- to handle multiple inputs, call them x1, z1, etc
|
||||||
|
- to handle multpile correlated outputs, you'll need to define each covariance function and 'cross' variance function. TODO
|
||||||
|
"""
|
||||||
|
def __init__(self,D,k,param=None):
|
||||||
|
self.name='sympykern'
|
||||||
|
self._sp_k = k
|
||||||
|
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||||
|
self._sp_x= sorted([e for e in sp_vars if e.name[0]=='x'],key=lambda x:int(x.name[1:]))
|
||||||
|
self._sp_z= sorted([e for e in sp_vars if e.name[0]=='z'],key=lambda z:int(z.name[1:]))
|
||||||
|
assert all([x.name=='x%i'%i for i,x in enumerate(self._sp_x)])
|
||||||
|
assert all([z.name=='z%i'%i for i,z in enumerate(self._sp_z)])
|
||||||
|
assert len(self._sp_x)==len(self._sp_z)
|
||||||
|
self.D = len(self._sp_x)
|
||||||
|
assert self.D == D
|
||||||
|
self._sp_theta = sorted([e for e in sp_vars if not (e.name[0]=='x' or e.name[0]=='z')],key=lambda e:e.name)
|
||||||
|
self.Nparam = len(self._sp_theta)
|
||||||
|
|
||||||
|
#deal with param
|
||||||
|
if param is None:
|
||||||
|
param = np.ones(self.Nparam)
|
||||||
|
assert param.size==self.Nparam
|
||||||
|
self.set_param(param)
|
||||||
|
|
||||||
|
#Differentiate!
|
||||||
|
self._sp_dk_dtheta = [sp.diff(k,theta) for theta in self._sp_theta]
|
||||||
|
self._sp_dk_dx = [sp.diff(k,xi) for xi in self._sp_x]
|
||||||
|
#self._sp_dk_dz = [sp.diff(k,zi) for zi in self._sp_z]
|
||||||
|
|
||||||
|
#self.compute_psi_stats()
|
||||||
|
self._gen_code()
|
||||||
|
|
||||||
|
self.weave_kwargs = {\
|
||||||
|
'support_code':self._function_code,\
|
||||||
|
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'kern/')],\
|
||||||
|
'headers':['"sympy_helpers.h"'],\
|
||||||
|
'sources':[os.path.join(current_dir,"kern/sympy_helpers.cpp")],\
|
||||||
|
#'extra_compile_args':['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5'],\
|
||||||
|
'extra_compile_args':[],\
|
||||||
|
'extra_link_args':['-lgomp'],\
|
||||||
|
'verbose':True}
|
||||||
|
|
||||||
|
def __add__(self,other):
|
||||||
|
return spkern(self._sp_k+other._sp_k)
|
||||||
|
|
||||||
|
def compute_psi_stats(self):
|
||||||
|
#define some normal distributions
|
||||||
|
mus = [sp.var('mu%i'%i,real=True) for i in range(self.D)]
|
||||||
|
Ss = [sp.var('S%i'%i,positive=True) for i in range(self.D)]
|
||||||
|
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
||||||
|
|
||||||
|
#do some integration!
|
||||||
|
#self._sp_psi0 = ??
|
||||||
|
self._sp_psi1 = self._sp_k
|
||||||
|
for i in range(self.D):
|
||||||
|
print 'perfoming integrals %i of %i'%(i+1,2*self.D)
|
||||||
|
sys.stdout.flush()
|
||||||
|
self._sp_psi1 *= normals[i]
|
||||||
|
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
||||||
|
clear_cache()
|
||||||
|
self._sp_psi1 = self._sp_psi1.simplify()
|
||||||
|
|
||||||
|
#and here's psi2 (eek!)
|
||||||
|
zprime = [sp.Symbol('zp%i'%i) for i in range(self.D)]
|
||||||
|
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
||||||
|
for i in range(self.D):
|
||||||
|
print 'perfoming integrals %i of %i'%(self.D+i+1,2*self.D)
|
||||||
|
sys.stdout.flush()
|
||||||
|
self._sp_psi2 *= normals[i]
|
||||||
|
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
||||||
|
clear_cache()
|
||||||
|
self._sp_psi2 = self._sp_psi2.simplify()
|
||||||
|
|
||||||
|
|
||||||
|
def _gen_code(self):
|
||||||
|
#generate c functions from sympy objects
|
||||||
|
(foo_c,self._function_code),(foo_h,self._function_header) = \
|
||||||
|
codegen([('k',self._sp_k)] \
|
||||||
|
+ [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]\
|
||||||
|
#+ [('dk_d%s'%z.name,dz) for z,dz in zip(self._sp_z,self._sp_dk_dz)]\
|
||||||
|
+ [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]\
|
||||||
|
,"C",'foobar',argument_sequence=self._sp_x+self._sp_z+self._sp_theta)
|
||||||
|
#put the header file where we can find it
|
||||||
|
f = file(os.path.join(tempfile.gettempdir(),'foobar.h'),'w')
|
||||||
|
f.write(self._function_header)
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
#get rid of derivatives of DiracDelta
|
||||||
|
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||||
|
|
||||||
|
#Here's some code to do the looping for K
|
||||||
|
arglist = ", ".join(["X[i*D+%s]"%x.name[1:] for x in self._sp_x]\
|
||||||
|
+ ["Z[j*D+%s]"%z.name[1:] for z in self._sp_z]\
|
||||||
|
+ ["param[%i]"%i for i in range(self.Nparam)])
|
||||||
|
|
||||||
|
self._K_code =\
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = target_array->dimensions[0];
|
||||||
|
int M = target_array->dimensions[1];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
for (j=0;j<M;j++){
|
||||||
|
target[i*M+j] = k(%s);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
diag_arglist = re.sub('Z','X',arglist)
|
||||||
|
diag_arglist = re.sub('j','i',diag_arglist)
|
||||||
|
#Here's some code to do the looping for Kdiag
|
||||||
|
self._Kdiag_code =\
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int N = target_array->dimensions[0];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
#pragma omp parallel for
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
target[i] = k(%s);
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(diag_arglist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
#here's some code to compute gradients
|
||||||
|
funclist = '\n'.join([' '*16 + 'target[%i] += partial[i*M+j]*dk_d%s(%s);'%(i,theta.name,arglist) for i,theta in enumerate(self._sp_theta)])
|
||||||
|
self._dK_dtheta_code =\
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int M = partial_array->dimensions[1];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
for (j=0;j<M;j++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
#here's some code to compute gradients for Kdiag TODO: thius is yucky.
|
||||||
|
diag_funclist = re.sub('Z','X',funclist,count=0)
|
||||||
|
diag_funclist = re.sub('j','i',diag_funclist)
|
||||||
|
diag_funclist = re.sub('partial\[i\*M\+i\]','partial[i]',diag_funclist)
|
||||||
|
self._dKdiag_dtheta_code =\
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
for (i=0;i<N;i++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(diag_funclist,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
#Here's some code to do gradients wrt x
|
||||||
|
gradient_funcs = "\n".join(["target[i*D+%i] += partial[i*M+j]*dk_dx%i(%s);"%(q,q,arglist) for q in range(self.D)])
|
||||||
|
self._dK_dX_code = \
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int M = partial_array->dimensions[1];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
#pragma omp parallel for private(j)
|
||||||
|
for (i=0;i<N; i++){
|
||||||
|
for (j=0; j<M; j++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
#Here's some code to do gradients wrt z (should be the same as for X, but this is easier
|
||||||
|
gradient_funcs_Z = "\n".join(["target[j*D+%i] += partial[i*M+j]*dk_dz%i(%s);"%(q,q,arglist) for q in range(self.D)])
|
||||||
|
self._dK_dZ_code = \
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int M = partial_array->dimensions[1];
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
for (i=0;i<N; i++){
|
||||||
|
for (j=0; j<M; j++){
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(gradient_funcs_Z,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
#now for gradients of Kdiag wrt X
|
||||||
|
self._dKdiag_dX_code= \
|
||||||
|
"""
|
||||||
|
int i;
|
||||||
|
int j;
|
||||||
|
int N = partial_array->dimensions[0];
|
||||||
|
int M = 0;
|
||||||
|
int D = X_array->dimensions[1];
|
||||||
|
for (i=0;i<N; i++){
|
||||||
|
j = i;
|
||||||
|
%s
|
||||||
|
}
|
||||||
|
%s
|
||||||
|
"""%(gradient_funcs,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||||
|
|
||||||
|
|
||||||
|
#TODO: insert multiple functions here via string manipulation
|
||||||
|
#TODO: similar functions for psi_stats
|
||||||
|
|
||||||
|
def K(self,X,Z,target):
|
||||||
|
param = self._param
|
||||||
|
weave.inline(self._K_code,arg_names=['target','X','Z','param'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def Kdiag(self,X,target):
|
||||||
|
param = self._param
|
||||||
|
weave.inline(self._Kdiag_code,arg_names=['target','X','param'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def dK_dtheta(self,partial,X,Z,target):
|
||||||
|
param = self._param
|
||||||
|
weave.inline(self._dK_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def dKdiag_dtheta(self,partial,X,target):
|
||||||
|
param = self._param
|
||||||
|
Z = X
|
||||||
|
weave.inline(self._dKdiag_dtheta_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def dK_dX(self,partial,X,Z,target):
|
||||||
|
target = np.zeros_like(X)
|
||||||
|
param = self._param
|
||||||
|
weave.inline(self._dK_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
#def dK_dZ(self,X,Z,partial=None):
|
||||||
|
##TODO: this function might not be necessary
|
||||||
|
#target = np.zeros_like(Z)
|
||||||
|
#param = self._param
|
||||||
|
#weave.inline(self._dK_dZ_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||||
|
#return target
|
||||||
|
|
||||||
|
def dKdiag_dX(self,partial,X,target):
|
||||||
|
param = self._param
|
||||||
|
Z = X
|
||||||
|
weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
|
||||||
|
return target
|
||||||
|
|
||||||
|
def set_param(self,param):
|
||||||
|
#print param.flags['C_CONTIGUOUS']
|
||||||
|
self._param = param.copy()
|
||||||
|
|
||||||
|
def get_param(self):
|
||||||
|
return self._param
|
||||||
|
|
||||||
|
def get_param_names(self):
|
||||||
|
return [x.name for x in self._sp_theta]
|
||||||
|
|
@ -22,8 +22,9 @@ class GPLVM(GP_regression):
|
||||||
:type init: 'PCA'|'random'
|
:type init: 'PCA'|'random'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, Q, init='PCA', **kwargs):
|
def __init__(self, Y, Q, init='PCA', X = None, **kwargs):
|
||||||
X = self.initialise_latent(init, Q, Y)
|
if X is None:
|
||||||
|
X = self.initialise_latent(init, Q, Y)
|
||||||
GP_regression.__init__(self, X, Y, **kwargs)
|
GP_regression.__init__(self, X, Y, **kwargs)
|
||||||
|
|
||||||
def initialise_latent(self, init, Q, Y):
|
def initialise_latent(self, init, Q, Y):
|
||||||
|
|
|
||||||
42
GPy/notes.txt
Normal file
42
GPy/notes.txt
Normal file
|
|
@ -0,0 +1,42 @@
|
||||||
|
Fails in weird ways if you pass a integer as the input instead of a double to the kernel.
|
||||||
|
|
||||||
|
The Matern kernels (at least the 52) still is working in the ARD manner which means it wouldn't run for very large input dimension. Needs to be fixed to match the RBF.
|
||||||
|
|
||||||
|
Implementing new covariances is too complicated at the moment. We need a barebones example of what to implement and where. Commenting in the covariance matrices needs to be improved. It's not clear to a user what all the psi parts are for. Maybe we need a cut down and simplified example to help with this (perhaps a cut down version of the RBF?). And then we should provide a simple list of what you need to do to get a new kernel going.
|
||||||
|
|
||||||
|
Missing kernels: polynomial, rational quadratic.
|
||||||
|
|
||||||
|
Kernel implementations are far to obscure. Need to be easily readable for a first time user.
|
||||||
|
|
||||||
|
Need an implementation of scaled conjugate gradients for the optimizers.
|
||||||
|
|
||||||
|
Need an implementation of gradient descent for the optimizers (works well with GP-LVM for small random initializations)
|
||||||
|
|
||||||
|
Need Carl Rasmussen's permission to add his conjugate gradients algorithm. In fact, we can just provide a hook for it, and post a separate python implementation of his algorithm.
|
||||||
|
|
||||||
|
Change get_param and set_param to get_params and set_params
|
||||||
|
|
||||||
|
Get constrain param by default inside model creation.
|
||||||
|
|
||||||
|
Randomize doesn't seem to cover a wide enough range for restarts ... try it for a model where inputs are widely spaced apart and length scale is too short. Sampling from N(0,1) is too conservative. Dangerous for people who naively use restarts. Since we have the model we could maybe come up with some sensible heuristics for setting these things. Maybe we should also consider having '.initialize()'. If we can't do this well we should disable the restart method.
|
||||||
|
|
||||||
|
|
||||||
|
Tolerances for optimizers, do we need to introduce some standardization? At the moment does each have its own defaults?
|
||||||
|
|
||||||
|
Do all optimizers work only in terms of function evaluations? Do we need to check for one that uses iterations?
|
||||||
|
|
||||||
|
Change Youter to YYT (Youter doesn't mean anything for matrices).
|
||||||
|
|
||||||
|
Bug when running classification.crescent_data()
|
||||||
|
|
||||||
|
A dictionary for parameter storage? So we can go through names easily?
|
||||||
|
|
||||||
|
A flag on covariance functions that indicates when they are not associated with an underlying function (like white noise or a coregionalization matrix).
|
||||||
|
|
||||||
|
Diagonal noise covariance function
|
||||||
|
|
||||||
|
Long term: automatic Lagrange multiplier calculation for optimizers: constrain two parameters in an unusual way and the model automatically does the Lagrangian. Also augment the parameters with new ones, so define data variance to be white noise plus RBF variance and optimize over that and signal to noise ratio ... for example constrain the sum of variances to equal the known variance of the data.
|
||||||
|
|
||||||
|
When computing kernel.K for kernels like rbf, you can't compute a version with rbf.K(X) you have to do rbf.K(X, X)
|
||||||
|
|
||||||
|
the predict method for GP_regression returns a covariance matrix which is a bad idea as this takes a lot to compute, it's also confusing for first time users. Should only be returned if the user explicitly requests it.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue