merge devel into mrd

Max Zwiessele 2013-04-24 10:11:14 +01:00
commit 2096d062cb
4 changed files with 242 additions and 116 deletions

View file

@@ -7,12 +7,13 @@ from scipy import optimize
import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
# import numdifftools as ndt
from parameterised import parameterised, truncate_pad
import priors
from ..util.linalg import jitchol
from ..inference import optimization
from .. import likelihoods
import re

class model(parameterised):
    def __init__(self):
@@ -24,14 +25,14 @@ class model(parameterised):
        self.preferred_optimizer = 'tnc'
    def _get_params(self):
        raise NotImplementedError, "this needs to be implemented to use the model class"
    def _set_params(self, x):
        raise NotImplementedError, "this needs to be implemented to use the model class"
    def log_likelihood(self):
        raise NotImplementedError, "this needs to be implemented to use the model class"
    def _log_likelihood_gradients(self):
        raise NotImplementedError, "this needs to be implemented to use the model class"
    def set_prior(self, which, what):
        """
        Sets priors on the model parameters.
@@ -52,38 +53,44 @@ class model(parameterised):
        which = self.grep_param_names(which)
        # check tied situation
        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
        if len(tie_partial_matches):
            raise ValueError, "cannot place prior across partial ties"
        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie)]
        if len(tie_matches) > 1:
            raise ValueError, "cannot place prior across multiple ties"
        elif len(tie_matches) == 1:
            which = which[:1]  # just place a prior object on the first parameter
        # check constraints are okay
        if isinstance(what, (priors.gamma, priors.log_Gaussian)):
            assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
            assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
            unconst = np.setdiff1d(which, self.constrained_positive_indices)
            if len(unconst):
                print "Warning: constraining parameters to be positive:"
                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                print '\n'
                self.constrain_positive(unconst)
        elif isinstance(what, priors.Gaussian):
            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
        else:
            raise ValueError, "prior not recognised"
        # store the prior in a local list
        for w in which:
            self.priors[w] = what
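A usage sketch of set_prior; the parameter name and the gamma arguments are illustrative, following the priors module referenced above:

    m.set_prior('rbf_variance', priors.gamma(1., 1.))  # regex picks the parameter; a gamma prior implies positivity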
    def __getitem__(self, name):
        return self.get(name)
    def __setitem__(self, name, val):
        return self.set(name, val)
    def get(self, name, return_names=False):
        """
        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
        """
@@ -94,9 +101,9 @@ class model(parameterised):
            else:
                return self._get_params()[matches]
        else:
            raise AttributeError, "no parameter matches %s" % name
    def set(self, name, val):
        """
        Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to the given value.
        """
@@ -106,30 +113,30 @@ class model(parameterised):
            x[matches] = val
            self._set_params(x)
        else:
            raise AttributeError, "no parameter matches %s" % name
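Together with the new __getitem__/__setitem__ wrappers above, get and set give dictionary-style, regex-matched access to parameters. A minimal sketch, assuming a model m with a parameter whose name matches 'rbf_variance' (the name is illustrative):

    print m['rbf_variance']                        # same as m.get('rbf_variance')
    m['rbf_variance'] = 2.0                        # same as m.set('rbf_variance', 2.0): sets every match
    vals, names = m.get('rbf', return_names=True)  # values plus the matched parameter names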
    def get_gradient(self, name, return_names=False):
        """
        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
        """
        matches = self.grep_param_names(name)
        if len(matches):
            if return_names:
                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
            else:
                return self._log_likelihood_gradients()[matches]
        else:
            raise AttributeError, "no parameter matches %s" % name
    def log_prior(self):
        """evaluate the prior"""
        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
    def _log_prior_gradients(self):
        """evaluate the gradients of the priors"""
        x = self._get_params()
        ret = np.zeros(x.size)
        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
        return ret
    def _transform_gradients(self, g):
@@ -138,13 +145,13 @@ class model(parameterised):
        """
        x = self._get_params()
        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
        if len(self.tied_indices) or len(self.constrained_fixed_indices):
            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
            return np.delete(g, to_remove)
        else:
            return g
@@ -154,15 +161,15 @@ class model(parameterised):
        Randomize the model.
        Make this draw from the prior if one exists, else draw from N(0,1)
        """
        # first take care of all parameters (from N(0,1))
        x = self._get_params_transformed()
        x = np.random.randn(x.size)
        self._set_params_transformed(x)
        # now draw from prior where possible
        x = self._get_params()
        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
        self._set_params(x)
        self._set_params_transformed(self._get_params_transformed())  # makes sure all of the tied parameters get the same init (since there's only one prior object...)
    def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -196,10 +203,10 @@ class model(parameterised):
            pool = mp.Pool(processes=num_processes)
            for i in range(Nrestarts):
                self.randomize()
                job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                jobs.append(job)
            pool.close()  # signal that no more data coming in
            pool.join()  # wait for all the tasks to complete
        except KeyboardInterrupt:
            print "Ctrl+c received, terminating and joining pool."
@@ -215,10 +222,10 @@ class model(parameterised):
                self.optimization_runs.append(jobs[i].get())
                if verbose:
                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
            except Exception as e:
                if robust:
                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
                else:
                    raise e
@@ -228,22 +235,22 @@ class model(parameterised):
        else:
            self._set_params_transformed(initial_parameters)
    def ensure_default_constraints(self, warn=False):
        """
        Ensure that any variables which should clearly be positive have been constrained somehow.
        """
        positive_strings = ['variance', 'lengthscale', 'precision']
        param_names = self._get_param_names()
        currently_constrained = self.all_constrained_indices()
        to_make_positive = []
        for s in positive_strings:
            for i in self.grep_param_names(s):
                if not (i in currently_constrained):
                    to_make_positive.append(re.escape(param_names[i]))
                    if warn:
                        print "Warning! constraining %s positive" % param_names[i]
        if len(to_make_positive):
            self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
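The new re.escape call above matters because constrain_positive treats each name in to_make_positive as a regular expression; a parameter name containing regex metacharacters would otherwise fail to match itself. A small illustration of the failure mode (the name is made up):

    import re
    name = 'rbf_lengthscale[0]'                         # brackets are regex metacharacters
    print re.search('(' + name + ')', name)             # None: '[0]' is read as a character class
    print re.search('(' + re.escape(name) + ')', name)  # matches the literal name as intended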
@@ -261,14 +268,14 @@ class model(parameterised):
        self._set_params_transformed(x)
        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
        prior_gradients = self._transform_gradients(self._log_prior_gradients())
        return -LL_gradients - prior_gradients
    def objective_and_gradients(self, x):
        self._set_params_transformed(x)
        obj_f = -self.log_likelihood() - self.log_prior()
        LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
        prior_gradients = self._transform_gradients(self._log_prior_gradients())
        obj_grads = -LL_gradients - prior_gradients
        return obj_f, obj_grads
    def optimize(self, optimizer=None, start=None, **kwargs):
@@ -288,13 +295,13 @@ class model(parameterised):
            start = self._get_params_transformed()
        optimizer = optimization.get_optimizer(optimizer)
        opt = optimizer(start, model=self, **kwargs)
        opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
        self.optimization_runs.append(opt)
        self._set_params_transformed(opt.x_opt)
    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
        # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
        sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
        sgd.run()
@@ -302,8 +309,8 @@ class model(parameterised):
    def Laplace_covariance(self):
        """return the covariance matrix of a Laplace approximation at the current (stationary) point"""
        # TODO add in the prior contributions for MAP estimation
        # TODO fix the hessian for tied, constrained and fixed components
        if hasattr(self, 'log_likelihood_hessian'):
            A = -self.log_likelihood_hessian()
@@ -317,8 +324,8 @@ class model(parameterised):
            A = -h(x)
            self._set_params(x)
        # check for almost zero components on the diagonal which screw up the cholesky
        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
        A[aa, aa] = 0.
        return A
    def Laplace_evidence(self):
@@ -329,11 +336,11 @@ class model(parameterised):
            hld = np.sum(np.log(np.diag(jitchol(A)[0])))
        except:
            return np.nan
        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
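For reference, this return value is the standard Laplace approximation to the log evidence, with A the negative Hessian at the mode and hld its half log-determinant computed from the Cholesky factor above:

    \log p(\mathcal{D}) \approx \tfrac{d}{2}\log 2\pi + \log p(\mathcal{D} \mid \hat\theta) - \tfrac{1}{2}\log\lvert A\rvert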
    def __str__(self):
        s = parameterised.__str__(self).split('\n')
        # add priors to the string
        strs = [str(p) if p is not None else '' for p in self.priors]
        width = np.array(max([len(p) for p in strs] + [5])) + 4
@@ -344,16 +351,16 @@ class model(parameterised):
            obj_funct += ', Log prior: {0:.3e}, LL+prior = {1:.3e}'.format(log_prior, log_like + log_prior)
        obj_funct += '\n\n'
        s[0] = obj_funct + s[0]
        s[0] += "|{h:^{col}}".format(h='Prior', col=width)
        s[1] += '-' * (width + 1)
        for p in range(2, len(strs) + 2):
            s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
        return '\n'.join(s)
    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
        """
        Check the gradient of the model by comparing to a numerical estimate.
        If the verbose flag is passed, individual components are tested (and printed)
@@ -373,27 +380,27 @@ class model(parameterised):
        x = self._get_params_transformed().copy()
        if not verbose:
            # just check the global ratio
            dx = step * np.sign(np.random.uniform(-1, 1, x.size))
            # evaluate around the point x
            f1, g1 = self.objective_and_gradients(x + dx)
            f2, g2 = self.objective_and_gradients(x - dx)
            gradient = self.objective_function_gradients(x)
            numerical_gradient = (f1 - f2) / (2 * dx)
            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
            if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
                return True
            else:
                return False
        else:
            # check the gradient of each parameter individually, and do some pretty printing
            try:
                names = self._get_param_names_transformed()
            except NotImplementedError:
                names = ['Variable %i' % i for i in range(len(x))]
            # Prepare for pretty-printing
            header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
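The non-verbose branch above accepts the gradient when the central-difference estimate of the directional derivative matches the analytic one. A self-contained sketch of the same test on a toy objective (all names illustrative):

    import numpy as np

    def f(x):
        return np.sum(x ** 2)

    def df(x):
        return 2 * x

    x = np.random.randn(5)
    step = 1e-6
    dx = step * np.sign(np.random.uniform(-1, 1, x.size))
    # f(x+dx) - f(x-dx) ~= 2 * dx . grad f(x), so the ratio should be ~1
    global_ratio = (f(x + dx) - f(x - dx)) / (2 * np.dot(dx, df(x)))
    print np.abs(1. - global_ratio) < 1e-3  # True when the gradient is right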
@@ -402,9 +409,9 @@ class model(parameterised):
            cols = [max_names]
            cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
            cols = np.array(cols) + 5
            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
            header_string = map(lambda x: '|'.join(x), [header_string])
            separator = '-' * len(header_string[0])
            print '\n'.join([header_string[0], separator])
            if target_param is None:
@@ -420,11 +427,11 @@ class model(parameterised):
                f2, g2 = self.objective_and_gradients(xx)
                gradient = self.objective_function_gradients(x)[i]
                numerical_gradient = (f1 - f2) / (2 * step)
                ratio = (f1 - f2) / (2 * step * gradient)
                difference = np.abs((f1 - f2) / 2 / step - gradient)
                if (np.abs(ratio - 1) < tolerance):
                    formatted_name = "\033[92m {0} \033[0m".format(names[i])
                else:
                    formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -432,7 +439,7 @@ class model(parameterised):
                d = '%.6f' % float(difference)
                g = '%.6f' % gradient
                ng = '%.6f' % float(numerical_gradient)
                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                print grad_string
    def input_sensitivity(self):
@@ -443,21 +450,21 @@ class model(parameterised):
        TODO: proper sensitivity analysis
        """
        if not hasattr(self, 'kern'):
            raise ValueError, "this model has no kernel"
        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
        if (not len(k) == 1) or (not k[0].ARD):
            raise ValueError, "cannot determine sensitivity for this kernel"
        k = k[0]
        if k.name == 'rbf':
            return k.lengthscale
        elif k.name == 'linear':
            return 1. / k.variances
    def pseudo_EM(self, epsilon=.1, **kwargs):
        """
        TODO: Should this not be in the GP class?
        EM-like algorithm for Expectation Propagation and Laplace approximation
@@ -471,7 +478,7 @@ class model(parameterised):
        :type optimizer: string TODO: valid strings?
        """
        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
        ll_change = epsilon + 1.
        iteration = 0
        last_ll = -np.exp(1000)
@@ -491,9 +498,9 @@ class model(parameterised):
            ll_change = new_ll - last_ll
            if ll_change < 0:
                self.likelihood = last_approximation  # restore previous likelihood approximation
                self._set_params(last_params)  # restore model parameters
                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                stop = True
            else:
                self.optimize(**kwargs)
@@ -502,5 +509,5 @@ class model(parameterised):
                stop = True
            iteration += 1
        if stop:
            print "%s iterations." % iteration

View file

@@ -2,5 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try:
    from constructors import rbf_sympy, sympykern  # these depend on sympy
except ImportError:
    pass
from kern import kern

View file

@@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
from rbfcos import rbfcos as rbfcospart
from independent_outputs import independent_outputs as independent_output_part
# TODO: these constructors are not as clean as we'd like. Tidy the code up
# using meta-classes to make the objects construct properly without them.
@@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
    part = Brownianpart(D, variance)
    return kern(D, [part])
try:
    import sympy as sp
    from sympykern import spkern
    from sympy.parsing.sympy_parser import parse_expr
    sympy_available = True
except ImportError:
    sympy_available = False
if sympy_available:
    def rbf_sympy(D, ARD=False, variance=1., lengthscale=1.):
        """
        Radial Basis Function covariance.
        """
        X = [sp.var('x%i' % i) for i in range(D)]
        Z = [sp.var('z%i' % i) for i in range(D)]
        rbf_variance = sp.var('rbf_variance', positive=True)
        if ARD:
            rbf_lengthscales = [sp.var('rbf_lengthscale_%i' % i, positive=True) for i in range(D)]
            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2' % (i, i, i) for i in range(D)])
            dist = parse_expr(dist_string)
            f = rbf_variance * sp.exp(-dist / 2.)
        else:
            rbf_lengthscale = sp.var('rbf_lengthscale', positive=True)
            dist_string = ' + '.join(['(x%i-z%i)**2' % (i, i) for i in range(D)])
            dist = parse_expr(dist_string)
            f = rbf_variance * sp.exp(-dist / (2 * rbf_lengthscale ** 2))
        return kern(D, [spkern(D, f)])
    def sympykern(D, k):
        """
        A kernel from a symbolic sympy representation
        """
        return kern(D, [spkern(D, k)])
del sympy_available
def periodic_exponential(D=1, variance=1., lengthscale=None, period=2*np.pi, n_freq=10, lower=0., upper=4*np.pi):
    """
@@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
    """
    part = rbfcospart(D, variance, frequencies, bandwidths, ARD)
    return kern(D, [part])
def independent_outputs(k):
    """
    Construct a kernel with independent outputs from an existing kernel
    """
    for sl in k.input_slices:
        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
    parts = [independent_output_part(p) for p in k.parts]
    return kern(k.D + 1, parts)
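A sketch of how the new constructor might be called, assuming the usual GPy kern constructors and the input layout described in the new kernpart below (an output index in the last column of X):

    import numpy as np
    import GPy

    k = GPy.kern.rbf(1)                      # base kernel on 1-d inputs
    k_ind = GPy.kern.independent_outputs(k)  # wrapped kernel now expects 2-d inputs
    # last column holds the output index; blocks with different indices are uncorrelated
    X0 = np.hstack((np.random.randn(5, 1), np.zeros((5, 1))))
    X1 = np.hstack((np.random.randn(5, 1), np.ones((5, 1))))
    K = k_ind.K(np.vstack((X0, X1)))         # block-diagonal 10x10 covariance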

View file

@@ -0,0 +1,97 @@
# Copyright (c) 2012, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
def index_to_slices(index):
    """
    Take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
    e.g.
    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
    returns
    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
    or, a more complicated example
    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
    returns
    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
    """
    # construct the return structure
    ind = np.asarray(index, dtype=np.int64)
    ret = [[] for i in range(ind.max() + 1)]
    # find the switchpoints
    ind_ = np.hstack((ind, ind[0] + ind[-1] + 1))
    switchpoints = np.nonzero(ind_ - np.roll(ind_, +1))[0]
    [ret[ind_i].append(slice(*indexes_i)) for ind_i, indexes_i in zip(ind[switchpoints[:-1]], zip(switchpoints, switchpoints[1:]))]
    return ret
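A quick check of index_to_slices against the docstring's first example (run by hand):

    index = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    print index_to_slices(index)
    # [[slice(0, 3, None)], [slice(3, 6, None)], [slice(6, 9, None)]]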
class independent_outputs(kernpart):
    """
    A kernel part which can represent several independent functions.
    This kernel 'switches off' parts of the matrix where the output indexes are different.
    The index of the functions is given by the last column in the input X;
    the rest of the columns of X are passed to the kernel for computation (in blocks).
    """
    def __init__(self, k):
        self.D = k.D + 1
        self.Nparam = k.Nparam
        self.name = 'iops(' + k.name + ')'
        self.k = k
    def _get_params(self):
        return self.k._get_params()
    def _set_params(self, x):
        self.k._set_params(x)
        self.params = x
    def _get_param_names(self):
        return self.k._get_param_names()
    def K(self, X, X2, target):
        # sort out the slices from the input data
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])
        [[[self.k.K(X[s], X2[s2], target[s, s2]) for s in slices_i] for s2 in slices_j] for slices_i, slices_j in zip(slices, slices2)]
    def Kdiag(self, X, target):
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        [[self.k.Kdiag(X[s], target[s]) for s in slices_i] for slices_i in slices]
    def dK_dtheta(self, dL_dK, X, X2, target):
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])
        [[[self.k.dK_dtheta(dL_dK[s, s2], X[s], X2[s2], target) for s in slices_i] for s2 in slices_j] for slices_i, slices_j in zip(slices, slices2)]
    def dK_dX(self, dL_dK, X, X2, target):
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])
        [[[self.k.dK_dX(dL_dK[s, s2], X[s], X2[s2], target[s, :-1]) for s in slices_i] for s2 in slices_j] for slices_i, slices_j in zip(slices, slices2)]
    def dKdiag_dX(self, dL_dKdiag, X, target):
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        [[self.k.dKdiag_dX(dL_dKdiag[s], X[s], target[s, :-1]) for s in slices_i] for slices_i in slices]
    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        [[self.k.dKdiag_dtheta(dL_dKdiag[s], X[s], target) for s in slices_i] for slices_i in slices]
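To make the block structure concrete, here is a minimal sketch with a stand-in kernpart (toy_kernpart is made up for illustration; a real part such as rbf would be used in practice). K only writes the diagonal blocks where the row and column output indices agree:

    class toy_kernpart(object):
        # illustrative stand-in: unit covariance within a block
        D, Nparam, name = 1, 0, 'toy'
        def K(self, X, X2, target):
            target += 1.  # writes in place into the view of the big matrix

    X = np.array([[0.1, 0.], [0.2, 0.], [0.3, 1.], [0.4, 1.]])  # output index in last column
    part = independent_outputs(toy_kernpart())
    K = np.zeros((4, 4))
    part.K(X, None, K)
    print K
    # [[ 1.  1.  0.  0.]
    #  [ 1.  1.  0.  0.]
    #  [ 0.  0.  1.  1.]
    #  [ 0.  0.  1.  1.]]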