Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-10 12:32:40 +02:00)

commit 2096d062cb: merge devel into mrd
4 changed files with 242 additions and 116 deletions
@@ -7,12 +7,13 @@ from scipy import optimize
 import sys, pdb
 import multiprocessing as mp
 from GPy.util.misc import opt_wrapper
-#import numdifftools as ndt
+# import numdifftools as ndt
 from parameterised import parameterised, truncate_pad
 import priors
 from ..util.linalg import jitchol
 from ..inference import optimization
 from .. import likelihoods
+import re

 class model(parameterised):
     def __init__(self):
@@ -24,14 +25,14 @@ class model(parameterised):
         self.preferred_optimizer = 'tnc'
     def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
-    def _set_params(self,x):
+    def _set_params(self, x):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"
     def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to use the model class"

-    def set_prior(self,which,what):
+    def set_prior(self, which, what):
         """
         Sets priors on the model parameters.

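These NotImplementedError stubs are the contract a concrete model must satisfy. As a purely illustrative sketch (no such class exists in this commit; the Gaussian toy likelihood and every name below are hypothetical), a minimal subclass could look like:

    import numpy as np

    class toy_model(model):
        """Hypothetical example: y ~ N(mu, 1) with a single free parameter mu."""
        def __init__(self, y):
            self.y = np.asarray(y)
            self.mu = 0.
            model.__init__(self)

        def _get_params(self):
            return np.array([self.mu])

        def _set_params(self, x):
            self.mu = x[0]

        def _get_param_names(self):
            return ['mu']

        def log_likelihood(self):
            # Gaussian log-likelihood up to an additive constant
            return -0.5 * np.sum((self.y - self.mu) ** 2)

        def _log_likelihood_gradients(self):
            return np.array([np.sum(self.y - self.mu)])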
@@ -52,38 +53,44 @@ class model(parameterised):

         which = self.grep_param_names(which)

-        #check tied situation
-        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
+        # check tied situation
+        tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
         if len(tie_partial_matches):
             raise ValueError, "cannot place prior across partial ties"
-        tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
-        if len(tie_matches)>1:
+        tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
+        if len(tie_matches) > 1:
             raise ValueError, "cannot place prior across multiple ties"
-        elif len(tie_matches)==1:
-            which = which[:1]# just place a prior object on the first parameter
+        elif len(tie_matches) == 1:
+            which = which[:1] # just place a prior object on the first parameter


-        #check constraints are okay
+        # check constraints are okay
         if isinstance(what, (priors.gamma, priors.log_Gaussian)):
-            assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
-            assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
+            assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
             unconst = np.setdiff1d(which, self.constrained_positive_indices)
             if len(unconst):
                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
+                print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
                 print '\n'
                 self.constrain_positive(unconst)
-        elif isinstance(what,priors.Gaussian):
-            assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
+        elif isinstance(what, priors.Gaussian):
+            assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
         else:
             raise ValueError, "prior not recognised"


-        #store the prior in a local list
+        # store the prior in a local list
         for w in which:
             self.priors[w] = what

-    def get(self,name, return_names=False):
+    def __getitem__(self, name):
+        return self.get(name)
+
+    def __setitem(self, name, val):
+        return self.set(name, val)
+
+    def get(self, name, return_names=False):
         """
         Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
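The new `__getitem__` makes `m['name']` an alias for `m.get('name')`. Note that `__setitem` is written without the trailing double underscore, so `m['name'] = val` will not reach it; `m.set(...)` remains the assignment path. A hypothetical usage sketch (the model `m`, the parameter name, and the gamma prior arguments are all illustrative, not taken from this commit):

    # Assumes an existing GPy model instance `m`; names are illustrative only.
    m.set_prior('rbf_variance', priors.gamma(1., 1.))  # `which` is a regexp over parameter names
    v = m['rbf_variance']        # dispatches to m.get() via the new __getitem__
    m.set('rbf_variance', 0.5)   # m['rbf_variance'] = 0.5 would require __setitem__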
@@ -94,9 +101,9 @@ class model(parameterised):
             else:
                 return self._get_params()[matches]
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name

-    def set(self,name,val):
+    def set(self, name, val):
         """
         Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
         """
@@ -106,30 +113,30 @@ class model(parameterised):
             x[matches] = val
             self._set_params(x)
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name

-    def get_gradient(self,name, return_names=False):
+    def get_gradient(self, name, return_names=False):
         """
         Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
             if return_names:
                 return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
             else:
                 return self._log_likelihood_gradients()[matches]
         else:
-            raise AttributeError, "no parameter matches %s"%name
+            raise AttributeError, "no parameter matches %s" % name

     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
+        return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])

     def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
         x = self._get_params()
         ret = np.zeros(x.size)
-        [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
+        [np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
         return ret

     def _transform_gradients(self, g):
@@ -138,13 +145,13 @@ class model(parameterised):
         """

         x = self._get_params()
-        g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
-        g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
-        [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        [np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
+        g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
+        g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
+        [np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
+        [np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
         if len(self.tied_indices) or len(self.constrained_fixed_indices):
-            to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
-            return np.delete(g,to_remove)
+            to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
+            return np.delete(g, to_remove)
         else:
             return g

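For reference, the rescalings above are the chain rule through the constraint transforms; assuming positivity is implemented as x = e^t and an (l, h) bound as a scaled logistic x = l + (h - l)σ(t) (the factors in the code are consistent with both assumptions), they follow directly:

    \frac{\partial L}{\partial t} = \frac{\partial L}{\partial x}\,\frac{dx}{dt},
    \qquad x = e^{t} \Rightarrow \frac{dx}{dt} = x,
    \qquad x = l + (h - l)\,\sigma(t) \Rightarrow \frac{dx}{dt} = \frac{(x - l)(h - x)}{h - l}

so `g * x` handles the positive (and, symmetrically, negative) case, the logistic factor handles the bounded case, tied gradients are summed onto the first index of each tie, and fixed or duplicate entries are deleted from the result.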
@@ -154,15 +161,15 @@ class model(parameterised):
         Randomize the model.
         Make this draw from the prior if one exists, else draw from N(0,1)
         """
-        #first take care of all parameters (from N(0,1))
+        # first take care of all parameters (from N(0,1))
         x = self._get_params_transformed()
         x = np.random.randn(x.size)
         self._set_params_transformed(x)
-        #now draw from prior where possible
+        # now draw from prior where possible
         x = self._get_params()
-        [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
+        [np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
         self._set_params(x)
-        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)


     def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@@ -196,10 +203,10 @@ class model(parameterised):
                 pool = mp.Pool(processes=num_processes)
                 for i in range(Nrestarts):
                     self.randomize()
-                    job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
+                    job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
                     jobs.append(job)

                 pool.close() # signal that no more data coming in
                 pool.join() # wait for all the tasks to complete
             except KeyboardInterrupt:
                 print "Ctrl+c received, terminating and joining pool."
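A hypothetical call exercising this parallel path (the model `m` and the argument values are illustrative):

    # Run 5 random restarts across 2 worker processes; robust=True swallows
    # failed restarts instead of re-raising. Results collect in m.optimization_runs.
    m.optimize_restarts(Nrestarts=5, parallel=True, num_processes=2, robust=True)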
@@ -215,10 +222,10 @@ class model(parameterised):
                 self.optimization_runs.append(jobs[i].get())

                 if verbose:
-                    print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
+                    print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
             except Exception as e:
                 if robust:
-                    print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
+                    print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
                 else:
                     raise e

@@ -228,22 +235,22 @@ class model(parameterised):
         else:
             self._set_params_transformed(initial_parameters)

-    def ensure_default_constraints(self,warn=False):
+    def ensure_default_constraints(self, warn=False):
         """
         Ensure that any variables which should clearly be positive have been constrained somehow.
         """
-        positive_strings = ['variance','lengthscale', 'precision']
+        positive_strings = ['variance', 'lengthscale', 'precision']
         param_names = self._get_param_names()
         currently_constrained = self.all_constrained_indices()
         to_make_positive = []
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in currently_constrained):
-                    to_make_positive.append(param_names[i])
+                    to_make_positive.append(re.escape(param_names[i]))
                     if warn:
-                        print "Warning! constraining %s postive"%name
+                        print "Warning! constraining %s postive" % name
         if len(to_make_positive):
-            self.constrain_positive('('+'|'.join(to_make_positive)+')')
+            self.constrain_positive('(' + '|'.join(to_make_positive) + ')')

@@ -261,14 +268,14 @@ class model(parameterised):
         self._set_params_transformed(x)
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        return - LL_gradients - prior_gradients
+        return -LL_gradients - prior_gradients

     def objective_and_gradients(self, x):
         self._set_params_transformed(x)
         obj_f = -self.log_likelihood() - self.log_prior()
         LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
         prior_gradients = self._transform_gradients(self._log_prior_gradients())
-        obj_grads = - LL_gradients - prior_gradients
+        obj_grads = -LL_gradients - prior_gradients
         return obj_f, obj_grads

     def optimize(self, optimizer=None, start=None, **kwargs):
@@ -288,13 +295,13 @@ class model(parameterised):
             start = self._get_params_transformed()

         optimizer = optimization.get_optimizer(optimizer)
-        opt = optimizer(start, model = self, **kwargs)
+        opt = optimizer(start, model=self, **kwargs)
         opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
         self.optimization_runs.append(opt)

         self._set_params_transformed(opt.x_opt)

-    def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
+    def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
         sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
         sgd.run()
@@ -302,8 +309,8 @@ class model(parameterised):

     def Laplace_covariance(self):
         """return the covariance matric of a Laplace approximatino at the current (stationary) point"""
-        #TODO add in the prior contributions for MAP estimation
-        #TODO fix the hessian for tied, constrained and fixed components
+        # TODO add in the prior contributions for MAP estimation
+        # TODO fix the hessian for tied, constrained and fixed components
         if hasattr(self, 'log_likelihood_hessian'):
             A = -self.log_likelihood_hessian()

@@ -317,8 +324,8 @@ class model(parameterised):
             A = -h(x)
             self._set_params(x)
         # check for almost zero components on the diagonal which screw up the cholesky
-        aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
-        A[aa,aa] = 0.
+        aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
+        A[aa, aa] = 0.
         return A

     def Laplace_evidence(self):
@@ -329,11 +336,11 @@ class model(parameterised):
             hld = np.sum(np.log(np.diag(jitchol(A)[0])))
         except:
             return np.nan
-        return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
+        return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld

     def __str__(self):
         s = parameterised.__str__(self).split('\n')
-        #add priors to the string
+        # add priors to the string
         strs = [str(p) if p is not None else '' for p in self.priors]
         width = np.array(max([len(p) for p in strs] + [5])) + 4

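In the code's notation, A is the negated Hessian at the mode and hld = Σ log diag(chol(A)) = ½ log|A|, so the value returned is the usual Laplace approximation to the log evidence:

    \log Z \;\approx\; \frac{d}{2}\log 2\pi \;+\; \log p(\mathbf{y}\mid\theta^{*}) \;-\; \tfrac{1}{2}\log\lvert A\rvert

with d the number of parameters, matching `0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld` term for term.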
@@ -344,16 +351,16 @@ class model(parameterised):
             obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
         obj_funct += '\n\n'
         s[0] = obj_funct + s[0]
-        s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
-        s[1] += '-'*(width + 1)
+        s[0] += "|{h:^{col}}".format(h='Prior', col=width)
+        s[1] += '-' * (width + 1)

-        for p in range(2, len(strs)+2):
-            s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
+        for p in range(2, len(strs) + 2):
+            s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)

         return '\n'.join(s)


-    def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
+    def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
         """
         Check the gradient of the model by comparing to a numerical estimate.
         If the verbose flag is passed, invividual components are tested (and printed)
@@ -373,27 +380,27 @@ class model(parameterised):
         x = self._get_params_transformed().copy()

         if not verbose:
-            #just check the global ratio
-            dx = step*np.sign(np.random.uniform(-1,1,x.size))
+            # just check the global ratio
+            dx = step * np.sign(np.random.uniform(-1, 1, x.size))

-            #evaulate around the point x
-            f1, g1 = self.objective_and_gradients(x+dx)
-            f2, g2 = self.objective_and_gradients(x-dx)
+            # evaulate around the point x
+            f1, g1 = self.objective_and_gradients(x + dx)
+            f2, g2 = self.objective_and_gradients(x - dx)
             gradient = self.objective_function_gradients(x)

-            numerical_gradient = (f1-f2)/(2*dx)
-            global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
+            numerical_gradient = (f1 - f2) / (2 * dx)
+            global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))

-            if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
+            if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
                 return True
             else:
                 return False
         else:
-            #check the gradient of each parameter individually, and do some pretty printing
+            # check the gradient of each parameter individually, and do some pretty printing
             try:
                 names = self._get_param_names_transformed()
             except NotImplementedError:
-                names = ['Variable %i'%i for i in range(len(x))]
+                names = ['Variable %i' % i for i in range(len(x))]

             # Prepare for pretty-printing
             header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
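The non-verbose branch above is a standard central-difference check along a random perturbation. A self-contained sketch of the same idea (the objective `f`, gradient `grad`, and point `x` are placeholders, not part of this commit):

    import numpy as np

    def gradcheck_ratio(f, grad, x, step=1e-6):
        # Perturb all coordinates at once with a random +/- step, as above.
        dx = step * np.sign(np.random.uniform(-1, 1, x.size))
        # Central difference of the objective along dx, compared with the
        # directional derivative 2 * dx.dot(grad(x)); a ratio near 1 means
        # the analytic gradient is consistent with the numerical one.
        numerical = f(x + dx) - f(x - dx)
        return numerical / (2 * np.dot(dx, grad(x)))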
@@ -402,9 +409,9 @@ class model(parameterised):
             cols = [max_names]
             cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
             cols = np.array(cols) + 5
-            header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
+            header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
             header_string = map(lambda x: '|'.join(x), [header_string])
-            separator = '-'*len(header_string[0])
+            separator = '-' * len(header_string[0])
             print '\n'.join([header_string[0], separator])

             if target_param is None:
@@ -420,11 +427,11 @@ class model(parameterised):
                 f2, g2 = self.objective_and_gradients(xx)
                 gradient = self.objective_function_gradients(x)[i]

-                numerical_gradient = (f1-f2)/(2*step)
-                ratio = (f1-f2)/(2*step*gradient)
-                difference = np.abs((f1-f2)/2/step - gradient)
+                numerical_gradient = (f1 - f2) / (2 * step)
+                ratio = (f1 - f2) / (2 * step * gradient)
+                difference = np.abs((f1 - f2) / 2 / step - gradient)

-                if (np.abs(ratio-1)<tolerance):
+                if (np.abs(ratio - 1) < tolerance):
                     formatted_name = "\033[92m {0} \033[0m".format(names[i])
                 else:
                     formatted_name = "\033[91m {0} \033[0m".format(names[i])
@@ -432,7 +439,7 @@ class model(parameterised):
                 d = '%.6f' % float(difference)
                 g = '%.6f' % gradient
                 ng = '%.6f' % float(numerical_gradient)
-                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
+                grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
                 print grad_string

     def input_sensitivity(self):
@@ -443,21 +450,21 @@ class model(parameterised):
         TODO: proper sensitivity analysis
         """

-        if not hasattr(self,'kern'):
+        if not hasattr(self, 'kern'):
             raise ValueError, "this model has no kernel"

-        k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
-        if (not len(k)==1) or (not k[0].ARD):
+        k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
+        if (not len(k) == 1) or (not k[0].ARD):
             raise ValueError, "cannot determine sensitivity for this kernel"
         k = k[0]

-        if k.name=='rbf':
+        if k.name == 'rbf':
             return k.lengthscale
-        elif k.name=='linear':
-            return 1./k.variances
+        elif k.name == 'linear':
+            return 1. / k.variances


-    def pseudo_EM(self,epsilon=.1,**kwargs):
+    def pseudo_EM(self, epsilon=.1, **kwargs):
         """
         TODO: Should this not bein the GP class?
         EM - like algorithm for Expectation Propagation and Laplace approximation
@@ -471,7 +478,7 @@ class model(parameterised):
         :type optimzer: string TODO: valid strings?

         """
-        assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
+        assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
         ll_change = epsilon + 1.
         iteration = 0
         last_ll = -np.exp(1000)
@@ -491,9 +498,9 @@ class model(parameterised):
             ll_change = new_ll - last_ll

             if ll_change < 0:
-                self.likelihood = last_approximation #restore previous likelihood approximation
-                self._set_params(last_params) #restore model parameters
-                print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
+                self.likelihood = last_approximation # restore previous likelihood approximation
+                self._set_params(last_params) # restore model parameters
+                print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
                 stop = True
             else:
                 self.optimize(**kwargs)
@@ -502,5 +509,5 @@ class model(parameterised):
                 stop = True
             iteration += 1
         if stop:
-            print "%s iterations." %iteration
+            print "%s iterations." % iteration

@@ -2,5 +2,9 @@
 # Licensed under the BSD 3-clause license (see LICENSE.txt)


-from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
+from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
+try:
+    from constructors import rbf_sympy, sympykern # these depend on sympy
+except:
+    pass
 from kern import kern
@@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
 from coregionalise import coregionalise as coregionalise_part
 from rational_quadratic import rational_quadratic as rational_quadraticpart
 from rbfcos import rbfcos as rbfcospart
+from independent_outputs import independent_outputs as independent_output_part
 #TODO these s=constructors are not as clean as we'd like. Tidy the code up
 #using meta-classes to make the objects construct properly wthout them.

@@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
     part = Brownianpart(D,variance)
     return kern(D, [part])

-import sympy as sp
-from sympykern import spkern
-from sympy.parsing.sympy_parser import parse_expr
+try:
+    import sympy as sp
+    from sympykern import spkern
+    from sympy.parsing.sympy_parser import parse_expr
+    sympy_available = True
+except ImportError:
+    sympy_available = False

-def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
-    """
-    Radial Basis Function covariance.
-    """
-    X = [sp.var('x%i'%i) for i in range(D)]
-    Z = [sp.var('z%i'%i) for i in range(D)]
-    rbf_variance = sp.var('rbf_variance',positive=True)
-    if ARD:
-        rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
-        dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f = rbf_variance*sp.exp(-dist/2.)
-    else:
-        rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
-        dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
-        dist = parse_expr(dist_string)
-        f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
-    return kern(D,[spkern(D,f)])
+if sympy_available:
+    def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
+        """
+        Radial Basis Function covariance.
+        """
+        X = [sp.var('x%i'%i) for i in range(D)]
+        Z = [sp.var('z%i'%i) for i in range(D)]
+        rbf_variance = sp.var('rbf_variance',positive=True)
+        if ARD:
+            rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
+            dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f = rbf_variance*sp.exp(-dist/2.)
+        else:
+            rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
+            dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
+            dist = parse_expr(dist_string)
+            f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
+        return kern(D,[spkern(D,f)])

 def sympykern(D,k):
     """
     A kernel from a symbolic sympy representation
     """
     return kern(D,[spkern(D,k)])
+del sympy_available

 def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
     """
@@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
     """
     part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
     return kern(D,[part])
+
+def independent_outputs(k):
+    """
+    Construct a kernel with independent outputs from an existing kernel
+    """
+    for sl in k.input_slices:
+        assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
+    parts = [independent_output_part(p) for p in k.parts]
+    return kern(k.D+1,parts)
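A hypothetical sketch of how the new constructor is meant to be driven: the last column of X carries an integer output index, and the wrapped kernel sees the remaining columns (the rbf constructor call and the data shapes below are illustrative, not taken from this commit):

    import numpy as np
    import GPy

    k = GPy.kern.independent_outputs(GPy.kern.rbf(1))  # k.D == 2: one input column + one index column
    X = np.random.randn(10, 1)
    index = np.vstack((np.zeros((5, 1)), np.ones((5, 1))))  # first 5 rows are output 0, last 5 output 1
    Xi = np.hstack((X, index))  # rows with different indices get zero cross-covariance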
GPy/kern/independent_outputs.py (new file, 97 lines)

@@ -0,0 +1,97 @@
+# Copyright (c) 2012, James Hesnsman
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+
+from kernpart import kernpart
+import numpy as np
+
+def index_to_slices(index):
+    """
+    take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
+
+    e.g.
+    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
+    returns
+    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
+
+    or, a more complicated example
+    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
+    returns
+    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
+    """
+
+    #contruct the return structure
+    ind = np.asarray(index,dtype=np.int64)
+    ret = [[] for i in range(ind.max()+1)]
+
+    #find the switchpoints
+    ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
+    switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
+
+    [ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
+    return ret
+
+class independent_outputs(kernpart):
+    """
+    A kernel part shich can reopresent several independent functions.
+    this kernel 'switches off' parts of the matrix where the output indexes are different.
+
+    The index of the functions is given by the last column in the input X
+    the rest of the columns of X are passed to the kernel for computation (in blocks).
+
+    """
+    def __init__(self,k):
+        self.D = k.D + 1
+        self.Nparam = k.Nparam
+        self.name = 'iops('+ k.name + ')'
+        self.k = k
+
+    def _get_params(self):
+        return self.k._get_params()
+
+    def _set_params(self,x):
+        self.k._set_params(x)
+        self.params = x
+
+    def _get_param_names(self):
+        return self.k._get_param_names()
+
+    def K(self,X,X2,target):
+        #Sort out the slices from the input data
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+
+        [[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def Kdiag(self,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
+
+    def dK_dtheta(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+
+    def dK_dX(self,dL_dK,X,X2,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        if X2 is None:
+            X2,slices2 = X,slices
+        else:
+            X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
+        [[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
+
+    def dKdiag_dX(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
+
+
+    def dKdiag_dtheta(self,dL_dKdiag,X,target):
+        X,slices = X[:,:-1],index_to_slices(X[:,-1])
+        [[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
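The slicing helper can be checked directly against the first docstring example; a small sketch (assuming the module imports as below):

    import numpy as np
    from GPy.kern.independent_outputs import index_to_slices

    index = np.asarray([0, 0, 0, 1, 1, 1, 2, 2, 2])
    slices = index_to_slices(index)
    # each sub-list collects the contiguous runs belonging to one output index
    assert slices == [[slice(0, 3)], [slice(3, 6)], [slice(6, 9)]]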