Merge branch 'devel' of https://github.com/SheffieldML/GPy into devel

This commit is contained in:
Neil Lawrence 2013-04-29 21:45:42 +01:00
commit 71114eef8c
2 changed files with 97 additions and 94 deletions

View file

@ -12,9 +12,10 @@ before_install:
- sudo apt-get install -qq python-matplotlib
install:
- pip install --upgrade numpy==1.7.1
- pip install sphinx
- pip install nose
- pip install . --use-mirrors
# command to run tests, e.g. python setup.py test
script:
- nosetests GPy/testing
- nosetests GPy/testing

View file

@ -2,17 +2,19 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import optimize
import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
#import numdifftools as ndt
from parameterised import parameterised, truncate_pad
import priors
from ..util.linalg import jitchol
from ..inference import optimization
from .. import likelihoods
from ..inference import optimization
from ..util.linalg import jitchol
from GPy.util.misc import opt_wrapper
from parameterised import parameterised, truncate_pad
from scipy import optimize
import multiprocessing as mp
import numpy as np
import priors
import re
import sys
import pdb
# import numdifftools as ndt
class model(parameterised):
def __init__(self):
@ -24,14 +26,14 @@ class model(parameterised):
self.preferred_optimizer = 'tnc'
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _set_params(self,x):
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the model class"
def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def set_prior(self,which,what):
def set_prior(self, which, what):
"""
Sets priors on the model parameters.
@ -52,59 +54,59 @@ class model(parameterised):
which = self.grep_param_names(which)
#check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
# check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
if len(tie_partial_matches):
raise ValueError, "cannot place prior across partial ties"
tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
if len(tie_matches)>1:
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
if len(tie_matches) > 1:
raise ValueError, "cannot place prior across multiple ties"
elif len(tie_matches)==1:
which = which[:1]# just place a prior object on the first parameter
elif len(tie_matches) == 1:
which = which[:1] # just place a prior object on the first parameter
#check constraints are okay
# check constraints are okay
if isinstance(what, (priors.gamma, priors.log_Gaussian)):
assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
unconst = np.setdiff1d(which, self.constrained_positive_indices)
if len(unconst):
print "Warning: constraining parameters to be positive:"
print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n'
self.constrain_positive(unconst)
elif isinstance(what,priors.Gaussian):
assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
elif isinstance(what, priors.Gaussian):
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else:
raise ValueError, "prior not recognised"
#store the prior in a local list
# store the prior in a local list
for w in which:
self.priors[w] = what
def get_gradient(self,name, return_names=False):
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s"%name
raise AttributeError, "no parameter matches %s" % name
def log_prior(self):
"""evaluate the prior"""
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
x = self._get_params()
ret = np.zeros(x.size)
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
return ret
def _transform_gradients(self, g):
@ -113,13 +115,13 @@ class model(parameterised):
"""
x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
[np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.constrained_fixed_indices):
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
return np.delete(g,to_remove)
to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
@ -129,15 +131,15 @@ class model(parameterised):
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
#first take care of all parameters (from N(0,1))
# first take care of all parameters (from N(0,1))
x = self._get_params_transformed()
x = np.random.randn(x.size)
self._set_params_transformed(x)
#now draw from prior where possible
# now draw from prior where possible
x = self._get_params()
[np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@ -171,10 +173,10 @@ class model(parameterised):
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
self.randomize()
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool."
@ -190,10 +192,10 @@ class model(parameterised):
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
else:
raise e
@ -203,22 +205,22 @@ class model(parameterised):
else:
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self,warn=False):
def ensure_default_constraints(self, warn=False):
"""
Ensure that any variables which should clearly be positive have been constrained somehow.
"""
positive_strings = ['variance','lengthscale', 'precision']
positive_strings = ['variance', 'lengthscale', 'precision']
param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings:
for i in self.grep_param_names(s):
if not (i in currently_constrained):
to_make_positive.append(param_names[i])
to_make_positive.append(re.escape(param_names[i]))
if warn:
print "Warning! constraining %s postive"%name
print "Warning! constraining %s positive" % s
if len(to_make_positive):
self.constrain_positive('('+'|'.join(to_make_positive)+')')
self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
@ -236,14 +238,14 @@ class model(parameterised):
self._set_params_transformed(x)
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
return - LL_gradients - prior_gradients
return -LL_gradients - prior_gradients
def objective_and_gradients(self, x):
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
obj_f = -self.log_likelihood() - self.log_prior()
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
obj_grads = - LL_gradients - prior_gradients
obj_grads = -LL_gradients - prior_gradients
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
@ -269,7 +271,7 @@ class model(parameterised):
self._set_params_transformed(opt.x_opt)
def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
sgd.run()
@ -277,8 +279,8 @@ class model(parameterised):
def Laplace_covariance(self):
"""return the covariance matric of a Laplace approximatino at the current (stationary) point"""
#TODO add in the prior contributions for MAP estimation
#TODO fix the hessian for tied, constrained and fixed components
# TODO add in the prior contributions for MAP estimation
# TODO fix the hessian for tied, constrained and fixed components
if hasattr(self, 'log_likelihood_hessian'):
A = -self.log_likelihood_hessian()
@ -292,8 +294,8 @@ class model(parameterised):
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
A[aa,aa] = 0.
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
A[aa, aa] = 0.
return A
def Laplace_evidence(self):
@ -304,11 +306,11 @@ class model(parameterised):
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
except:
return np.nan
return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self):
s = parameterised.__str__(self).split('\n')
#add priors to the string
# add priors to the string
strs = [str(p) if p is not None else '' for p in self.priors]
width = np.array(max([len(p) for p in strs] + [5])) + 4
@ -319,16 +321,16 @@ class model(parameterised):
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
obj_funct += '\n\n'
s[0] = obj_funct + s[0]
s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
s[1] += '-'*(width + 1)
s[0] += "|{h:^{col}}".format(h='Prior', col=width)
s[1] += '-' * (width + 1)
for p in range(2, len(strs)+2):
s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
for p in range(2, len(strs) + 2):
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
return '\n'.join(s)
def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical estimate.
If the verbose flag is passed, invividual components are tested (and printed)
@ -348,27 +350,27 @@ class model(parameterised):
x = self._get_params_transformed().copy()
if not verbose:
#just check the global ratio
dx = step*np.sign(np.random.uniform(-1,1,x.size))
# just check the global ratio
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
#evaulate around the point x
f1, g1 = self.objective_and_gradients(x+dx)
f2, g2 = self.objective_and_gradients(x-dx)
# evaulate around the point x
f1, g1 = self.objective_and_gradients(x + dx)
f2, g2 = self.objective_and_gradients(x - dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
return True
else:
return False
else:
#check the gradient of each parameter individually, and do some pretty printing
# check the gradient of each parameter individually, and do some pretty printing
try:
names = self._get_param_names_transformed()
except NotImplementedError:
names = ['Variable %i'%i for i in range(len(x))]
names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@ -377,9 +379,9 @@ class model(parameterised):
cols = [max_names]
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
cols = np.array(cols) + 5
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
if target_param is None:
@ -395,11 +397,11 @@ class model(parameterised):
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1-f2)/(2*step)
ratio = (f1-f2)/(2*step*gradient)
difference = np.abs((f1-f2)/2/step - gradient)
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(ratio-1)<tolerance):
if (np.abs(ratio - 1) < tolerance):
formatted_name = "\033[92m {0} \033[0m".format(names[i])
else:
formatted_name = "\033[91m {0} \033[0m".format(names[i])
@ -407,7 +409,7 @@ class model(parameterised):
d = '%.6f' % float(difference)
g = '%.6f' % gradient
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
def input_sensitivity(self):
@ -418,21 +420,21 @@ class model(parameterised):
TODO: proper sensitivity analysis
"""
if not hasattr(self,'kern'):
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
if (not len(k)==1) or (not k[0].ARD):
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
if k.name=='rbf':
if k.name == 'rbf':
return k.lengthscale
elif k.name=='linear':
return 1./k.variances
elif k.name == 'linear':
return 1. / k.variances
def pseudo_EM(self,epsilon=.1,**kwargs):
def pseudo_EM(self, epsilon=.1, **kwargs):
"""
TODO: Should this not bein the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation
@ -446,7 +448,7 @@ class model(parameterised):
:type optimzer: string TODO: valid strings?
"""
assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
ll_change = epsilon + 1.
iteration = 0
last_ll = -np.exp(1000)
@ -466,9 +468,9 @@ class model(parameterised):
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation #restore previous likelihood approximation
self._set_params(last_params) #restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
@ -477,5 +479,5 @@ class model(parameterised):
stop = True
iteration += 1
if stop:
print "%s iterations." %iteration
print "%s iterations." % iteration