merged master

Nicolo Fusi 2013-01-31 09:57:40 +00:00
commit 49d2e4e4f6
62 changed files with 2469 additions and 1230 deletions

.travis.yml (new file, 11 lines)

@@ -0,0 +1,11 @@
+language: python
+python:
+- "2.7"
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install:
+- sudo apt-get install python-scipy
+- pip install sphinx
+- pip install . --use-mirrors
+# command to run tests, e.g. python setup.py test
+script:
+- nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing
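
Note: the same test run should be reproducible locally, assuming nose and the nosexcover plugin are installed, with: nosetests --with-xcoverage --with-xunit --cover-package=GPy --cover-erase GPy/testing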

@@ -6,5 +6,6 @@ import kern
 import models
 import inference
 import util
+import examples
 #import examples TODO: discuss!
 from core import priors

@@ -14,18 +14,18 @@ from ..inference import optimization

 class model(parameterised):
     def __init__(self):
         parameterised.__init__(self)
-        self.priors = [None for i in range(self.get_param().size)]
+        self.priors = [None for i in range(self._get_params().size)]
         self.optimization_runs = []
         self.sampling_runs = []
-        self.set_param(self.get_param())
+        self._set_params(self._get_params())
         self.preferred_optimizer = 'tnc'

-    def get_param(self):
+    def _get_params(self):
         raise NotImplementedError, "this needs to be implemented to utilise the model class"

-    def set_param(self,x):
+    def _set_params(self,x):
         raise NotImplementedError, "this needs to be implemented to utilise the model class"

     def log_likelihood(self):
         raise NotImplementedError, "this needs to be implemented to utilise the model class"

-    def log_likelihood_gradients(self):
+    def _log_likelihood_gradients(self):
         raise NotImplementedError, "this needs to be implemented to utilise the model class"

     def set_prior(self,which,what):
@@ -67,7 +67,7 @@ class model(parameterised):
             unconst = np.setdiff1d(which, self.constrained_positive_indices)
             if len(unconst):
                 print "Warning: constraining parameters to be positive:"
-                print '\n'.join([n for i,n in enumerate(self.get_param_names()) if i in unconst])
+                print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
                 print '\n'
                 self.constrain_positive(unconst)
         elif isinstance(what,priors.Gaussian):
@@ -80,48 +80,65 @@ class model(parameterised):
         for w in which:
             self.priors[w] = what

-    def get(self,name):
+    def get(self,name, return_names=False):
         """
-        get a model parameter by name
+        Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
         """
         matches = self.grep_param_names(name)
         if len(matches):
-            return self.get_param()[matches]
+            if return_names:
+                return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
+            else:
+                return self._get_params()[matches]
         else:
             raise AttributeError, "no parameter matches %s"%name

     def set(self,name,val):
         """
-        Set a model parameter by name
+        Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to the given value.
         """
         matches = self.grep_param_names(name)
         if len(matches):
-            x = self.get_param()
+            x = self._get_params()
             x[matches] = val
-            self.set_param(x)
+            self._set_params(x)
+        else:
+            raise AttributeError, "no parameter matches %s"%name
+
+    def get_gradient(self,name, return_names=False):
+        """
+        Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
+        """
+        matches = self.grep_param_names(name)
+        if len(matches):
+            if return_names:
+                return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
+            else:
+                return self._log_likelihood_gradients()[matches]
         else:
             raise AttributeError, "no parameter matches %s"%name

     def log_prior(self):
         """evaluate the prior"""
-        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self.get_param()) if p is not None])
+        return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])

-    def log_prior_gradients(self):
+    def _log_prior_gradients(self):
         """evaluate the gradients of the priors"""
-        x = self.get_param()
+        x = self._get_params()
         ret = np.zeros(x.size)
         [np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
         return ret

-    def extract_gradients(self):
+    def _log_likelihood_gradients_transformed(self):
         """
         Use self.log_likelihood_gradients and self.prior_gradients to get the gradients of the model.
         Adjust the gradient for constraints and ties, return.
         """
-        g = self.log_likelihood_gradients() + self.log_prior_gradients()
+        g = self._log_likelihood_gradients() + self._log_prior_gradients()
-        x = self.get_param()
+        x = self._get_params()
         g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
         g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
         [np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
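
A quick usage sketch of the regex-based accessors introduced in this hunk (a sketch only; m is any model subclass, and the parameter names below are hypothetical):

    # suppose m has parameters 'rbf_variance', 'rbf_lengthscale', 'white_variance'
    vals, names = m.get('rbf', return_names=True)  # values and names of all parameters matching 'rbf'
    m.set('lengthscale', 2.0)                      # set every parameter whose name matches 'lengthscale'
    grads = m.get_gradient('variance')             # log-likelihood gradients of the matching parameters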
@@ -138,14 +155,14 @@ class model(parameterised):
         Make this draw from the prior if one exists, else draw from N(0,1)
         """
         #first take care of all parameters (from N(0,1))
-        x = self.extract_param()
+        x = self._get_params_transformed()
         x = np.random.randn(x.size)
-        self.expand_param(x)
+        self._set_params_transformed(x)
         #now draw from prior where possible
-        x = self.get_param()
+        x = self._get_params()
         [np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
-        self.set_param(x)
+        self._set_params(x)
-        self.expand_param(self.extract_param())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
+        self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)

     def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, **kwargs):

@@ -165,7 +182,7 @@ class model(parameterised):
         :verbose: whether to show informations about the current restart
         """
-        initial_parameters = self.extract_param()
+        initial_parameters = self._get_params_transformed()
         for i in range(Nrestarts):
             try:
                 self.randomize()
@@ -182,9 +199,9 @@ class model(parameterised):
                 raise e
         if len(self.optimization_runs):
             i = np.argmin([o.f_opt for o in self.optimization_runs])
-            self.expand_param(self.optimization_runs[i].x_opt)
+            self._set_params_transformed(self.optimization_runs[i].x_opt)
         else:
-            self.expand_param(initial_parameters)
+            self._set_params_transformed(initial_parameters)

     def ensure_default_constraints(self,warn=False):
         """

@@ -194,7 +211,7 @@ class model(parameterised):
         for s in positive_strings:
             for i in self.grep_param_names(s):
                 if not (i in self.all_constrained_indices()):
-                    name = self.get_param_names()[i]
+                    name = self._get_param_names()[i]
                     self.constrain_positive(name)
                     if warn:
                         print "Warning! constraining %s postive"%name
@@ -214,24 +231,24 @@ class model(parameterised):
             optimizer = self.preferred_optimizer

         def f(x):
-            self.expand_param(x)
+            self._set_params_transformed(x)
             return -self.log_likelihood()-self.log_prior()

         def fp(x):
-            self.expand_param(x)
+            self._set_params_transformed(x)
-            return -self.extract_gradients()
+            return -self._log_likelihood_gradients_transformed()

         def f_fp(x):
-            self.expand_param(x)
+            self._set_params_transformed(x)
-            return -self.log_likelihood()-self.log_prior(),-self.extract_gradients()
+            return -self.log_likelihood()-self.log_prior(),-self._log_likelihood_gradients_transformed()

         if start == None:
-            start = self.extract_param()
+            start = self._get_params_transformed()

         optimizer = optimization.get_optimizer(optimizer)
         opt = optimizer(start, model = self, **kwargs)
         opt.run(f_fp=f_fp, f=f, fp=fp)
         self.optimization_runs.append(opt)
-        self.expand_param(opt.x_opt)
+        self._set_params_transformed(opt.x_opt)

     def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
         # assert self.Y.shape[1] > 1, "SGD only works with D > 1"
@@ -248,13 +265,13 @@ class model(parameterised):
         else:
             print "numerically calculating hessian. please be patient!"
-            x = self.get_param()
+            x = self._get_params()
             def f(x):
-                self.set_param(x)
+                self._set_params(x)
                 return self.log_likelihood()
             h = ndt.Hessian(f)
             A = -h(x)
-            self.set_param(x)
+            self._set_params(x)
         # check for almost zero components on the diagonal which screw up the cholesky
         aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
         A[aa,aa] = 0.

@@ -268,7 +285,7 @@ class model(parameterised):
             hld = np.sum(np.log(np.diag(jitchol(A)[0])))
         except:
             return np.nan
-        return 0.5*self.get_param().size*np.log(2*np.pi) + self.log_likelihood() - hld
+        return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld

     def __str__(self):
         s = parameterised.__str__(self).split('\n')
@@ -292,18 +309,18 @@ class model(parameterised):
         If the overall gradient fails, invividual components are tested.
         """

-        x = self.extract_param().copy()
+        x = self._get_params_transformed().copy()

         #choose a random direction to step in:
         dx = step*np.sign(np.random.uniform(-1,1,x.size))

         #evaulate around the point x
-        self.expand_param(x+dx)
+        self._set_params_transformed(x+dx)
-        f1,g1 = self.log_likelihood() + self.log_prior(), self.extract_gradients()
+        f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self.expand_param(x-dx)
+        self._set_params_transformed(x-dx)
-        f2,g2 = self.log_likelihood() + self.log_prior(), self.extract_gradients()
+        f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()
-        self.expand_param(x)
+        self._set_params_transformed(x)
-        gradient = self.extract_gradients()
+        gradient = self._log_likelihood_gradients_transformed()

         numerical_gradient = (f1-f2)/(2*dx)
         global_ratio = (f1-f2)/(2*np.dot(dx,gradient))

@@ -319,7 +336,7 @@ class model(parameterised):
             print "Global check failed. Testing individual gradients\n"
         try:
-            names = self.extract_param_names()
+            names = self._get_param_names_transformed()
         except NotImplementedError:
             names = ['Variable %i'%i for i in range(len(x))]

@@ -338,13 +355,13 @@ class model(parameterised):
         for i in range(len(x)):
             xx = x.copy()
             xx[i] += step
-            self.expand_param(xx)
+            self._set_params_transformed(xx)
-            f1,g1 = self.log_likelihood() + self.log_prior(), self.extract_gradients()[i]
+            f1,g1 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
             xx[i] -= 2.*step
-            self.expand_param(xx)
+            self._set_params_transformed(xx)
-            f2,g2 = self.log_likelihood() + self.log_prior(), self.extract_gradients()[i]
+            f2,g2 = self.log_likelihood() + self.log_prior(), self._log_likelihood_gradients_transformed()[i]
-            self.expand_param(x)
+            self._set_params_transformed(x)
-            gradient = self.extract_gradients()[i]
+            gradient = self._log_likelihood_gradients_transformed()[i]
             numerical_gradient = (f1-f2)/(2*step)
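
The check above compares each analytic gradient against a symmetric finite difference and reports their ratio. A standalone sketch of the same idea (not GPy code; f is any scalar function of a parameter vector):

    import numpy as np

    def numerical_grad(f, x, i, step=1e-6):
        # central difference (f(x+h) - f(x-h)) / 2h along coordinate i,
        # mirroring the f1, f2 computation in checkgrad above
        xx = x.copy(); xx[i] += step
        f1 = f(xx)
        xx[i] -= 2.*step
        f2 = f(xx)
        return (f1 - f2) / (2.*step)

    # a gradient passes when analytic_grad / numerical_grad is close to 1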

@@ -66,7 +66,7 @@ class parameterised(object):
         if hasattr(self,'prior'):
             pass
-        self.expand_param(self.extract_param())# sets tied parameters to single value
+        self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value

     def untie_everything(self):
         """Unties all parameters by setting tied_indices to an empty list."""
@@ -87,7 +87,7 @@ class parameterised(object):
         Returns
         -------
-        the indices of self.get_param_names which match the regular expression.
+        the indices of self._get_param_names which match the regular expression.

         Notes
         -----

@@ -96,9 +96,9 @@ class parameterised(object):
         if type(expr) is str:
             expr = re.compile(expr)
-            return np.nonzero([expr.search(name) for name in self.get_param_names()])[0]
+            return np.nonzero([expr.search(name) for name in self._get_param_names()])[0]
         elif type(expr) is re._pattern_type:
-            return np.nonzero([expr.search(name) for name in self.get_param_names()])[0]
+            return np.nonzero([expr.search(name) for name in self._get_param_names()])[0]
         else:
             return expr
@@ -115,11 +115,11 @@ class parameterised(object):
         assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
         self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches))
         #check to ensure constraint is in place
-        x = self.get_param()
+        x = self._get_params()
         for i,xx in enumerate(x):
             if (xx<0) & (i in matches):
                 x[i] = -xx
-        self.set_param(x)
+        self._set_params(x)

     def unconstrain(self,which):

@@ -163,11 +163,11 @@ class parameterised(object):
         assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
         self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches))
         #check to ensure constraint is in place
-        x = self.get_param()
+        x = self._get_params()
         for i,xx in enumerate(x):
             if (xx>0.) and (i in matches):
                 x[i] = -xx
-        self.set_param(x)
+        self._set_params(x)
@@ -187,11 +187,11 @@ class parameterised(object):
         self.constrained_bounded_uppers.append(upper)
         self.constrained_bounded_lowers.append(lower)
         #check to ensure constraint is in place
-        x = self.get_param()
+        x = self._get_params()
         for i,xx in enumerate(x):
             if ((xx<=lower)|(xx>=upper)) & (i in matches):
                 x[i] = sigmoid(xx)*(upper-lower) + lower
-        self.set_param(x)
+        self._set_params(x)

     def constrain_fixed(self, which, value = None):
@@ -213,14 +213,14 @@ class parameterised(object):
         if value != None:
             self.constrained_fixed_values.append(value)
         else:
-            self.constrained_fixed_values.append(self.get_param()[self.constrained_fixed_indices[-1]])
+            self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]])
         #self.constrained_fixed_values.append(value)
-        self.expand_param(self.extract_param())
+        self._set_params_transformed(self._get_params_transformed())

-    def extract_param(self):
+    def _get_params_transformed(self):
-        """use self.get_param to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
+        """use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
-        x = self.get_param()
+        x = self._get_params()
         x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices])
         x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices])
         [np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
@@ -232,8 +232,8 @@ class parameterised(object):
         return x

-    def expand_param(self,x):
+    def _set_params_transformed(self,x):
-        """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self.set_param"""
+        """ takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
         #work out how many places are fixed, and where they are. tricky logic!
         Nfix_places = 0.

@@ -257,14 +257,14 @@ class parameterised(object):
         xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices])
         xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices])
         [np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
-        self.set_param(xx)
+        self._set_params(xx)

-    def extract_param_names(self):
+    def _get_param_names_transformed(self):
         """
         Returns the parameter names as propagated after constraining,
-        tying or fixing, i.e. a list of the same length as extract_param()
+        tying or fixing, i.e. a list of the same length as _get_params_transformed()
         """
-        n = self.get_param_names()
+        n = self._get_param_names()

         #remove/concatenate the tied parameter names
         if len(self.tied_indices):
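
The two *_transformed methods are inverse reparameterisations: positive parameters are optimised as log(p), negative ones as log(-p), and bounded ones through a scaled sigmoid. A standalone sketch of the bounded case (illustrative values, not GPy code):

    import numpy as np
    sigmoid = lambda z: 1./(1. + np.exp(-z))

    p, l, h = 0.3, 0.0, 1.0                       # parameter with bounds l < p < h
    t = np.log((p - l)/(h - p))                   # to the unconstrained space (_get_params_transformed)
    assert np.isclose(l + sigmoid(t)*(h - l), p)  # and back (_set_params_transformed)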
@@ -294,13 +294,13 @@ class parameterised(object):
         """
         Return a string describing the parameter names and their ties and constraints
         """
-        names = self.get_param_names()
+        names = self._get_param_names()
         N = len(names)
         if not N:
             return "This object has no free parameters."
         header = ['Name','Value','Constraints','Ties']
-        values = self.get_param() #map(str,self.get_param())
+        values = self._get_params() #map(str,self._get_params())
         #sort out the constraints
         constraints = ['']*len(names)
         for i in self.constrained_positive_indices:

@@ -10,6 +10,7 @@ from ..util.linalg import pdinv

 class prior:
     def pdf(self,x):
         return np.exp(self.lnpdf(x))
+
     def plot(self):
         rvs = self.rvs(1000)
         pb.hist(rvs,100,normed=True)
@@ -17,45 +18,79 @@ class prior:
         xx = np.linspace(xmin,xmax,1000)
         pb.plot(xx,self.pdf(xx),'r',linewidth=2)

 class Gaussian(prior):
     """
-    Implementation of the univariate Gaussian probability function, coupled with random variables, since scipy.stats sucks.
+    Implementation of the univariate Gaussian probability function, coupled with random variables.
-    Using Bishop 2006 notation"""
+
+    :param mu: mean
+    :param sigma: standard deviation
+
+    .. Note:: Bishop 2006 notation is used throughout the code
+    """
     def __init__(self,mu,sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
         self.sigma2 = np.square(self.sigma)
         self.constant = -0.5*np.log(2*np.pi*self.sigma2)

     def __str__(self):
         return "N("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')'

     def lnpdf(self,x):
         return self.constant - 0.5*np.square(x-self.mu)/self.sigma2

     def lnpdf_grad(self,x):
         return -(x-self.mu)/self.sigma2

     def rvs(self,n):
         return np.random.randn(n)*self.sigma + self.mu

 class log_Gaussian(prior):
     """
+    Implementation of the univariate *log*-Gaussian probability function, coupled with random variables.
+
+    :param mu: mean
+    :param sigma: standard deviation
+
+    .. Note:: Bishop 2006 notation is used throughout the code
     """
     def __init__(self,mu,sigma):
         self.mu = float(mu)
         self.sigma = float(sigma)
         self.sigma2 = np.square(self.sigma)
         self.constant = -0.5*np.log(2*np.pi*self.sigma2)

     def __str__(self):
         return "lnN("+str(np.round(self.mu))+', '+str(np.round(self.sigma2))+')'

     def lnpdf(self,x):
         return self.constant - 0.5*np.square(np.log(x)-self.mu)/self.sigma2 -np.log(x)

     def lnpdf_grad(self,x):
         return -((np.log(x)-self.mu)/self.sigma2+1.)/x

     def rvs(self,n):
         return np.exp(np.random.randn(n)*self.sigma + self.mu)

 class multivariate_Gaussian:
     """
-    Implementation of the multivariate Gaussian probability function, coupled with random variables, since scipy.stats sucks.
+    Implementation of the multivariate Gaussian probability function, coupled with random variables.
-    Using Bishop 2006 notation"""
+
+    :param mu: mean (N-dimensional array)
+    :param var: covariance matrix (NxN)
+
+    .. Note:: Bishop 2006 notation is used throughout the code
+    """
     def __init__(self,mu,var):
         self.mu = np.array(mu).flatten()
         self.var = np.array(var)
@@ -67,17 +102,22 @@ class multivariate_Gaussian:
         self.constant = -0.5*self.D*np.log(2*np.pi) - self.hld

     def summary(self):
-        pass #TODO
+        raise NotImplementedError

     def pdf(self,x):
         return np.exp(self.lnpdf(x))

     def lnpdf(self,x):
         d = x-self.mu
         return self.constant - 0.5*np.sum(d*np.dot(d,self.inv),1)

     def lnpdf_grad(self,x):
         d = x-self.mu
         return -np.dot(self.inv,d)

     def rvs(self,n):
         return np.random.multivariate_normal(self.mu, self.var,n)

     def plot(self):
         if self.D==2:
             rvs = self.rvs(200)
@@ -91,7 +131,15 @@ class multivariate_Gaussian:

 def gamma_from_EV(E,V):
-    """create an instance of a gamma prior by specifying the Expected value(s) and Variance(s) of the distribution"""
+    """
+    Creates an instance of a gamma prior by specifying the Expected value(s)
+    and Variance(s) of the distribution.
+
+    :param E: expected value
+    :param V: variance
+    """
     a = np.square(E)/V
     b = E/V
     return gamma(a,b)
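
The conversion follows from the Gamma moments under the rate parameterisation, E = a/b and V = a/b^2, which invert to b = E/V and a = E^2/V. A quick numerical check (illustrative values):

    import numpy as np

    E, V = 2.0, 0.5
    a, b = np.square(E)/V, E/V      # a = 8.0, b = 4.0, as computed above
    assert np.isclose(a/b, E)       # mean a/b recovers E
    assert np.isclose(a/b**2, V)    # variance a/b**2 recovers V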
@@ -99,15 +147,23 @@ def gamma_from_EV(E,V):

 class gamma(prior):
     """
-    Implementation of the Gamma probability function, coupled with random variables, since scipy.stats sucks.
+    Implementation of the Gamma probability function, coupled with random variables.
-    Using Bishop 2006 notation
+
+    :param a: shape parameter
+    :param b: rate parameter (warning: it's the *inverse* of the scale)
+
+    .. Note:: Bishop 2006 notation is used throughout the code
     """
     def __init__(self,a,b):
         self.a = float(a)
         self.b = float(b)
         self.constant = -gammaln(self.a) + a*np.log(b)

     def __str__(self):
         return "Ga("+str(np.round(self.a))+', '+str(np.round(self.b))+')'

     def summary(self):
         ret = {"E[x]": self.a/self.b,\
             "E[ln x]": digamma(self.a) - np.log(self.b),\

@@ -118,10 +174,12 @@ class gamma(prior):
         else:
             ret['mode'] = np.nan
         return ret

     def lnpdf(self,x):
         return self.constant + (self.a-1)*np.log(x) - self.b*x

     def lnpdf_grad(self,x):
         return (self.a-1.)/x - self.b

     def rvs(self,n):
         return np.random.gamma(scale=1./self.b,shape=self.a,size=n)
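
All priors expose the same small interface (lnpdf, lnpdf_grad, rvs); note that rvs above passes scale=1./b because np.random.gamma is parameterised by scale rather than rate. A usage sketch, relying on the "from core import priors" re-export in GPy/__init__.py above:

    import numpy as np
    import GPy

    p = GPy.priors.Gaussian(mu=0., sigma=1.)
    draws = p.rvs(1000)                       # 1000 samples from N(0, 1)
    logp = p.lnpdf(0.5)                       # -0.5*np.log(2*np.pi) - 0.125
    g = GPy.priors.gamma_from_EV(2.0, 0.5)    # gamma prior with mean 2 and variance 0.5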

@@ -0,0 +1,33 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+import numpy as np
+import pylab as pb
+import GPy
+
+np.random.seed(123344)
+
+N = 10
+M = 5
+Q = 3
+D = 4
+#generate GPLVM-like data
+X = np.random.rand(N, Q)
+k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
+K = k.K(X)
+Y = np.random.multivariate_normal(np.zeros(N),K,D).T
+
+# k = GPy.kern.rbf(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
+k = GPy.kern.linear(Q, ARD = False) + GPy.kern.white(Q, 0.00001)
+
+m = GPy.models.Bayesian_GPLVM(Y, Q, kernel = k, M=M)
+m.constrain_positive('(rbf|bias|noise|white|S)')
+# m.constrain_fixed('S', 1)
+
+# pb.figure()
+# m.plot()
+# pb.title('PCA initialisation')
+# pb.figure()
+# m.optimize(messages = 1)
+# m.plot()
+# pb.title('After optimisation')
+
+m.randomize()
+m.checkgrad(verbose = 1)

@@ -1,28 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-import numpy as np
-import pylab as pb
-import GPy
-
-np.random.seed(1)
-
-print "GPLVM with RBF kernel"
-N = 100
-Q = 1
-D = 2
-X = np.random.rand(N, Q)
-k = GPy.kern.rbf(Q, 1.0, 2.0) + GPy.kern.white(Q, 0.00001)
-K = k.K(X)
-Y = np.random.multivariate_normal(np.zeros(N),K,D).T
-
-m = GPy.models.GPLVM(Y, Q)
-m.constrain_positive('(rbf|bias|white)')
-
-pb.figure()
-m.plot()
-pb.title('PCA initialisation')
-pb.figure()
-m.optimize(messages = 1)
-m.plot()
-pb.title('After optimisation')

@@ -1,51 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-"""
-Simple Gaussian Processes regression with an RBF kernel
-"""
-import pylab as pb
-import numpy as np
-import GPy
-
-pb.ion()
-pb.close('all')
-
-######################################
-## 1 dimensional example
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(20,1))
-Y = np.sin(X)+np.random.randn(20,1)*0.05
-
-# create simple GP model
-m = GPy.models.GP_regression(X,Y)
-
-# contrain all parameters to be positive
-m.constrain_positive('')
-
-# optimize and plot
-m.optimize('tnc', max_f_eval = 1000)
-m.plot()
-print(m)
-
-######################################
-## 2 dimensional example
-
-# sample inputs and outputs
-X = np.random.uniform(-3.,3.,(40,2))
-Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(40,1)*0.05
-
-# create simple GP model
-m = GPy.models.GP_regression(X,Y)
-
-# contrain all parameters to be positive
-m.constrain_positive('')
-
-# optimize and plot
-pb.figure()
-m.optimize('tnc', max_f_eval = 1000)
-m.plot()
-print(m)

@@ -1,33 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-
-"""
-Simple one-dimensional Gaussian Processes with assorted kernel functions
-"""
-import pylab as pb
-import numpy as np
-import GPy
-
-# sample inputs and outputs
-D = 1
-X = np.random.randn(10,D)*2
-X = np.linspace(-1.5,1.5,5)[:,None]
-X = np.append(X,[[5]],0)
-Y = np.sin(np.pi*X/2) #+np.random.randn(X.shape[0],1)*0.05
-
-models = [GPy.models.GP_regression(X,Y, k) for k in (GPy.kern.rbf(D), GPy.kern.Matern52(D), GPy.kern.Matern32(D), GPy.kern.exponential(D), GPy.kern.linear(D) + GPy.kern.white(D), GPy.kern.bias(D) + GPy.kern.white(D))]
-
-pb.figure(figsize=(12,8))
-for i,m in enumerate(models):
-    m.constrain_positive('')
-    m.optimize()
-    pb.subplot(3,2,i+1)
-    m.plot()
-    #pb.title(m.kern.parts[0].name)
-
-GPy.util.plot.align_subplots(3,2,(-3,6),(-2.5,2.5))
-pb.show()

GPy/examples/__init__.py (new file, 8 lines)

@@ -0,0 +1,8 @@
+# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
+# Licensed under the BSD 3-clause license (see LICENSE.txt)
+
+# Please don't delete this without explaining to Neil the right way of doing this. I want to be able to run:
+# GPy.examples.regression.toy_rbf_1D() from ipython having imported GPy, and this seems to be the way to do it!
+import classification
+import regression
+import unsupervised

@@ -8,8 +8,6 @@ Simple Gaussian Processes classification
 import pylab as pb
 import numpy as np
 import GPy
-pb.ion()
-pb.close('all')

 default_seed=10000

 ######################################

@@ -27,7 +25,7 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed):
     likelihood = GPy.inference.likelihoods.probit(data['Y'])
     if model_type=='Full':
-        m = GPy.models.simple_GP_EP(data['X'],likelihood)
+        m = GPy.models.GP_EP(data['X'],likelihood)
     else:
         # create sparse GP EP model
         m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)

@@ -49,7 +47,7 @@ def oil():
     likelihood = GPy.inference.likelihoods.probit(data['Y'][:, 0:1])

     # create simple GP model
-    m = GPy.models.simple_GP_EP(data['X'],likelihood)
+    m = GPy.models.GP_EP(data['X'],likelihood)

     # contrain all parameters to be positive
     m.constrain_positive('')

@@ -8,8 +8,6 @@ Gaussian Processes regression examples
 import pylab as pb
 import numpy as np
 import GPy
-pb.ion()
-pb.close('all')

 def toy_rbf_1d():

@@ -19,10 +17,8 @@ def toy_rbf_1d():
     # create simple GP model
     m = GPy.models.GP_regression(data['X'],data['Y'])

-    # contrain all parameters to be positive
-    m.constrain_positive('')
     # optimize
+    m.ensure_default_constraints()
     m.optimize()

     # plot

@@ -37,10 +33,8 @@ def rogers_girolami_olympics():
     # create simple GP model
     m = GPy.models.GP_regression(data['X'],data['Y'])

-    # contrain all parameters to be positive
-    m.constrain_positive('')
     # optimize
+    m.ensure_default_constraints()
     m.optimize()

     # plot

@@ -48,6 +42,10 @@ def rogers_girolami_olympics():
     print(m)
     return m

+def della_gatta_TRP63_gene_expression(number=942):
+    """Run a standard Gaussian process regression on the della Gatta et al TRP63 Gene Expression data set for a given gene number."""
+
 def toy_rbf_1d_50():
     """Run a simple demonstration of a standard Gaussian process fitting it to data sampled from an RBF covariance."""
     data = GPy.util.datasets.toy_rbf_1d_50()

@@ -55,10 +53,8 @@ def toy_rbf_1d_50():
     # create simple GP model
     m = GPy.models.GP_regression(data['X'],data['Y'])

-    # contrain all parameters to be positive
-    m.constrain_positive('')
     # optimize
+    m.ensure_default_constraints()
     m.optimize()

     # plot

@@ -73,11 +69,95 @@ def silhouette():
     # create simple GP model
     m = GPy.models.GP_regression(data['X'],data['Y'])

-    # contrain all parameters to be positive
-    m.constrain_positive('')
     # optimize
+    m.ensure_default_constraints()
     m.optimize()

     print(m)
     return m
+
+def multiple_optima(gene_number=937,resolution=80, model_restarts=10, seed=10000):
+    """Show an example of a multimodal error surface for Gaussian process regression. Gene 939 has bimodal behaviour where the noisy mode is higher."""
+
+    # Contour over a range of length scales and signal/noise ratios.
+    length_scales = np.linspace(0.1, 60., resolution)
+    log_SNRs = np.linspace(-3., 4., resolution)
+
+    data = GPy.util.datasets.della_gatta_TRP63_gene_expression(gene_number)
+    # Sub sample the data to ensure multiple optima
+    #data['Y'] = data['Y'][0::2, :]
+    #data['X'] = data['X'][0::2, :]
+
+    # Remove the mean (no bias kernel to ensure signal/noise is in RBF/white)
+    data['Y'] = data['Y'] - np.mean(data['Y'])
+
+    lls = GPy.examples.regression.contour_data(data, length_scales, log_SNRs, GPy.kern.rbf)
+    pb.contour(length_scales, log_SNRs, np.exp(lls), 20)
+    ax = pb.gca()
+    pb.xlabel('length scale')
+    pb.ylabel('log_10 SNR')
+
+    xlim = ax.get_xlim()
+    ylim = ax.get_ylim()
+
+    # Now run a few optimizations
+    models = []
+    optim_point_x = np.empty(2)
+    optim_point_y = np.empty(2)
+    np.random.seed(seed=seed)
+    for i in range(0, model_restarts):
+        kern = GPy.kern.rbf(1, variance=np.random.exponential(1.), lengthscale=np.random.exponential(50.)) + GPy.kern.white(1,variance=np.random.exponential(1.))
+
+        m = GPy.models.GP_regression(data['X'],data['Y'], kernel=kern)
+        optim_point_x[0] = m.get('rbf_lengthscale')
+        optim_point_y[0] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
+
+        # optimize
+        m.ensure_default_constraints()
+        m.optimize(xtol=1e-6,ftol=1e-6)
+
+        optim_point_x[1] = m.get('rbf_lengthscale')
+        optim_point_y[1] = np.log10(m.get('rbf_variance')) - np.log10(m.get('white_variance'));
+
+        pb.arrow(optim_point_x[0], optim_point_y[0], optim_point_x[1]-optim_point_x[0], optim_point_y[1]-optim_point_y[0], label=str(i), head_length=1, head_width=0.5, fc='k', ec='k')
+        models.append(m)
+
+    ax.set_xlim(xlim)
+    ax.set_ylim(ylim)
+    return (models, lls)
+
+def contour_data(data, length_scales, log_SNRs, signal_kernel_call=GPy.kern.rbf):
+    """Evaluate the GP objective function for a given data set for a range of signal to noise ratios and a range of lengthscales.
+
+    :data_set: A data set from the utils.datasets directory.
+    :length_scales: a list of length scales to explore for the contour plot.
+    :log_SNRs: a list of base 10 logarithm signal to noise ratios to explore for the contour plot.
+    :signal_kernel: a kernel to use for the 'signal' portion of the data."""
+
+    lls = []
+    total_var = np.var(data['Y'])
+    for log_SNR in log_SNRs:
+        SNR = 10**log_SNR
+        length_scale_lls = []
+
+        for length_scale in length_scales:
+            noise_var = 1.
+            signal_var = SNR
+            noise_var = noise_var/(noise_var + signal_var)*total_var
+            signal_var = signal_var/(noise_var + signal_var)*total_var
+
+            signal_kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale)
+            noise_kernel = GPy.kern.white(1, variance=noise_var)
+            kernel = signal_kernel + noise_kernel
+            K = kernel.K(data['X'])
+            total_var = (np.dot(np.dot(data['Y'].T,GPy.util.linalg.pdinv(K)[0]), data['Y'])/data['Y'].shape[0])[0,0]
+            noise_var *= total_var
+            signal_var *= total_var
+
+            kernel = signal_kernel_call(1, variance=signal_var, lengthscale=length_scale) + GPy.kern.white(1, variance=noise_var)
+            model = GPy.models.GP_regression(data['X'], data['Y'], kernel=kernel)
+            model.constrain_positive('')
+            length_scale_lls.append(model.log_likelihood())
+
+        lls.append(length_scale_lls)
+
+    return np.array(lls)
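
With the GPy.examples package registered in __init__.py above, the new demo can be run straight from an interactive session (a sketch; smaller values keep the runtime down):

    import GPy
    models, lls = GPy.examples.regression.multiple_optima(resolution=40, model_restarts=5)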

@@ -0,0 +1,25 @@
+"""
+Unsupervised learning with Gaussian Processes.
+"""
+import pylab as pb
+import numpy as np
+import GPy
+
+######################################
+## Oil data subsampled to 100 points.
+def oil_100():
+    data = GPy.util.datasets.oil_100()
+
+    # create simple GP model
+    m = GPy.models.GPLVM(data['X'], 2)
+
+    # optimize
+    m.ensure_default_constraints()
+    m.optimize()
+
+    # plot
+    print(m)
+    return m

@@ -1,45 +0,0 @@
-# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
-# Licensed under the BSD 3-clause license (see LICENSE.txt)
-import numpy as np
-import scipy as sp
-import pdb, sys, pickle
-import matplotlib.pylab as plt
-import GPy
-
-np.random.seed(1)
-N = 100
-
-# sample inputs and outputs
-X = np.random.uniform(-np.pi,np.pi,(N,1))
-Y = np.sin(X)+np.random.randn(N,1)*0.05
-# Y += np.abs(Y.min()) + 0.5
-Z = np.exp(Y)# Y**(1/3.0)
-
-# rescaling targets?
-Zmax = Z.max()
-Zmin = Z.min()
-Z = (Z-Zmin)/(Zmax-Zmin) - 0.5
-
-m = GPy.models.warpedGP(X, Z, warping_terms = 2)
-m.constrain_positive('(tanh_a|tanh_b|rbf|white|bias)')
-m.randomize()
-
-plt.figure()
-plt.xlabel('predicted f(Z)')
-plt.ylabel('actual f(Z)')
-plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'before training')
-m.optimize(messages = True)
-plt.plot(m.Y, Y, 'o', alpha = 0.5, label = 'after training')
-plt.legend(loc = 0)
-m.plot_warping()
-
-plt.figure()
-plt.title('warped GP fit')
-m.plot()
-
-m1 = GPy.models.GP_regression(X, Z)
-m1.constrain_positive('(rbf|white|bias)')
-m1.randomize()
-m1.optimize(messages = True)
-
-plt.figure()
-plt.title('GP fit')
-m1.plot()

@@ -9,7 +9,7 @@ import pylab as pb
 from ..util.plot import gpplot

 class likelihood:
-    def __init__(self,Y):
+    def __init__(self,Y,location=0,scale=1):
         """
         Likelihood class for doing Expectation propagation

@@ -18,6 +18,8 @@ class likelihood:
         """
         self.Y = Y
         self.N = self.Y.shape[0]
+        self.location = location
+        self.scale = scale

     def plot1Da(self,X_new,Mean_new,Var_new,X_u,Mean_u,Var_u):
         """

@@ -99,6 +101,119 @@ class probit(likelihood):
     def predictive_mean(self,mu,variance):
         return stats.norm.cdf(mu/np.sqrt(1+variance))

-    def log_likelihood_gradients():
+    def _log_likelihood_gradients():
         raise NotImplementedError
+
+class poisson(likelihood):
+    """
+    Poisson likelihood
+
+    Y is expected to take values in {0,1,2,...}
+    -----
+    $$
+    L(x) = \exp(-\lambda) * \lambda**Y_i / Y_i!
+    $$
+    """
+    def moments_match(self,i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param i: number of observation (int)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: mean/variance of the cavity distribution (float)
+        """
+        mu = v_i/tau_i
+        sigma = np.sqrt(1./tau_i)
+
+        def poisson_norm(f):
+            """
+            Product of the likelihood and the cavity distribution
+            """
+            pdf_norm_f = stats.norm.pdf(f,loc=mu,scale=sigma)
+            rate = np.exp( (f*self.scale)+self.location)
+            poisson = stats.poisson.pmf(float(self.Y[i]),rate)
+            return pdf_norm_f*poisson
+
+        def log_pnm(f):
+            """
+            Log of poisson_norm
+            """
+            return -(-.5*(f-mu)**2/sigma**2 - np.exp( (f*self.scale)+self.location) + ( (f*self.scale)+self.location)*self.Y[i])
+
+        """
+        Golden Search and Simpson's Rule
+        --------------------------------
+        Simpson's Rule is used to calculate the moments numerically, it needs a grid of points as input.
+        Golden Search is used to find the mode in the poisson_norm distribution and define around it the grid for Simpson's Rule
+        """
+        #TODO golden search & simpson's rule can be defined in the general likelihood class, rather than in each specific case.
+        #Golden search
+        golden_A = -1 if self.Y[i] == 0 else np.array([np.log(self.Y[i]),mu]).min() #Lower limit
+        golden_B = np.array([np.log(self.Y[i]),mu]).max() #Upper limit
+        golden_A = (golden_A - self.location)/self.scale
+        golden_B = (golden_B - self.location)/self.scale
+        opt = sp.optimize.golden(log_pnm,brack=(golden_A,golden_B)) #Better to work with log_pnm than with poisson_norm
+
+        # Simpson's approximation
+        width = 3./np.log(max(self.Y[i],2))
+        A = opt - width #Lower limit
+        B = opt + width #Upper limit
+        K = 10*int(np.log(max(self.Y[i],150))) #Number of points in the grid, we DON'T want K to be the same number for every case
+        h = (B-A)/K # length of the intervals
+        grid_x = np.hstack([np.linspace(opt-width,opt,K/2+1)[1:-1], np.linspace(opt,opt+width,K/2+1)]) # grid of points (X axis)
+        x = np.hstack([A,B,grid_x[range(1,K,2)],grid_x[range(2,K-1,2)]]) # grid_x rearranged, just to make Simpson's algorithm easier
+        zeroth = np.hstack([poisson_norm(A),poisson_norm(B),[4*poisson_norm(f) for f in grid_x[range(1,K,2)]],[2*poisson_norm(f) for f in grid_x[range(2,K-1,2)]]]) # grid of points (Y axis) rearranged like x
+        first = zeroth*x
+        second = first*x
+        Z_hat = sum(zeroth)*h/3 # Zero-th moment
+        mu_hat = sum(first)*h/(3*Z_hat) # First moment
+        m2 = sum(second)*h/(3*Z_hat) # Second moment
+        sigma2_hat = m2 - mu_hat**2 # Second central moment
+        return float(Z_hat), float(mu_hat), float(sigma2_hat)
+
+    def plot1Db(self,X,X_new,F_new,F2_new=None,U=None):
+        pb.subplot(212)
+        #gpplot(X_new,F_new,np.sqrt(F2_new))
+        pb.plot(X_new,F_new)#,np.sqrt(F2_new)) #FIXME
+        pb.plot(X,self.Y,'kx',mew=1.5)
+        if U is not None:
+            pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
+
+    def predictive_mean(self,mu,variance):
+        return np.exp(mu*self.scale + self.location)
+
+    def predictive_variance(self,mu,variance):
+        return mu
+
+    def _log_likelihood_gradients():
+        raise NotImplementedError
+
+class gaussian(likelihood):
+    """
+    Gaussian likelihood
+
+    Y is expected to take values in (-inf,inf)
+    """
+    def moments_match(self,i,tau_i,v_i):
+        """
+        Moments match of the marginal approximation in EP algorithm
+
+        :param i: number of observation (int)
+        :param tau_i: precision of the cavity distribution (float)
+        :param v_i: mean/variance of the cavity distribution (float)
+        """
+        mu = v_i/tau_i
+        sigma = np.sqrt(1./tau_i)
+        s = 1. if self.Y[i] == 0 else 1./self.Y[i]
+        sigma2_hat = 1./(1./sigma**2 + 1./s**2)
+        mu_hat = sigma2_hat*(mu/sigma**2 + self.Y[i]/s**2)
+        Z_hat = 1./np.sqrt(2*np.pi) * 1./np.sqrt(sigma**2+s**2) * np.exp(-.5*(mu-self.Y[i])**2/(sigma**2 + s**2))
+        return Z_hat, mu_hat, sigma2_hat
+
+    def plot1Db(self,X,X_new,F_new,U=None):
+        assert X.shape[1] == 1, 'Number of dimensions must be 1'
+        gpplot(X_new,F_new,np.zeros(X_new.shape[0]))
+        pb.plot(X,self.Y,'kx',mew=1.5)
+        if U is not None:
+            pb.plot(U,np.ones(U.shape[0])*self.Y.min()*.8,'r|',mew=1.5,markersize=12)
+
+    def predictive_mean(self,mu,Sigma):
+        return mu
+
+    def _log_likelihood_gradients():
+        raise NotImplementedError
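
For the gaussian likelihood the moments are closed form because the product of two Gaussians is an unnormalised Gaussian; the poisson case has no such form, hence the golden search plus Simpson's rule above. A standalone check of the closed-form expressions against brute-force quadrature (illustrative numbers, with s = 1):

    import numpy as np
    from scipy import stats

    mu, sigma, y, s = 0.3, 0.8, 1.2, 1.0
    # closed form, as in gaussian.moments_match
    sigma2_hat = 1./(1./sigma**2 + 1./s**2)
    mu_hat = sigma2_hat*(mu/sigma**2 + y/s**2)
    Z_hat = stats.norm.pdf(y, loc=mu, scale=np.sqrt(sigma**2 + s**2))

    # brute-force quadrature of N(f|mu,sigma^2) * N(y|f,s^2)
    f = np.linspace(-10., 10., 100001)
    w = stats.norm.pdf(f, loc=mu, scale=sigma)*stats.norm.pdf(y, loc=f, scale=s)
    Z = np.trapz(w, f)
    assert np.isclose(Z, Z_hat)
    assert np.isclose(np.trapz(w*f, f)/Z, mu_hat)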

@@ -17,7 +17,7 @@ class Metropolis_Hastings:
     def __init__(self,model,cov=None):
         """Metropolis Hastings, with tunings according to Gelman et al. """
         self.model = model
-        current = self.model.extract_param()
+        current = self.model._get_params_transformed()
         self.D = current.size
         self.chains = []
         if cov is None:

@@ -32,19 +32,19 @@ class Metropolis_Hastings:
         if start is None:
             self.model.randomize()
         else:
-            self.model.expand_param(start)
+            self.model._set_params_transformed(start)

     def sample(self, Ntotal, Nburn, Nthin, tune=True, tune_throughout=False, tune_interval=400):
-        current = self.model.extract_param()
+        current = self.model._get_params_transformed()
         fcurrent = self.model.log_likelihood() + self.model.log_prior()
         accepted = np.zeros(Ntotal,dtype=np.bool)
         for it in range(Ntotal):
             print "sample %d of %d\r"%(it,Ntotal),
             sys.stdout.flush()
             prop = np.random.multivariate_normal(current, self.cov*self.scale*self.scale)
-            self.model.expand_param(prop)
+            self.model._set_params_transformed(prop)
             fprop = self.model.log_likelihood() + self.model.log_prior()

             if fprop>fcurrent:#sample accepted, going 'uphill'

@@ -73,12 +73,12 @@ class Metropolis_Hastings:
     def predict(self,function,args):
         """Make a prediction for the function, to which we will pass the additional arguments"""
-        param = self.model.get_param()
+        param = self.model._get_params()
         fs = []
         for p in self.chain:
-            self.model.set_param(p)
+            self.model._set_params(p)
             fs.append(function(*args))
-        self.model.set_param(param)# reset model to starting state
+        self.model._set_params(param)# reset model to starting state
         return fs
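
A usage sketch for the sampler (hypothetical setup; m is any model exposing the _get_params_transformed interface above, and the import path into GPy's inference code is assumed):

    mcmc = Metropolis_Hastings(m)                     # cov=None uses the default proposal covariance
    mcmc.sample(Ntotal=10000, Nburn=1000, Nthin=10)   # tuned per Gelman et al. while sampling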

@@ -23,16 +23,16 @@ class Brownian(kernpart):
         assert self.D==1, "Brownian motion in 1D only"
         self.Nparam = 1.
         self.name = 'Brownian'
-        self.set_param(np.array([variance]).flatten())
+        self._set_params(np.array([variance]).flatten())

-    def get_param(self):
+    def _get_params(self):
         return self.variance

-    def set_param(self,x):
+    def _set_params(self,x):
         assert x.shape==(1,)
         self.variance = x

-    def get_param_names(self):
+    def _get_param_names(self):
         return ['variance']

     def K(self,X,X2,target):

@@ -20,43 +20,54 @@ class Matern32(kernpart):
     :type D: int
     :param variance: the variance :math:`\sigma^2`
     :type variance: float
-    :param lengthscale: the lengthscales :math:`\ell_i`
+    :param lengthscale: the vector of lengthscales :math:`\ell_i`
-    :type lengthscale: np.ndarray of size (D,)
+    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
+    :type ARD: Boolean
     :rtype: kernel object
     """

-    def __init__(self,D,variance=1.,lengthscales=None):
+    def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
         self.D = D
-        if lengthscales is not None:
-            assert lengthscales.shape==(self.D,)
-        else:
-            lengthscales = np.ones(self.D)
-        self.Nparam = self.D + 1
-        self.name = 'Mat32'
-        self.set_param(np.hstack((variance,lengthscales)))
+        self.ARD = ARD
+        if ARD == False:
+            self.Nparam = 2
+            self.name = 'Mat32'
+            if lengthscale is not None:
+                assert lengthscale.shape == (1,)
+            else:
+                lengthscale = np.ones(1)
+        else:
+            self.Nparam = self.D + 1
+            self.name = 'Mat32_ARD'
+            if lengthscale is not None:
+                assert lengthscale.shape == (self.D,)
+            else:
+                lengthscale = np.ones(self.D)
+        self._set_params(np.hstack((variance,lengthscale)))

-    def get_param(self):
+    def _get_params(self):
         """return the value of the parameters."""
-        return np.hstack((self.variance,self.lengthscales))
+        return np.hstack((self.variance,self.lengthscale))

-    def set_param(self,x):
+    def _set_params(self,x):
         """set the value of the parameters."""
-        assert x.size==(self.D+1)
+        assert x.size == self.Nparam
         self.variance = x[0]
-        self.lengthscales = x[1:]
+        self.lengthscale = x[1:]

-    def get_param_names(self):
+    def _get_param_names(self):
         """return parameter names."""
-        if self.D==1:
+        if self.Nparam == 2:
             return ['variance','lengthscale']
         else:
-            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]

     def K(self,X,X2,target):
         """Compute the covariance matrix between X and X2."""
         if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
         np.add(self.variance*(1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist), target,target)

     def Kdiag(self,X,target):

@@ -66,13 +77,20 @@ class Matern32(kernpart):
     def dK_dtheta(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to the parameters."""
         if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
         dvar = (1+np.sqrt(3.)*dist)*np.exp(-np.sqrt(3.)*dist)
         invdist = 1./np.where(dist!=0.,dist,np.inf)
-        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3
+        dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
-        dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
+        #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
         target[0] += np.sum(dvar*partial)
-        target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+        if self.ARD == True:
+            dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
+            #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
+            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+        else:
+            dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
+            #dl = self.variance*dvar*dist2M.sum(-1)*invdist
+            target[1] += np.sum(dl*partial)

     def dKdiag_dtheta(self,partial,X,target):
         """derivative of the diagonal of the covariance matrix with respect to the parameters."""

@@ -81,8 +99,8 @@ class Matern32(kernpart):
     def dK_dX(self,partial,X,X2,target):
         """derivative of the covariance matrix with respect to X."""
         if X2 is None: X2 = X
-        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None]
+        dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
-        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf)
+        ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
         dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
         target += np.sum(dK_dX*partial.T[:,:,None],0)

@@ -104,7 +122,7 @@ class Matern32(kernpart):
         """
         assert self.D == 1
         def L(x,i):
-            return(3./self.lengthscales**2*F[i](x) + 2*np.sqrt(3)/self.lengthscales*F1[i](x) + F2[i](x))
+            return(3./self.lengthscale**2*F[i](x) + 2*np.sqrt(3)/self.lengthscale*F1[i](x) + F2[i](x))
         n = F.shape[0]
         G = np.zeros((n,n))
         for i in range(n):

@@ -114,5 +132,5 @@ class Matern32(kernpart):
         F1lower = np.array([f(lower) for f in F1])[:,None]
         #print "OLD \n", np.dot(F1lower,F1lower.T), "\n \n"
         #return(G)
-        return(self.lengthscales**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscales**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
+        return(self.lengthscale**3/(12.*np.sqrt(3)*self.variance) * G + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
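
A usage sketch of the new ARD switch (assuming the GPy.kern.Matern32 constructor wrapper forwards these arguments, as the examples elsewhere in this commit suggest; with ARD=True the kernel carries one lengthscale per input dimension):

    import numpy as np
    import GPy

    k_iso = GPy.kern.Matern32(3)             # isotropic: parameters ['variance', 'lengthscale']
    k_ard = GPy.kern.Matern32(3, ARD=True)   # one lengthscale per input dimension
    X = np.random.randn(10, 3)
    K = k_ard.K(X)                           # 10 x 10 covariance matrix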

View file

@ -19,42 +19,53 @@ class Matern52(kernpart):
:type D: int :type D: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the lengthscales :math:`\ell_i` :param lengthscale: the vector of lengthscale :math:`\ell_i`
:type lengthscale: np.ndarray of size (D,) :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Automatic Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter \ell); otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,D,variance=1.,lengthscales=None): def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D self.D = D
if lengthscales is not None: self.ARD = ARD
assert lengthscales.shape==(self.D,) if ARD == False:
self.Nparam = 2
self.name = 'Mat52'
if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else: else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1 self.Nparam = self.D + 1
self.name = 'Mat52' self.name = 'Mat52_ARD'
self.set_param(np.hstack((variance,lengthscales))) if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
def get_param(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
return np.hstack((self.variance,self.lengthscales)) return np.hstack((self.variance,self.lengthscale))
def set_param(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
assert x.size==(self.D+1) assert x.size == self.Nparam
self.variance = x[0] self.variance = x[0]
self.lengthscales = x[1:] self.lengthscale = x[1:]
def get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
if self.D==1: if self.Nparam == 2:
return ['variance','lengthscale'] return ['variance','lengthscale']
else: else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target): def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2.""" """Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target) np.add(self.variance*(1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist), target,target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
@@ -64,13 +75,20 @@ class Matern52(kernpart):
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf) invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial) target[0] += np.sum(dvar*partial)
if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,X,target): def dKdiag_dtheta(self,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
@@ -79,8 +97,8 @@ class Matern52(kernpart):
def dK_dX(self,partial,X,X2,target): def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*partial.T[:,:,None],0)
@@ -104,18 +122,18 @@ class Matern52(kernpart):
""" """
assert self.D == 1 assert self.D == 1
def L(x,i): def L(x,i):
return(5*np.sqrt(5)/self.lengthscales**3*F[i](x) + 15./self.lengthscales**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscales*F2[i](x) + F3[i](x)) return(5*np.sqrt(5)/self.lengthscale**3*F[i](x) + 15./self.lengthscale**2*F1[i](x)+ 3*np.sqrt(5)/self.lengthscale*F2[i](x) + F3[i](x))
n = F.shape[0] n = F.shape[0]
G = np.zeros((n,n)) G = np.zeros((n,n))
for i in range(n): for i in range(n):
for j in range(i,n): for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
G_coef = 3.*self.lengthscales**5/(400*np.sqrt(5)) G_coef = 3.*self.lengthscale**5/(400*np.sqrt(5))
Flower = np.array([f(lower) for f in F])[:,None] Flower = np.array([f(lower) for f in F])[:,None]
F1lower = np.array([f(lower) for f in F1])[:,None] F1lower = np.array([f(lower) for f in F1])[:,None]
F2lower = np.array([f(lower) for f in F2])[:,None] F2lower = np.array([f(lower) for f in F2])[:,None]
orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscales**4/200*np.dot(F2lower,F2lower.T) orig = 9./8*np.dot(Flower,Flower.T) + 9.*self.lengthscale**4/200*np.dot(F2lower,F2lower.T)
orig2 = 3./5*self.lengthscales**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T)) orig2 = 3./5*self.lengthscale**2 * ( np.dot(F1lower,F1lower.T) + 1./8*np.dot(Flower,F2lower.T) + 1./8*np.dot(F2lower,Flower.T))
return(1./self.variance* (G_coef*G + orig + orig2)) return(1./self.variance* (G_coef*G + orig + orig2))
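Analogously to the Matern 3/2 case, the operator L here is a perfect cube: with :math:`a = \sqrt{5}/\ell`, :math:`(a + d/dx)^3 f = a^3 f + 3a^2 f' + 3a f'' + f''' = 5\sqrt{5}/\ell^3\,f + 15/\ell^2\,f' + 3\sqrt{5}/\ell\,f'' + f'''`, matching the four coefficients used in L above.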
@@ -2,5 +2,5 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, rbf_ARD, spline, Brownian, linear_ARD, rbf_sympy, sympykern from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52
from kern import kern from kern import kern
@@ -17,16 +17,16 @@ class bias(kernpart):
self.D = D self.D = D
self.Nparam = 1 self.Nparam = 1
self.name = 'bias' self.name = 'bias'
self.set_param(np.array([variance]).flatten()) self._set_params(np.array([variance]).flatten())
def get_param(self): def _get_params(self):
return self.variance return self.variance
def set_param(self,x): def _set_params(self,x):
assert x.shape==(1,) assert x.shape==(1,)
self.variance = x self.variance = x
def get_param_names(self): def _get_param_names(self):
return ['variance'] return ['variance']
def K(self,X,X2,target): def K(self,X,X2,target):
@@ -6,10 +6,8 @@ import numpy as np
from kern import kern from kern import kern
from rbf import rbf as rbfpart from rbf import rbf as rbfpart
from rbf_ARD import rbf_ARD as rbf_ARD_part
from white import white as whitepart from white import white as whitepart
from linear import linear as linearpart from linear import linear as linearpart
from linear_ARD import linear_ARD as linear_ARD_part
from exponential import exponential as exponentialpart from exponential import exponential as exponentialpart
from Matern32 import Matern32 as Matern32part from Matern32 import Matern32 as Matern32part
from Matern52 import Matern52 as Matern52part from Matern52 import Matern52 as Matern52part
@@ -17,12 +15,15 @@ from bias import bias as biaspart
from finite_dimensional import finite_dimensional as finite_dimensionalpart from finite_dimensional import finite_dimensional as finite_dimensionalpart
from spline import spline as splinepart from spline import spline as splinepart
from Brownian import Brownian as Brownianpart from Brownian import Brownian as Brownianpart
from periodic_exponential import periodic_exponential as periodic_exponentialpart
from periodic_Matern32 import periodic_Matern32 as periodic_Matern32part
from periodic_Matern52 import periodic_Matern52 as periodic_Matern52part
#TODO these constructors are not as clean as we'd like. Tidy the code up #TODO these constructors are not as clean as we'd like. Tidy the code up
#using meta-classes to make the objects construct properly without them. #using meta-classes to make the objects construct properly without them.
def rbf(D,variance=1., lengthscale=1.): def rbf(D,variance=1., lengthscale=None,ARD=False):
""" """
Construct an RBF kernel Construct an RBF kernel
@@ -32,46 +33,23 @@ def rbf(D,variance=1., lengthscale=1.):
:type variance: float :type variance: float
:param lengthscale: the lengthscale of the kernel :param lengthscale: the lengthscale of the kernel
:type lengthscale: float :type lengthscale: float
:param ARD: Automatic Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = rbfpart(D,variance,lengthscale) part = rbfpart(D,variance,lengthscale,ARD)
return kern(D, [part]) return kern(D, [part])
def rbf_ARD(D,variance=1., lengthscales=None): def linear(D,variances=None,ARD=True):
"""
Construct an RBF kernel with Automatic Relevance Determination (ARD)
:param D: dimensionality of the kernel, obligatory
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscales: the lengthscales of the kernel
:type lengthscales: None|np.ndarray
"""
part = rbf_ARD_part(D,variance,lengthscales)
return kern(D, [part])
def linear(D,lengthscales=None):
""" """
Construct a linear kernel. Construct a linear kernel.
Arguments Arguments
--------- ---------
D (int), obligatory D (int), obligatory
lengthscales (np.ndarray) variances (np.ndarray)
ARD (boolean)
""" """
part = linearpart(D,lengthscales) part = linearpart(D,variances,ARD)
return kern(D, [part])
def linear_ARD(D,lengthscales=None):
"""
Construct a linear ARD kernel.
Arguments
---------
D (int), obligatory
lengthscales (np.ndarray)
"""
part = linear_ARD_part(D,lengthscales)
return kern(D, [part]) return kern(D, [part])
def white(D,variance=1.): def white(D,variance=1.):
@@ -86,43 +64,52 @@ def white(D,variance=1.):
part = whitepart(D,variance) part = whitepart(D,variance)
return kern(D, [part]) return kern(D, [part])
def exponential(D,variance=1., lengthscales=None): def exponential(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct a exponential kernel. Construct an exponential kernel
Arguments :param D: dimensionality of the kernel, obligatory
--------- :type D: int
D (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
lengthscales (np.ndarray) :param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Automatic Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = exponentialpart(D,variance, lengthscales) part = exponentialpart(D,variance, lengthscale, ARD)
return kern(D, [part]) return kern(D, [part])
def Matern32(D,variance=1., lengthscales=None): def Matern32(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct a Matern 3/2 kernel. Construct a Matern 3/2 kernel.
Arguments :param D: dimensionality of the kernel, obligatory
--------- :type D: int
D (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
lengthscales (np.ndarray) :param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Automatic Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = Matern32part(D,variance, lengthscales) part = Matern32part(D,variance, lengthscale, ARD)
return kern(D, [part]) return kern(D, [part])
def Matern52(D,variance=1., lengthscales=None): def Matern52(D,variance=1., lengthscale=None, ARD=False):
""" """
Construct a Matern 5/2 kernel. Construct a Matern 5/2 kernel.
Arguments :param D: dimensionality of the kernel, obligatory
--------- :type D: int
D (int), obligatory :param variance: the variance of the kernel
variance (float) :type variance: float
lengthscales (np.ndarray) :param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param ARD: Automatic Relevance Determination (one lengthscale per dimension)
:type ARD: Boolean
""" """
part = Matern52part(D,variance, lengthscales) part = Matern52part(D,variance, lengthscale, ARD)
return kern(D, [part]) return kern(D, [part])
def bias(D,variance=1.): def bias(D,variance=1.):
@@ -200,3 +187,57 @@ def sympykern(D,k):
A kernel from a symbolic sympy representation A kernel from a symbolic sympy representation
""" """
return kern(D,[spkern(D,k)]) return kern(D,[spkern(D,k)])
def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
"""
Construct a periodic exponential kernel
:param D: dimensionality, only defined for D=1
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = periodic_exponentialpart(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
def periodic_Matern32(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
"""
Construct a periodic Matern 3/2 kernel.
:param D: dimensionality, only defined for D=1
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = periodic_Matern32part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
def periodic_Matern52(D,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
"""
Construct a periodic Matern 5/2 kernel.
:param D: dimensionality, only defined for D=1
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel
:type lengthscale: float
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
"""
part = periodic_Matern52part(D,variance, lengthscale, period, n_freq, lower, upper)
return kern(D, [part])
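A minimal usage sketch of the refactored constructors (a sketch assuming this version of GPy is importable; the data and printed names are illustrative, not output from a run):

import numpy as np
import GPy

k_iso = GPy.kern.Matern52(3)            # one shared lengthscale
k_ard = GPy.kern.Matern52(3, ARD=True)  # one lengthscale per input dimension
k_per = GPy.kern.periodic_exponential(1, period=np.pi, n_freq=10)

print k_iso._get_param_names()  # e.g. ['Mat52_variance', 'Mat52_lengthscale']
print k_ard._get_param_names()  # e.g. ['Mat52_ARD_variance', 'Mat52_ARD_lengthscale_0', ...]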
@@ -19,42 +19,53 @@ class exponential(kernpart):
:type D: int :type D: int
:param variance: the variance :math:`\sigma^2` :param variance: the variance :math:`\sigma^2`
:type variance: float :type variance: float
:param lengthscale: the lengthscales :math:`\ell_i` :param lengthscale: the vector of lengthscales :math:`\ell_i`
:type lengthscale: np.ndarray of size (D,) :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Automatic Relevance Determination. If False, the kernel is isotropic (i.e. a single lengthscale parameter \ell); otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object :rtype: kernel object
""" """
def __init__(self,D,variance=1.,lengthscales=None): def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
self.D = D self.D = D
if lengthscales is not None: self.ARD = ARD
assert lengthscales.shape==(self.D,) if ARD == False:
else: self.Nparam = 2
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'exp' self.name = 'exp'
self.set_param(np.hstack((variance,lengthscales))) if lengthscale is not None:
assert lengthscale.shape == (1,)
else:
lengthscale = np.ones(1)
else:
self.Nparam = self.D + 1
self.name = 'exp_ARD'
if lengthscale is not None:
assert lengthscale.shape == (self.D,)
else:
lengthscale = np.ones(self.D)
self._set_params(np.hstack((variance,lengthscale)))
def get_param(self): def _get_params(self):
"""return the value of the parameters.""" """return the value of the parameters."""
return np.hstack((self.variance,self.lengthscales)) return np.hstack((self.variance,self.lengthscale))
def set_param(self,x): def _set_params(self,x):
"""set the value of the parameters.""" """set the value of the parameters."""
assert x.size==(self.D+1) assert x.size == self.Nparam
self.variance = x[0] self.variance = x[0]
self.lengthscales = x[1:] self.lengthscale = x[1:]
def get_param_names(self): def _get_param_names(self):
"""return parameter names.""" """return parameter names."""
if self.D==1: if self.Nparam == 2:
return ['variance','lengthscale'] return ['variance','lengthscale']
else: else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)] return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target): def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2.""" """Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
np.add(self.variance*np.exp(-dist), target,target) np.add(self.variance*np.exp(-dist), target,target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
@@ -64,13 +75,17 @@ class exponential(kernpart):
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf) invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscales**3 dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = np.exp(-dist) dvar = np.exp(-dist)
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[0] += np.sum(dvar*partial) target[0] += np.sum(dvar*partial)
if self.ARD:
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
else:
dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,partial,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
@@ -80,8 +95,8 @@ class exponential(kernpart):
def dK_dX(self,partial,X,X2,target): def dK_dX(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscales),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscales**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*partial.T[:,:,None],0)
@@ -101,14 +116,14 @@ class exponential(kernpart):
""" """
assert self.D == 1 assert self.D == 1
def L(x,i): def L(x,i):
return(1./self.lengthscales*F[i](x) + F1[i](x)) return(1./self.lengthscale*F[i](x) + F1[i](x))
n = F.shape[0] n = F.shape[0]
G = np.zeros((n,n)) G = np.zeros((n,n))
for i in range(n): for i in range(n):
for j in range(i,n): for j in range(i,n):
G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0] G[i,j] = G[j,i] = integrate.quad(lambda x : L(x,i)*L(x,j),lower,upper)[0]
Flower = np.array([f(lower) for f in F])[:,None] Flower = np.array([f(lower) for f in F])[:,None]
return(self.lengthscales/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T)) return(self.lengthscale/2./self.variance * G + 1./self.variance * np.dot(Flower,Flower.T))
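In one dimension the covariance computed in K above reduces to the familiar :math:`\sigma^2\exp(-|x-x'|/\ell)`; a standalone numpy check of that reduction (hypothetical values, no GPy required):

import numpy as np
variance, lengthscale = 1.5, 0.7
X = np.random.randn(6, 1)
X2 = np.random.randn(4, 1)
# the general-D scaled distance used by K
dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / lengthscale), -1))
K = variance * np.exp(-dist)
# the 1-D closed form
assert np.allclose(K, variance * np.exp(-np.abs(X - X2.T) / lengthscale))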
@@ -27,15 +27,15 @@ class finite_dimensional(kernpart):
weights = np.ones(self.n) weights = np.ones(self.n)
self.Nparam = self.n + 1 self.Nparam = self.n + 1
self.name = 'finite_dim' self.name = 'finite_dim'
self.set_param(np.hstack((variance,weights))) self._set_params(np.hstack((variance,weights)))
def get_param(self): def _get_params(self):
return np.hstack((self.variance,self.weights)) return np.hstack((self.variance,self.weights))
def set_param(self,x): def _set_params(self,x):
assert x.size == (self.Nparam) assert x.size == (self.Nparam)
self.variance = x[0] self.variance = x[0]
self.weights = x[1:] self.weights = x[1:]
def get_param_names(self): def _get_param_names(self):
if self.n==1: if self.n==1:
return ['variance','weight'] return ['variance','weight']
else: else:
@@ -133,20 +133,20 @@ class kern(parameterised):
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices] newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern return newkern
def get_param(self): def _get_params(self):
return np.hstack([p.get_param() for p in self.parts]) return np.hstack([p._get_params() for p in self.parts])
def set_param(self,x): def _set_params(self,x):
[p.set_param(x[s]) for p, s in zip(self.parts, self.param_slices)] [p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
def get_param_names(self): def _get_param_names(self):
#this is a bit nasty: we want to distinguish between parts with the same name by appending a count #this is a bit nasty: we want to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts],dtype=np.str) part_names = np.array([k.name for k in self.parts],dtype=np.str)
counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)] counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)] cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)] names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
return sum([[name+'_'+n for n in k.get_param_names()] for name,k in zip(names,self.parts)],[]) return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[])
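A standalone sketch of the disambiguation logic above, showing the names produced when two parts share a name (note that the count suffix is computed from the tail of the list, so it runs backwards):

import numpy as np
part_names = np.array(['rbf', 'rbf', 'white'], dtype=np.str)
counts = [np.sum(part_names == ni) for ni in part_names]
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
names = [n + '_' + str(c) if count > 1 else n for n, count, c in zip(part_names, counts, cum_counts)]
# names == ['rbf_2', 'rbf_1', 'white']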
def K(self,X,X2=None,slices1=None,slices2=None): def K(self,X,X2=None,slices1=None,slices2=None):
assert X.shape[1]==self.D assert X.shape[1]==self.D
@@ -284,6 +284,8 @@ class kern(parameterised):
# 1. get all the psi1 statistics # 1. get all the psi1 statistics
psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts] psi1_matrices = [np.zeros((mu.shape[0], Z.shape[0])) for p in self.parts]
[p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)] [p.psi1(Z[s2],mu[s1],S[s1],psi1_target[s1,s2]) for p,s1,s2,psi1_target in zip(self.parts,slices1,slices2, psi1_matrices)]
partial1 = np.zeros_like(partial1)
# 2. get all the dpsi1/dtheta gradients # 2. get all the dpsi1/dtheta gradients
psi1_gradients = [np.zeros(self.Nparam) for p in self.parts] psi1_gradients = [np.zeros(self.Nparam) for p in self.parts]
[p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)] [p.dpsi1_dtheta(partial1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],psi1g_target[ps]) for p,ps,s1,s2,i_s,psi1g_target in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices,psi1_gradients)]
@@ -292,7 +294,7 @@ class kern(parameterised):
for a,b in itertools.combinations(range(len(psi1_matrices)), 2): for a,b in itertools.combinations(range(len(psi1_matrices)), 2):
gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0) gne = (psi1_gradients[a][None]*psi1_matrices[b].sum(0)[:,None]).sum(0)
target += 0#(gne[None] + gne[:, None]).sum(0) target += (gne[None] + gne[:, None]).sum(0)
return target return target
def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None):
@@ -16,11 +16,11 @@ class kernpart(object):
self.Nparam = 1 self.Nparam = 1
self.name = 'unnamed' self.name = 'unnamed'
def get_param(self): def _get_params(self):
raise NotImplementedError raise NotImplementedError
def set_param(self,x): def _set_params(self,x):
raise NotImplementedError raise NotImplementedError
def get_param_names(self): def _get_param_names(self):
raise NotImplementedError raise NotImplementedError
def K(self,X,X2,target): def K(self,X,X2,target):
raise NotImplementedError raise NotImplementedError
@@ -1,63 +1,148 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart from kernpart import kernpart
import numpy as np import numpy as np
class linear(kernpart): class linear(kernpart):
""" """
Linear kernel Linear kernel
.. math::
k(x,y) = \sum_{i=1}^D \sigma^2_i x_i y_i
:param D: the number of input dimensions :param D: the number of input dimensions
:type D: int :type D: int
:param variance: variance :param variances: the vector of variances :math:`\sigma^2_i`
:type variance: None|float :type variances: np.ndarray of size (1,) or (D,) depending on ARD
:param ARD: Automatic Relevance Determination. If False, the kernel is isotropic (i.e. a single variance parameter \sigma^2); otherwise there is one variance parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
""" """
def __init__(self, D, variance=None): def __init__(self,D,variances=None,ARD=True):
self.D = D self.D = D
if variance is None: self.ARD = ARD
variance = 1.0 if ARD == False:
self.Nparam = 1 self.Nparam = 1
self.name = 'linear' self.name = 'linear'
self.set_param(variance) if variances is not None:
assert variances.shape == (1,)
else:
variances = np.ones(1)
self._Xcache, self._X2cache = np.empty(shape=(2,)) self._Xcache, self._X2cache = np.empty(shape=(2,))
else:
self.Nparam = self.D
self.name = 'linear_ARD'
if variances is not None:
assert variances.shape == (self.D,)
else:
variances = np.ones(self.D)
self._set_params(variances)
def get_param(self): def _get_params(self):
return self.variance return self.variances
def set_param(self,x): def _set_params(self,x):
self.variance = x assert x.size==(self.Nparam)
self.variances = x
self.variances2 = np.square(self.variances)
def get_param_names(self): def _get_param_names(self):
if self.Nparam == 1:
return ['variance'] return ['variance']
else:
return ['variance_%i'%i for i in range(self.variances.size)]
def K(self,X,X2,target): def K(self,X,X2,target):
if self.ARD:
XX = X*np.sqrt(self.variances)
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
self._K_computations(X, X2) self._K_computations(X, X2)
target += self.variance * self._dot_product target += self.variances * self._dot_product
def Kdiag(self,X,target): def Kdiag(self,X,target):
np.add(target,np.sum(self.variance*np.square(X),-1),target) np.add(target,np.sum(self.variances*np.square(X),-1),target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,partial,X,X2,target):
""" if self.ARD:
Computes the derivatives wrt theta product = X[:,None,:]*X2[None,:,:]
Return shape is NxMx(Ntheta) target += (partial[:,:,None]*product).sum(0).sum(0)
""" else:
self._K_computations(X, X2) self._K_computations(X, X2)
product = self._dot_product target += np.sum(self._dot_product*partial)
# product = np.dot(X, X2.T)
target += np.sum(product*partial)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,partial,X,X2,target):
target += self.variance * np.sum(partial[:,None,:]*X2.T[None,:,:],-1) target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
def dKdiag_dtheta(self,partial,X,target): #---------------------------------------#
target += np.sum(partial*np.square(X).sum(1)) # PSI statistics #
#---------------------------------------#
def psi0(self,Z,mu,S,target):
expected = np.square(mu) + S
target += np.sum(self.variances*expected)
def dpsi0_dtheta(self,partial,Z,mu,S,target):
expected = np.square(mu) + S
target += (partial[:, None] * (-2.*np.sum(expected,0))).sum()
def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S):
target_mu += partial[:, None] * (2*mu*self.variances)
target_S += partial[:, None] * self.variances
def dpsi0_dZ(self,Z,mu,S,target):
pass
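A quick Monte Carlo sanity check of psi0 above, which for a single Gaussian input is :math:`\psi_0 = E[k(x,x)] = \sum_i \sigma_i^2(\mu_i^2 + S_i)` (standalone numpy, hypothetical values):

import numpy as np
np.random.seed(0)
variances = np.array([0.5, 2.0])
mu = np.array([[0.3, -1.2]])
S = np.array([[0.4, 0.9]])
psi0 = np.sum(variances * (np.square(mu) + S))
xs = mu + np.sqrt(S) * np.random.randn(200000, 2)   # samples of x ~ N(mu, diag(S))
mc = np.mean(np.sum(variances * np.square(xs), -1)) # Monte Carlo estimate of E[k(x,x)]
assert abs(mc - psi0) < 0.05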
def psi1(self,Z,mu,S,target):
"""the variance, it does nothing"""
self.K(mu,Z,target)
def dpsi1_dtheta(self,partial,Z,mu,S,target):
"""the variance, it does nothing"""
self.dK_dtheta(partial,mu,Z,target)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Do nothing for S, it does not affect psi1"""
target_mu += (partial.T[:,:, None]*(Z*self.variances)).sum(1)
def dpsi1_dZ(self,partial,Z,mu,S,target):
self.dK_dX(partial.T,Z,mu,target)
def psi2(self,Z,mu,S,target):
"""
returns N,M,M matrix
"""
mu2_S = np.square(mu)+S# N,Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
psi2 = ZZ*np.square(self.variances)*mu2_S[:, None, None, :]
target += psi2.sum(-1)
def dpsi2_dtheta(self,partial,Z,mu,S,target):
mu2_S = np.square(mu)+S# N,Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
target += (partial[:,:,:,None]*(2.*ZZ*mu2_S[:,None,None,:]*self.variances)).sum()
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
mu2_S = np.sum(np.square(mu)+S,0)# Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
tmp = ZZ*np.square(self.variances) # M,M,Q
target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1)
def dpsi2_dZ(self,partial,Z,mu,S,target):
mu2_S = np.sum(np.square(mu)+S,0)# Q,
target += (partial[:,:,:,None]* (Z * mu2_S * np.square(self.variances))).sum(0).sum(0)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self,X,X2): def _K_computations(self,X,X2):
# (Nicolo) changed the logic here. If X2 is None, we want to cache
# (X,X). In practice X2 should always be passed.
if X2 is None: if X2 is None:
X2 = X X2 = X
if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)): if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
@@ -67,58 +152,3 @@ class linear(kernpart):
else: else:
# print "Cache hit!" # print "Cache hit!"
pass # TODO: insert debug message here (logging framework) pass # TODO: insert debug message here (logging framework)
# def psi0(self,Z,mu,S,target):
# expected = np.square(mu) + S
# np.add(target,np.sum(self.variance*expected),target)
# def dpsi0_dtheta(self,Z,mu,S,target):
# expected = np.square(mu) + S
# return -2.*np.sum(expected,0)
# def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
# np.add(target_mu,2*mu*self.variances,target_mu)
# np.add(target_S,self.variances,target_S)
# def dpsi0_dZ(self,Z,mu,S,target):
# pass
# def psi1(self,Z,mu,S,target):
# """the variance, it does nothing"""
# self.K(mu,Z,target)
# def dpsi1_dtheta(self,Z,mu,S,target):
# """the variance, it does nothing"""
# self.dK_dtheta(mu,Z,target)
# def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
# """Do nothing for S, it does not affect psi1"""
# np.add(target_mu,Z/self.variances2,target_mu)
# def dpsi1_dZ(self,Z,mu,S,target):
# self.dK_dX(mu,Z,target)
# def psi2(self,Z,mu,S,target):
# """Think N,M,M,Q """
# mu2_S = np.square(mu)+S # N,Q,
# ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
# psi2 = ZZ*np.square(self.variances)*mu2_S
# np.add(target, psi2.sum(-1),target) # M,M
# def dpsi2_dtheta(self,Z,mu,S,target):
# mu2_S = np.square(mu)+S # N,Q,
# ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
# target += 2.*ZZ*mu2_S*self.variances
# def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
# """Think N,M,M,Q """
# mu2_S = np.sum(np.square(mu)+S,0) # Q,
# ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
# tmp = ZZ*np.square(self.variances) # M,M,Q
# np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) # N,M,M,Q
# np.add(target_S, tmp, target_S) # N,M,M,Q
# def dpsi2_dZ(self,Z,mu,S,target):
# mu2_S = np.sum(np.square(mu)+S,0) # Q,
# target += Z[:,None,:]*np.square(self.variances)*mu2_S
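The ARD branch of K above computes :math:`k(x,y) = \sum_i \sigma^2_i x_i y_i` by rescaling each input by :math:`\sqrt{\sigma^2_i}`; a standalone check that the two forms agree (hypothetical values):

import numpy as np
np.random.seed(1)
variances = np.array([0.5, 2.0])
X = np.random.randn(5, 2)
X2 = np.random.randn(3, 2)
K1 = np.dot(X * np.sqrt(variances), (X2 * np.sqrt(variances)).T)
K2 = np.einsum('nd,md,d->nm', X, X2, variances)  # sum_i sigma2_i x_i y_i directly
assert np.allclose(K1, K2)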
@@ -1,108 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
class linear_ARD(kernpart):
"""
Linear ARD kernel
:param D: the number of input dimensions
:type D: int
:param variances: ARD variances
:type variances: None|np.ndarray
"""
def __init__(self,D,variances=None):
self.D = D
if variances is not None:
assert variances.shape==(self.D,)
else:
variances = np.ones(self.D)
self.Nparam = int(self.D)
self.name = 'linear'
self.set_param(variances)
def get_param(self):
return self.variances
def set_param(self,x):
assert x.size==(self.Nparam)
self.variances = x
def get_param_names(self):
if self.D==1:
return ['variance']
else:
return ['variance_%i'%i for i in range(self.variances.size)]
def K(self,X,X2,target):
XX = X*np.sqrt(self.variances)
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
def Kdiag(self,X,target):
np.add(target,np.sum(self.variances*np.square(X),-1),target)
def dK_dtheta(self,partial,X,X2,target):
product = X[:,None,:]*X2[None,:,:]
target += (partial[:,:,None]*product).sum(0).sum(0)
def dK_dX(self,partial,X,X2,target):
target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0)
def psi0(self,Z,mu,S,target):
expected = np.square(mu) + S
np.add(target,np.sum(self.variances*expected),target)
def dpsi0_dtheta(self,Z,mu,S,target):
expected = np.square(mu) + S
return -2.*np.sum(expected,0)
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
np.add(target_mu,2*mu*self.variances,target_mu)
np.add(target_S,self.variances,target_S)
def dpsi0_dZ(self,Z,mu,S,target):
pass
def psi1(self,Z,mu,S,target):
"""the variance, it does nothing"""
self.K(mu,Z,target)
def dpsi1_dtheta(self,Z,mu,S,target):
"""the variance, it does nothing"""
self.dK_dtheta(mu,Z,target)
def dpsi1_dmuS(self,Z,mu,S,target_mu,target_S):
"""Do nothing for S, it does not affect psi1"""
np.add(target_mu,Z/self.variances2,target_mu)
def dpsi1_dZ(self,Z,mu,S,target):
self.dK_dX(mu,Z,target)
def psi2(self,Z,mu,S,target):
"""Think N,M,M,Q """
mu2_S = np.square(mu)+S# N,Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
psi2 = ZZ*np.square(self.variances)*mu2_S
np.add(target, psi2.sum(-1),target) # M,M
def dpsi2_dtheta(self,Z,mu,S,target):
mu2_S = np.square(mu)+S# N,Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
target += 2.*ZZ*mu2_S*self.variances
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
mu2_S = np.sum(np.square(mu)+S,0)# Q,
ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
tmp = ZZ*np.square(self.variances) # M,M,Q
np.add(target_mu, tmp*2.*mu[:,None,None,:],target_mu) #N,M,M,Q
np.add(target_S, tmp, target_S) #N,M,M,Q
def dpsi2_dZ(self,Z,mu,S,target):
mu2_S = np.sum(np.square(mu)+S,0)# Q,
target += Z[:,None,:]*np.square(self.variances)*mu2_S
@@ -0,0 +1,172 @@
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
class periodic_Matern32(kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 3/2 RKHS. Only defined for D=1.
:param D: the number of input dimensions
:type D: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (D,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
assert D==1
self.name = 'periodic_Mat32'
self.D = D
if lengthscale is not None:
assert lengthscale.shape==(self.D,)
else:
lengthscale = np.ones(self.D)
self.lower,self.upper = lower, upper
self.Nparam = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
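_cos_factorization collapses a sum of cosines sharing one frequency into a single cosine, :math:`\sum_j \alpha_j\cos(\omega x + \phi_j) = r\cos(\omega x + \psi)` with :math:`r\cos\psi = \sum_j \alpha_j\cos\phi_j` and :math:`r\sin\psi = \sum_j \alpha_j\sin\phi_j`; a standalone numeric check of that identity (hypothetical values):

import numpy as np
alpha = np.array([[1.0, 0.5]])
omega = np.array([[2.0, 2.0]])  # one shared frequency per row
phase = np.array([[0.3, 1.1]])
r1 = np.sum(alpha * np.cos(phase), axis=1)[:, None]
r2 = np.sum(alpha * np.sin(phase), axis=1)[:, None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, np.arctan(r2 / r1) + (r1 < 0.) * np.pi, np.arcsin(r2))
x = np.linspace(0., 5., 7)
lhs = (alpha * np.cos(omega * x[:, None] + phase)).sum(1)
rhs = (r * np.cos(omega[:, 0:1] * x[None, :] + psi)).ravel()
assert np.allclose(lhs, rhs)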
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
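_int_computation evaluates :math:`\int_{lower}^{upper} r_1\cos(\omega_1 x+\phi_1)\,r_2\cos(\omega_2 x+\phi_2)\,dx` in closed form via the product-to-sum identity; the second branch handles the degenerate case :math:`\omega_1 = \omega_2`, where the difference term produces NaN. A standalone check against numerical quadrature (hypothetical scalar values):

import numpy as np
from scipy import integrate
lower, upper = 0., 4 * np.pi
w1, w2, p1, p2 = 2.0, 3.0, 0.1, 0.4
closed = 0.5 * (1. / (w1 + w2) * (np.sin((w1 + w2) * upper + p1 + p2) - np.sin((w1 + w2) * lower + p1 + p2))
                + 1. / (w1 - w2) * (np.sin((w1 - w2) * upper + p1 - p2) - np.sin((w1 - w2) * lower + p1 - p2)))
quad = integrate.quad(lambda x: np.cos(w1 * x + p1) * np.cos(w2 * x + p2), lower, upper)[0]
assert np.allclose(closed, quad)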
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [3./self.lengthscale**2, 2*np.sqrt(3)/self.lengthscale, 1.]
self.b = [1,self.lengthscale**2/3]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
return(self.lengthscale**3/(12*np.sqrt(3)*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T) + self.lengthscale**2/(3.*self.variance)*np.dot(F1lower,F1lower.T))
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega,self.a[2]*self.basis_omega**2))
Lo = np.column_stack((self.basis_omega,self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-6/self.lengthscale**3,-2*np.sqrt(3)/self.lengthscale**2,0.]
db_dlen = [0.,2*self.lengthscale/3.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega,da_dlen[2]*self.basis_omega**2))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = self.lengthscale**2/(4*np.sqrt(3))*Gint + self.lengthscale**3/(12*np.sqrt(3))*dGint_dlen + db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F1lower,F1lower.T)
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:2],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale**3/(12*np.sqrt(3))*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)) + self.b[1]*(np.dot(dF1lower_dper,F1lower.T)+np.dot(F1lower,dF1lower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
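Since the basis frequencies are integer multiples of :math:`2\pi/\mathrm{period}`, every basis function, and hence K itself, is invariant to shifting an input by one period. A minimal sketch of that property, assuming this commit's GPy is importable (data hypothetical):

import numpy as np
import GPy
k = GPy.kern.periodic_Matern32(1, period=np.pi, n_freq=10)
X = np.random.rand(20, 1) * 2 * np.pi
assert np.allclose(k.K(X), k.K(X + np.pi))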
@@ -0,0 +1,184 @@
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
class periodic_Matern52(kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of a Matern 5/2 RKHS. Only defined for D=1.
:param D: the number of input dimensions
:type D: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (D,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
assert D==1
self.name = 'periodic_Mat52'
self.D = D
if lengthscale is not None:
assert lengthscale.shape==(self.D,)
else:
lengthscale = np.ones(self.D)
self.lower,self.upper = lower, upper
self.Nparam = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [5*np.sqrt(5)/self.lengthscale**3, 15./self.lengthscale**2,3*np.sqrt(5)/self.lengthscale, 1.]
self.b = [9./8, 9*self.lengthscale**4/200., 3*self.lengthscale**2/5., 3*self.lengthscale**2/(5*8.), 3*self.lengthscale**2/(5*8.)]
self.basis_alpha = np.ones((2*self.n_freq,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
lower_terms = self.b[0]*np.dot(Flower,Flower.T) + self.b[1]*np.dot(F2lower,F2lower.T) + self.b[2]*np.dot(F1lower,F1lower.T) + self.b[3]*np.dot(F2lower,Flower.T) + self.b[4]*np.dot(Flower,F2lower.T)
return(3*self.lengthscale**5/(400*np.sqrt(5)*self.variance) * Gint + 1./self.variance*lower_terms)
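The Gram matrix returned above should be symmetric positive definite (it is inverted in _set_params); a quick sanity check, assuming periodic_Matern52 is importable from this module (parameters hypothetical):

import numpy as np
k = periodic_Matern52(D=1, variance=1., period=2 * np.pi, n_freq=5)
G = k.Gram_matrix()
assert np.allclose(G, G.T)
assert np.all(np.linalg.eigvalsh(G) > 0)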
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)), self.a[1]*self.basis_omega, self.a[2]*self.basis_omega**2, self.a[3]*self.basis_omega**3))
Lo = np.column_stack((self.basis_omega, self.basis_omega, self.basis_omega, self.basis_omega))
Lp = np.column_stack((self.basis_phi, self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
F1lower = np.array(self._cos(self.basis_alpha*self.basis_omega,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
F2lower = np.array(self._cos(self.basis_alpha*self.basis_omega**2,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-3*self.a[0]/self.lengthscale, -2*self.a[1]/self.lengthscale, -self.a[2]/self.lengthscale, 0.]
db_dlen = [0., 4*self.b[1]/self.lengthscale, 2*self.b[2]/self.lengthscale, 2*self.b[3]/self.lengthscale, 2*self.b[4]/self.lengthscale]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)), da_dlen[1]*self.basis_omega, da_dlen[2]*self.basis_omega**2, da_dlen[3]*self.basis_omega**3))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dlower_terms_dlen = db_dlen[0]*np.dot(Flower,Flower.T) + db_dlen[1]*np.dot(F2lower,F2lower.T) + db_dlen[2]*np.dot(F1lower,F1lower.T) + db_dlen[3]*np.dot(F2lower,Flower.T) + db_dlen[4]*np.dot(Flower,F2lower.T)
dG_dlen = 15*self.lengthscale**4/(400*np.sqrt(5))*Gint + 3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dlen + dlower_terms_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period, -self.a[2]*self.basis_omega**3/self.period, -self.a[3]*self.basis_omega**4/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi,self.basis_phi+np.pi*3/2,self.basis_phi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period, -2*self.a[2]*self.basis_omega**2/self.period, -3*self.a[3]*self.basis_omega**3/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2, self.basis_phi+np.pi, self.basis_phi+np.pi*3/2))
r2,omega2,phi2 = self._cos_factorization(dLa_dper2,Lo[:,0:3],dLp_dper2)
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF1lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower)+self._cos(-self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dF2lower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega**3/self.period,self.basis_omega,self.basis_phi+np.pi*3/2)(self.lower) + self._cos(-2*self.basis_alpha*self.basis_omega**2/self.period,self.basis_omega,self.basis_phi+np.pi)(self.lower))[:,None]
dlower_terms_dper = self.b[0] * (np.dot(dFlower_dper,Flower.T) + np.dot(Flower,dFlower_dper.T))
dlower_terms_dper += self.b[1] * (np.dot(dF2lower_dper,F2lower.T) + np.dot(F2lower,dF2lower_dper.T)) - 4*self.b[1]/self.period*np.dot(F2lower,F2lower.T)
dlower_terms_dper += self.b[2] * (np.dot(dF1lower_dper,F1lower.T) + np.dot(F1lower,dF1lower_dper.T)) - 2*self.b[2]/self.period*np.dot(F1lower,F1lower.T)
dlower_terms_dper += self.b[3] * (np.dot(dF2lower_dper,Flower.T) + np.dot(F2lower,dFlower_dper.T)) - 2*self.b[3]/self.period*np.dot(F2lower,Flower.T)
dlower_terms_dper += self.b[4] * (np.dot(dFlower_dper,F2lower.T) + np.dot(Flower,dF2lower_dper.T)) - 2*self.b[4]/self.period*np.dot(Flower,F2lower.T)
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
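These period gradients are error-prone, and a finite-difference comparison is a cheap sanity check on dK_dper and its siblings. A minimal sketch (fd_check_dK_dtheta is a hypothetical helper, not part of the commit), assuming a kernpart-style object exposing _get_params/_set_params, K and dK_dtheta with the target-accumulation conventions used here:

import numpy as np

def fd_check_dK_dtheta(kern, X, eps=1e-6):
    # hypothetical helper: compare analytic and numeric gradients of sum(K)
    params = kern._get_params().copy()
    N = X.shape[0]
    partial = np.ones((N, N))               # weight every entry of K equally
    analytic = np.zeros(kern.Nparam)
    kern.dK_dtheta(partial, X, None, analytic)
    numeric = np.zeros(kern.Nparam)
    for i in range(kern.Nparam):
        for sign in (1., -1.):
            p = params.copy()
            p[i] += sign * eps
            kern._set_params(p)
            Kmat = np.zeros((N, N))
            kern.K(X, None, Kmat)           # target-style API fills Kmat in place
            numeric[i] += sign * Kmat.sum() / (2 * eps)
    kern._set_params(params)                # restore the original parameters
    return analytic, numeric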

View file

@ -0,0 +1,169 @@
from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
class periodic_exponential(kernpart):
"""
Kernel of the periodic subspace (up to a given frequency) of an exponential (Matern 1/2) RKHS. Only defined for D=1.
:param D: the number of input dimensions
:type D: int
:param variance: the variance of the Matern kernel
:type variance: float
:param lengthscale: the lengthscale of the Matern kernel
:type lengthscale: np.ndarray of size (D,)
:param period: the period
:type period: float
:param n_freq: the number of frequencies considered for the periodic subspace
:type n_freq: int
:rtype: kernel object
"""
def __init__(self,D=1,variance=1.,lengthscale=None,period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
assert D==1
self.name = 'periodic_exp'
self.D = D
if lengthscale is not None:
assert lengthscale.shape==(self.D,)
else:
lengthscale = np.ones(self.D)
self.lower,self.upper = lower, upper
self.Nparam = 3
self.n_freq = n_freq
self.n_basis = 2*n_freq
self._set_params(np.hstack((variance,lengthscale,period)))
def _cos(self,alpha,omega,phase):
def f(x):
return alpha*np.cos(omega*x+phase)
return f
def _cos_factorization(self,alpha,omega,phase):
r1 = np.sum(alpha*np.cos(phase),axis=1)[:,None]
r2 = np.sum(alpha*np.sin(phase),axis=1)[:,None]
r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
#Gint2[0,0] = 2.*(self.upper-self.lower)*np.cos(phi1[0,0])*np.cos(phi2[0,0])
Gint = np.dot(r1,r2.T)/2 * np.where(np.isnan(Gint1),Gint2,Gint1)
return Gint
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.variance,self.lengthscale,self.period))
def _set_params(self,x):
"""set the value of the parameters."""
assert x.size==3
self.variance = x[0]
self.lengthscale = x[1]
self.period = x[2]
self.a = [1./self.lengthscale, 1.]
self.b = [1]
self.basis_alpha = np.ones((self.n_basis,))
self.basis_omega = np.array(sum([[i*2*np.pi/self.period]*2 for i in range(1,self.n_freq+1)],[]))
self.basis_phi = np.array(sum([[-np.pi/2, 0.] for i in range(1,self.n_freq+1)],[]))
self.G = self.Gram_matrix()
self.Gi = np.linalg.inv(self.G)
def _get_param_names(self):
"""return parameter names."""
return ['variance','lengthscale','period']
def Gram_matrix(self):
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
return(self.lengthscale/(2*self.variance) * Gint + 1./self.variance*np.dot(Flower,Flower.T))
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
if X2 is None:
FX2 = FX
else:
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
np.add(mdot(FX,self.Gi,FX2.T), target,target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
FX2 = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X2)
La = np.column_stack((self.a[0]*np.ones((self.n_basis,1)),self.a[1]*self.basis_omega))
Lo = np.column_stack((self.basis_omega,self.basis_omega))
Lp = np.column_stack((self.basis_phi,self.basis_phi+np.pi/2))
r,omega,phi = self._cos_factorization(La,Lo,Lp)
Gint = self._int_computation( r,omega,phi, r,omega,phi)
Flower = np.array(self._cos(self.basis_alpha,self.basis_omega,self.basis_phi)(self.lower))[:,None]
#dK_dvar
dK_dvar = 1./self.variance*mdot(FX,self.Gi,FX2.T)
#dK_dlen
da_dlen = [-1./self.lengthscale**2,0.]
dLa_dlen = np.column_stack((da_dlen[0]*np.ones((self.n_basis,1)),da_dlen[1]*self.basis_omega))
r1,omega1,phi1 = self._cos_factorization(dLa_dlen,Lo,Lp)
dGint_dlen = self._int_computation(r1,omega1,phi1, r,omega,phi)
dGint_dlen = dGint_dlen + dGint_dlen.T
dG_dlen = 1./2*Gint + self.lengthscale/2*dGint_dlen
dK_dlen = -mdot(FX,self.Gi,dG_dlen/self.variance,self.Gi,FX2.T)
#dK_dper
dFX_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X ,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X)
dFX2_dper = self._cos(-self.basis_alpha[None,:]*self.basis_omega[None,:]/self.period*X2,self.basis_omega[None,:],self.basis_phi[None,:]+np.pi/2)(X2)
dLa_dper = np.column_stack((-self.a[0]*self.basis_omega/self.period, -self.a[1]*self.basis_omega**2/self.period))
dLp_dper = np.column_stack((self.basis_phi+np.pi/2,self.basis_phi+np.pi))
r1,omega1,phi1 = self._cos_factorization(dLa_dper,Lo,dLp_dper)
IPPprim1 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi/2))
IPPprim1 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + 1./(omega-omega1.T)*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi/2))
IPPprim2 = self.upper*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi/2) + self.upper*np.cos(phi-phi1.T))
IPPprim2 -= self.lower*(1./(omega+omega1.T)*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi/2) + self.lower*np.cos(phi-phi1.T))
#IPPprim2[0,0] = 2*(self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPprim = np.where(np.isnan(IPPprim1),IPPprim2,IPPprim1)
IPPint1 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.upper+phi-phi1.T-np.pi)
IPPint1 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./(omega-omega1.T)**2*np.cos((omega-omega1.T)*self.lower+phi-phi1.T-np.pi)
IPPint2 = 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.upper+phi+phi1.T-np.pi) + 1./2*self.upper**2*np.cos(phi-phi1.T)
IPPint2 -= 1./(omega+omega1.T)**2*np.cos((omega+omega1.T)*self.lower+phi+phi1.T-np.pi) + 1./2*self.lower**2*np.cos(phi-phi1.T)
#IPPint2[0,0] = (self.upper**2 - self.lower**2)*np.cos(phi[0,0])*np.cos(phi1[0,0])
IPPint = np.where(np.isnan(IPPint1),IPPint2,IPPint1)
dLa_dper2 = np.column_stack((-self.a[1]*self.basis_omega/self.period))
dLp_dper2 = np.column_stack((self.basis_phi+np.pi/2))
r2,omega2,phi2 = dLa_dper2.T,Lo[:,0:1],dLp_dper2.T
dGint_dper = np.dot(r,r1.T)/2 * (IPPprim - IPPint) + self._int_computation(r2,omega2,phi2, r,omega,phi)
dGint_dper = dGint_dper + dGint_dper.T
dFlower_dper = np.array(self._cos(-self.lower*self.basis_alpha*self.basis_omega/self.period,self.basis_omega,self.basis_phi+np.pi/2)(self.lower))[:,None]
dG_dper = 1./self.variance*(self.lengthscale/2*dGint_dper + self.b[0]*(np.dot(dFlower_dper,Flower.T)+np.dot(Flower,dFlower_dper.T)))
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial)
#np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial)
#np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial)
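For orientation, a small usage sketch of the new kernpart (not part of the commit; parameter values are arbitrary):

import numpy as np
k = periodic_exponential(D=1, variance=1., period=2*np.pi, n_freq=10)
X = np.linspace(0., 4*np.pi, 50)[:, None]
K = np.zeros((50, 50))
k.K(X, None, K)                        # target-style API: K is accumulated in place
print K.shape, np.allclose(K, K.T)     # (50, 50) True -- symmetric, as expected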

View file

@ -8,46 +8,68 @@ import hashlib
 class rbf(kernpart):
     """
-    Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel.
+    Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel:

     .. math::

-       k(r) = \sigma^2 \exp(- \frac{r^2}{2\ell}) \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}}
+       k(r) = \sigma^2 \exp(- \frac{1}{2}r^2) \qquad \qquad \\text{ where } r^2 = \sum_{i=1}^d \frac{(x_i-x^\prime_i)^2}{\ell_i^2}

-    where \ell is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
+    where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.

     :param D: the number of input dimensions
     :type D: int
     :param variance: the variance of the kernel
     :type variance: float
-    :param lengthscale: the lengthscale of the kernel
-    :type lengthscale: float
+    :param lengthscale: the vector of lengthscales of the kernel
+    :type lengthscale: np.ndarray of size (1,) or (D,) depending on ARD
+    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
+    :type ARD: Boolean
+    :rtype: kernel object

     .. Note: for rbf with different lengthscale on each dimension, see rbf_ARD
     """
-    def __init__(self,D,variance=1.,lengthscale=1.):
+    def __init__(self,D,variance=1.,lengthscale=None,ARD=False):
         self.D = D
-        self.Nparam = 2
-        self.name = 'rbf'
-        self.set_param(np.hstack((variance,lengthscale)))
+        self.ARD = ARD
+        if not ARD:
+            self.Nparam = 2
+            self.name = 'rbf'
+            if lengthscale is not None:
+                assert lengthscale.shape == (1,)
+            else:
+                lengthscale = np.ones(1)
+        else:
+            self.Nparam = self.D + 1
+            self.name = 'rbf_ARD'
+            if lengthscale is not None:
+                assert lengthscale.shape == (self.D,)
+            else:
+                lengthscale = np.ones(self.D)
+        self._set_params(np.hstack((variance,lengthscale)))
         #initialize cache
         self._Z, self._mu, self._S = np.empty(shape=(3,1))
         self._X, self._X2, self._params = np.empty(shape=(3,1))
-    def get_param(self):
+    def _get_params(self):
         return np.hstack((self.variance,self.lengthscale))
-    def set_param(self,x):
-        self.variance, self.lengthscale = x
+    def _set_params(self,x):
+        assert x.size==(self.Nparam)
+        self.variance = x[0]
+        self.lengthscale = x[1:]
         self.lengthscale2 = np.square(self.lengthscale)
         #reset cached results
         self._X, self._X2, self._params = np.empty(shape=(3,1))
         self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
-    def get_param_names(self):
-        return ['variance','lengthscale']
+    def _get_param_names(self):
+        if self.Nparam == 2:
+            return ['variance','lengthscale']
+        else:
+            return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
     def K(self,X,X2,target):
         if X2 is None:
@ -61,7 +83,12 @@ class rbf(kernpart):
     def dK_dtheta(self,partial,X,X2,target):
         self._K_computations(X,X2)
         target[0] += np.sum(self._K_dvar*partial)
-        target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
+        if self.ARD == True:
+            dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale
+            target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
+        else:
+            target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial)
+            #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
     def dKdiag_dtheta(self,partial,X,target):
         #NB: derivative of diagonal elements wrt lengthscale is 0
@ -76,20 +103,10 @@ class rbf(kernpart):
     def dKdiag_dX(self,partial,X,target):
         pass
-    def _K_computations(self,X,X2):
-        if not (np.all(X==self._X) and np.all(X2==self._X2)):
-            self._X = X
-            self._X2 = X2
-            if X2 is None: X2 = X
-            XXT = np.dot(X,X2.T)
-            if X is X2:
-                self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
-            else:
-                self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])/self.lengthscale2
-            # TODO Remove comments if this is fine.
-            # Commented out by Neil as doesn't seem to be used elsewhere.
-            #self._K_exponent = -0.5*self._K_dist2
-            self._K_dvar = np.exp(-0.5*self._K_dist2)
+    #---------------------------------------#
+    #             PSI statistics            #
+    #---------------------------------------#
     def psi0(self,Z,mu,S,target):
         target += self.variance
@ -109,7 +126,11 @@ class rbf(kernpart):
         denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
         d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
         target[0] += np.sum(partial*self._psi1/self.variance)
-        target[1] += np.sum(d_length*partial[:,:,None])
+        dpsi1_dlength = d_length*partial[:,:,None]
+        if not self.ARD:
+            target[1] += dpsi1_dlength.sum()
+        else:
+            target[1:] += dpsi1_dlength.sum(0).sum(0)
     def dpsi1_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
@ -125,30 +146,52 @@ class rbf(kernpart):
     def psi2(self,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
-        target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
+        target += self._psi2
     def dpsi2_dtheta(self,partial,Z,mu,S,target):
         """Shape N,M,M,Ntheta"""
         self._psi_computations(Z,mu,S)
-        d_var = np.sum(2.*self._psi2/self.variance,0)
+        d_var = 2.*self._psi2/self.variance
         d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
         d_length = d_length.sum(0)
         target[0] += np.sum(partial*d_var)
-        target[1] += np.sum(d_length*partial[:,:,None])
+        dpsi2_dlength = d_length*partial[:,:,:,None]
+        if not self.ARD:
+            target[1] += dpsi2_dlength.sum()
+        else:
+            target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
     def dpsi2_dZ(self,partial,Z,mu,S,target):
         self._psi_computations(Z,mu,S)
         term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
         term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
         dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
-        target += (partial[None,:,:,None]*dZ).sum(0).sum(0)
+        target += (partial[:,:,:,None]*dZ).sum(0).sum(0) # <-- TODO not sure about the first ':' here, should be a None (WAS a None in the debug branch)
     def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
         """Think N,M,M,Q """
         self._psi_computations(Z,mu,S)
         tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
-        target_mu += (partial[None,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
-        target_S += (partial[None,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
+        target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
+        target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
+    #---------------------------------------#
+    #            Precomputations            #
+    #---------------------------------------#
+    def _K_computations(self,X,X2):
+        if not (np.all(X==self._X) and np.all(X2==self._X2)):
+            self._X = X
+            self._X2 = X2
+            if X2 is None: X2 = X
+            self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
+            self._params = np.empty(shape=(1,0)) #ensure the next section gets called
+        if not np.all(self._params == self._get_params()):
+            self._params = self._get_params()
+            self._K_dist2 = np.square(self._K_dist/self.lengthscale)
+            self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
     def _psi_computations(self,Z,mu,S):
         #here are the "statistics" for psi1 and psi2
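A sketch of how the unified constructor behaves in the two modes (assumes the rbf kernpart above is importable; values are arbitrary):

import numpy as np
iso = rbf(2)                                 # one shared lengthscale, Nparam == 2
ard = rbf(2, ARD=True)                       # one lengthscale per input dimension
print iso._get_param_names()                 # ['variance', 'lengthscale']
print ard._get_param_names()                 # ['variance', 'lengthscale_0', 'lengthscale_1']
ard._set_params(np.array([1.5, 0.5, 2.0]))   # sigma^2, ell_0, ell_1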

View file

@ -1,253 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
import hashlib
class rbf_ARD(kernpart):
def __init__(self,D,variance=1.,lengthscales=None):
"""
Arguments
----------
D: int - the number of input dimensions
variance: float
lengthscales : np.ndarray of shape (D,)
"""
self.D = D
if lengthscales is not None:
assert lengthscales.shape==(self.D,)
else:
lengthscales = np.ones(self.D)
self.Nparam = self.D + 1
self.name = 'rbf_ARD'
self.set_param(np.hstack((variance,lengthscales)))
#initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1))
self._X, self._X2, self._params = np.empty(shape=(3,1))
def get_param(self):
return np.hstack((self.variance,self.lengthscales))
def set_param(self,x):
assert x.size==(self.D+1)
self.variance = x[0]
self.lengthscales = x[1:]
self.lengthscales2 = np.square(self.lengthscales)
#reset cached results
self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
def get_param_names(self):
if self.D==1:
return ['variance','lengthscale']
else:
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscales.size)]
def K(self,X,X2,target):
self._K_computations(X,X2)
np.add(self.variance*self._K_dvar, target,target)
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
self._K_computations(X,X2)
dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscales
target[0] += np.sum(self._K_dvar*partial)
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0)
def dKdiag_dtheta(self,partial,X,target):
target[0] += np.sum(partial)
def dK_dX(self,partial,X,X2,target):
self._K_computations(X,X2)
dZ = self.variance*self._K_dvar[:,:,None]*self._K_dist/self.lengthscales2
dK_dX = -dZ.transpose(1,0,2)
target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
pass
def psi0(self,Z,mu,S,target):
target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += np.sum(partial)
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
np.add(target, self._psi1,target)
def dpsi1_dtheta(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S)
denom_deriv = S[:,None,:]/(self.lengthscales**3+self.lengthscales*S[:,None,:])
d_length = self._psi1[:,:,None]*(self.lengthscales*np.square(self._psi1_dist/(self.lengthscales2+S[:,None,:])) + denom_deriv)
target[0] += np.sum(partial*self._psi1/self.variance)
target[1:] += (d_length*partial[:,:,None]).sum(0).sum(0)
def dpsi1_dZ(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S)
# np.add(target,-self._psi1[:,:,None]*self._psi1_dist/self.lengthscales2/self._psi1_denom,target)
denominator = (self.lengthscales2*(self._psi1_denom))
dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""return shapes are N,M,Q"""
self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscales2/self._psi1_denom
target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += self._psi2
def dpsi2_dtheta(self,partial,Z,mu,S,target):
"""Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S)
d_var = 2.*self._psi2/self.variance
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscales2)/(self.lengthscales*self._psi2_denom)
# d_length = d_length.sum(0)
target[0] += np.sum(partial*d_var)
target[1:] += (d_length*partial[:,:,:,None]).sum(0).sum(0).sum(0)
def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q"""
self._psi_computations(Z,mu,S)
term1 = 0.5*self._psi2_Zdist/self.lengthscales2 # M, M, Q
term2 = self._psi2_mudist/self._psi2_denom/self.lengthscales2 # N, M, M, Q
dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
target += (partial[:,:,:,None]*dZ).sum(0).sum(0)
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscales2/self._psi2_denom
target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)):
self._X = X
self._X2 = X2
if X2 is None: X2 = X
self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
self._params = np.empty(shape=(1,0))#ensure the next section gets called
if not np.all(self._params == self.get_param()):
self._params = self.get_param()
self._K_dist2 = np.square(self._K_dist/self.lengthscales)
self._K_exponent = -0.5*self._K_dist2.sum(-1)
self._K_dvar = np.exp(-0.5*self._K_dist2.sum(-1))
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscales2 # M,M,Q
self._Z = Z
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
#something's changed. recompute EVERYTHING
#psi1
self._psi1_denom = S[:,None,:]/self.lengthscales2 + 1.
self._psi1_dist = Z[None,:,:]-mu[:,None,:]
self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscales2/self._psi1_denom
self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
self._psi1 = self.variance*np.exp(self._psi1_exponent)
#psi2
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscales2+1. # N,M,M,Q
self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscales2*self._psi2_denom)
self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
self._Z, self._mu, self._S = Z, mu,S
if __name__=='__main__':
#run some simple tests on the kernel (TODO:move these to unititest)
#TODO: these are broken in this new structure!
N = 10
M = 5
Q = 3
Z = np.random.randn(M,Q)
mu = np.random.randn(N,Q)
S = np.random.rand(N,Q)
var = 2.5
lengthscales = np.ones(Q)*0.7
k = rbf(Q,var,lengthscales)
from checkgrad import checkgrad
def k_theta_test(param,k):
k.set_param(param)
K = k.K(Z)
dK_dtheta = k.dK_dtheta(Z)
f = np.sum(K)
df = dK_dtheta.sum(0).sum(0)
return f,np.array(df)
print "dk_dtheta_test"
checkgrad(k_theta_test,np.random.randn(1+Q),args=(k,))
def psi1_mu_test(mu,k):
mu = mu.reshape(N,Q)
f = np.sum(k.psi1(Z,mu,S))
df = k.dpsi1_dmuS(Z,mu,S)[0].sum(1)
return f,df.flatten()
print "psi1_mu_test"
checkgrad(psi1_mu_test,np.random.randn(N*Q),args=(k,))
def psi1_S_test(S,k):
S = S.reshape(N,Q)
f = np.sum(k.psi1(Z,mu,S))
df = k.dpsi1_dmuS(Z,mu,S)[1].sum(1)
return f,df.flatten()
print "psi1_S_test"
checkgrad(psi1_S_test,np.random.rand(N*Q),args=(k,))
def psi1_theta_test(theta,k):
k.set_param(theta)
f = np.sum(k.psi1(Z,mu,S))
df = np.array([np.sum(grad) for grad in k.dpsi1_dtheta(Z,mu,S)])
return f,df
print "psi1_theta_test"
checkgrad(psi1_theta_test,np.random.rand(1+Q),args=(k,))
def psi2_mu_test(mu,k):
mu = mu.reshape(N,Q)
f = np.sum(k.psi2(Z,mu,S))
df = k.dpsi2_dmuS(Z,mu,S)[0].sum(1).sum(1)
return f,df.flatten()
print "psi2_mu_test"
checkgrad(psi2_mu_test,np.random.randn(N*Q),args=(k,))
def psi2_S_test(S,k):
S = S.reshape(N,Q)
f = np.sum(k.psi2(Z,mu,S))
df = k.dpsi2_dmuS(Z,mu,S)[1].sum(1).sum(1)
return f,df.flatten()
print "psi2_S_test"
checkgrad(psi2_S_test,np.random.rand(N*Q),args=(k,))
def psi2_theta_test(theta,k):
k.set_param(theta)
f = np.sum(k.psi2(Z,mu,S))
df = np.array([np.sum(grad) for grad in k.dpsi2_dtheta(Z,mu,S)])
return f,df
print "psi2_theta_test"
checkgrad(psi2_theta_test,np.random.rand(1+Q),args=(k,))
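This file is deleted because the ARD behaviour now lives in rbf itself (see the ARD flag above). A migration sketch, assuming the new rbf kernpart is in scope:

import numpy as np
D = 3
lengthscales = np.ones(D) * 0.7
k = rbf(D, variance=2.5, lengthscale=lengthscales, ARD=True)   # was: rbf_ARD(D, 2.5, lengthscales)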

View file

@ -25,15 +25,15 @@ class spline(kernpart):
         assert self.D==1
         self.Nparam = 1
         self.name = 'spline'
-        self.set_param(np.squeeze(variance))
+        self._set_params(np.squeeze(variance))
-    def get_param(self):
+    def _get_params(self):
         return self.variance
-    def set_param(self,x):
+    def _set_params(self,x):
         self.variance = x
-    def get_param_names(self):
+    def _get_param_names(self):
         return ['variance']
     def K(self,X,X2,target):

View file

@ -44,7 +44,7 @@ class spkern(kernpart):
         if param is None:
             param = np.ones(self.Nparam)
         assert param.size==self.Nparam
-        self.set_param(param)
+        self._set_params(param)
         #Differentiate!
         self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
@ -247,12 +247,12 @@ class spkern(kernpart):
             Z = X
         weave.inline(self._dKdiag_dX_code,arg_names=['target','X','Z','param','partial'],**self.weave_kwargs)
-    def set_param(self,param):
+    def _set_params(self,param):
         #print param.flags['C_CONTIGUOUS']
         self._param = param.copy()
-    def get_param(self):
+    def _get_params(self):
         return self._param
-    def get_param_names(self):
+    def _get_param_names(self):
         return [x.name for x in self._sp_theta]

View file

@ -17,16 +17,16 @@ class white(kernpart):
         self.D = D
         self.Nparam = 1
         self.name = 'white'
-        self.set_param(np.array([variance]).flatten())
+        self._set_params(np.array([variance]).flatten())
-    def get_param(self):
+    def _get_params(self):
         return self.variance
-    def set_param(self,x):
+    def _set_params(self,x):
         assert x.shape==(1,)
         self.variance = x
-    def get_param_names(self):
+    def _get_param_names(self):
         return ['variance']
     def K(self,X,X2,target):
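The renames above (set_param -> _set_params, get_param -> _get_params, get_param_names -> _get_param_names) define the minimal contract a kernpart now exposes. A hypothetical toy kernpart illustrating that contract (constant_kern is illustrative only, not part of GPy):

import numpy as np

class constant_kern(object):
    """Hypothetical toy kernpart showing the renamed parameter interface."""
    def __init__(self, D, value=1.):
        self.D, self.Nparam, self.name = D, 1, 'constant'
        self._set_params(np.array([value]))
    def _get_params(self):
        return self.value
    def _set_params(self, x):
        self.value = np.asarray(x).flatten()
    def _get_param_names(self):
        return ['value']
    def K(self, X, X2, target):
        target += self.value       # accumulate into target, matching the convention above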

View file

@ -28,12 +28,12 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         sparse_GP_regression.__init__(self, X, Y, X_uncertainty = S, **kwargs)
-    def get_param_names(self):
+    def _get_param_names(self):
         X_names = sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
         S_names = sum([['S_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-        return (X_names + S_names + sparse_GP_regression.get_param_names(self))
+        return (X_names + S_names + sparse_GP_regression._get_param_names(self))
-    def get_param(self):
+    def _get_params(self):
         """
         Horizontally stacks the parameters in order to present them to the optimizer.
         The resulting 1-D array has this structure:
@ -43,13 +43,13 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         ===============================================================
         """
-        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression.get_param(self)))
+        return np.hstack((self.X.flatten(), self.X_uncertainty.flatten(), sparse_GP_regression._get_params(self)))
-    def set_param(self,x):
+    def _set_params(self,x):
         N, Q = self.N, self.Q
         self.X = x[:self.X.size].reshape(N,Q).copy()
         self.X_uncertainty = x[(N*Q):(2*N*Q)].reshape(N,Q).copy()
-        sparse_GP_regression.set_param(self, x[(2*N*Q):])
+        sparse_GP_regression._set_params(self, x[(2*N*Q):])
     def dL_dmuS(self):
         dL_dmu_psi0, dL_dS_psi0 = self.kern.dpsi1_dmuS(self.dL_dpsi1,self.Z,self.X,self.X_uncertainty)
@ -60,5 +60,6 @@ class Bayesian_GPLVM(sparse_GP_regression, GPLVM):
         return np.hstack((dL_dmu.flatten(), dL_dS.flatten()))
-    def log_likelihood_gradients(self):
-        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+    def _log_likelihood_gradients(self):
+        return np.hstack((self.dL_dmuS().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
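The packed vector returned by _get_params above is laid out as latent means, then latent variances, then the sparse-GP block. A slicing sketch (m is assumed to be a constructed Bayesian_GPLVM with N points and Q latent dimensions):

p = m._get_params()
N, Q = m.N, m.Q
X_flat  = p[:N*Q]          # latent means, reshaped to (N, Q) by _set_params
S_flat  = p[N*Q:2*N*Q]     # latent variances, reshaped to (N, Q)
gp_rest = p[2*N*Q:]        # inducing inputs, noise precision, kernel parameters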

View file

@ -33,18 +33,18 @@ class GPLVM(GP_regression):
         else:
             return np.random.randn(Y.shape[0], Q)
-    def get_param_names(self):
+    def _get_param_names(self):
         return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-                + self.kern.extract_param_names())
+                + self.kern._get_param_names_transformed())
-    def get_param(self):
-        return np.hstack((self.X.flatten(), self.kern.extract_param()))
+    def _get_params(self):
+        return np.hstack((self.X.flatten(), self.kern._get_params_transformed()))
-    def set_param(self,x):
+    def _set_params(self,x):
         self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
-        GP_regression.set_param(self, x[self.X.size:])
+        GP_regression._set_params(self, x[self.X.size:])
-    def log_likelihood_gradients(self):
+    def _log_likelihood_gradients(self):
         dL_dK = self.dL_dK()
         dL_dtheta = self.kern.dK_dtheta(dL_dK,self.X)

View file

@ -41,14 +41,14 @@ class GP_EP(model):
         self.K = self.kernel.K(self.X)
         model.__init__(self)
-    def set_param(self,p):
-        self.kernel.expand_param(p)
+    def _set_params(self,p):
+        self.kernel._set_params_transformed(p)
-    def get_param(self):
-        return self.kernel.extract_param()
+    def _get_params(self):
+        return self.kernel._get_params_transformed()
-    def get_param_names(self):
-        return self.kernel.extract_param_names()
+    def _get_param_names(self):
+        return self.kernel._get_param_names_transformed()
     def approximate_likelihood(self):
         self.ep_approx = Full(self.K,self.likelihood,epsilon=self.epsilon_ep,powerep=[self.eta,self.delta])
@ -78,7 +78,7 @@ class GP_EP(model):
         L3 = sum(np.log(self.ep_approx.Z_hat))
         return L1 + L2A + L2B + L3
-    def log_likelihood_gradients(self):
+    def _log_likelihood_gradients(self):
         dK_dp = self.kernel.dK_dtheta(self.X)
         self.dK_dp = dK_dp
         aux1,info_1 = linalg.flapack.dtrtrs(self.L,np.dot(self.Sroot_tilde_K,self.ep_approx.v_tilde),lower=1)
@ -138,7 +138,7 @@ class GP_EP(model):
         """
         self.epsilon_em = epsilon
         log_likelihood_change = self.epsilon_em + 1.
-        self.parameters_path = [self.kernel.get_param()]
+        self.parameters_path = [self.kernel._get_params()]
         self.approximate_likelihood()
         self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
         self.log_likelihood_path = [self.log_likelihood()]
@ -150,11 +150,11 @@ class GP_EP(model):
             log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
             if log_likelihood_change < 0:
                 print 'log_likelihood decrement'
-                self.kernel.expand_param(self.parameters_path[-1])
-                self.kernM.expand_param(self.parameters_path[-1])
+                self.kernel._set_params_transformed(self.parameters_path[-1])
+                self.kernM._set_params_transformed(self.parameters_path[-1])
             else:
                 self.approximate_likelihood()
                 self.log_likelihood_path.append(self.log_likelihood())
-                self.parameters_path.append(self.kernel.get_param())
+                self.parameters_path.append(self.kernel._get_params())
                 self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
                 iteration += 1
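The loop above only ever keeps updates that do not decrease the marginal likelihood, reverting to parameters_path[-1] otherwise. The same accept/reject pattern, isolated as a runnable toy (guarded_ascent, f and step are illustrative names, not GPy API):

import random

def guarded_ascent(f, x, step, n_iter=20):
    # keep only steps that do not decrease the objective, as the EM loop above does
    path, best = [x], f(x)
    for _ in range(n_iter):
        candidate = step(path[-1])
        if f(candidate) < best:
            continue                      # "log_likelihood decrement": reject, keep old state
        best = f(candidate)
        path.append(candidate)
    return path[-1]

print guarded_ascent(lambda x: -(x - 3.)**2, 0., lambda x: x + random.uniform(-0.5, 1.0))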

View file

@ -63,47 +63,47 @@ class GP_regression(model):
         self._Ystd = np.ones((1,self.Y.shape[1]))
         if self.D > self.N:
-            # then it's more efficient to store Youter
-            self.Youter = np.dot(self.Y, self.Y.T)
+            # then it's more efficient to store YYT
+            self.YYT = np.dot(self.Y, self.Y.T)
         else:
-            self.Youter = None
+            self.YYT = None
         model.__init__(self)
-    def set_param(self,p):
-        self.kern.expand_param(p)
+    def _set_params(self,p):
+        self.kern._set_params_transformed(p)
         self.K = self.kern.K(self.X,slices1=self.Xslices)
         self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
-    def get_param(self):
-        return self.kern.extract_param()
+    def _get_params(self):
+        return self.kern._get_params_transformed()
-    def get_param_names(self):
-        return self.kern.extract_param_names()
+    def _get_param_names(self):
+        return self.kern._get_param_names_transformed()
     def _model_fit_term(self):
         """
-        Computes the model fit using Youter if it's available
+        Computes the model fit using YYT if it's available
         """
-        if self.Youter is None:
+        if self.YYT is None:
             return -0.5*np.sum(np.square(np.dot(self.Li,self.Y)))
         else:
-            return -0.5*np.sum(np.multiply(self.Ki, self.Youter))
+            return -0.5*np.sum(np.multiply(self.Ki, self.YYT))
     def log_likelihood(self):
         complexity_term = -0.5*self.N*self.D*np.log(2.*np.pi) - 0.5*self.D*self.K_logdet
         return complexity_term + self._model_fit_term()
     def dL_dK(self):
-        if self.Youter is None:
+        if self.YYT is None:
             alpha = np.dot(self.Ki,self.Y)
             dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
         else:
-            dL_dK = 0.5*(mdot(self.Ki, self.Youter, self.Ki) - self.D*self.Ki)
+            dL_dK = 0.5*(mdot(self.Ki, self.YYT, self.Ki) - self.D*self.Ki)
         return dL_dK
-    def log_likelihood_gradients(self):
+    def _log_likelihood_gradients(self):
         return self.kern.dK_dtheta(partial=self.dL_dK(),X=self.X)
     def predict(self,Xnew, slices=None, full_cov=False):
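The YYT caching above rests on both branches computing the same quantity, -0.5 tr(K^{-1} Y Y^T): the Cholesky form pays O(N^2 D) per evaluation, while the cached YYT form pays O(N^2). A plain-numpy check of that identity (all names here are local, not GPy API):

import numpy as np
N, D = 5, 8                                            # D > N, so the YYT branch applies
rng = np.random.RandomState(0)
Y = rng.randn(N, D)
A = rng.randn(N, N)
K = np.dot(A, A.T) + np.eye(N)                         # a positive-definite covariance
Li = np.linalg.inv(np.linalg.cholesky(K))
Ki = np.linalg.inv(K)
YYT = np.dot(Y, Y.T)
fit_chol = -0.5 * np.sum(np.square(np.dot(Li, Y)))     # the D <= N branch
fit_yyt  = -0.5 * np.sum(Ki * YYT)                     # the D > N branch, reusing cached YYT
print np.allclose(fit_chol, fit_yyt)                   # True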

View file

@ -3,7 +3,7 @@
 from GP_regression import GP_regression
-from sparse_GP_regression import sparse_GP_regression, sgp_debugB, sgp_debugC, sgp_debugE
+from sparse_GP_regression import sparse_GP_regression
 from GPLVM import GPLVM
 from warped_GP import warpedGP
 from GP_EP import GP_EP

View file

@ -42,15 +42,15 @@ class generalized_FITC(model):
         self.jitter = 1e-12
         model.__init__(self)
-    def set_param(self,p):
-        self.kernel.expand_param(p[0:-self.Z.size])
+    def _set_params(self,p):
+        self.kernel._set_params_transformed(p[0:-self.Z.size])
         self.Z = p[-self.Z.size:].reshape(self.M,self.D)
-    def get_param(self):
-        return np.hstack([self.kernel.extract_param(),self.Z.flatten()])
+    def _get_params(self):
+        return np.hstack([self.kernel._get_params_transformed(),self.Z.flatten()])
-    def get_param_names(self):
-        return self.kernel.extract_param_names()+['iip_%i'%i for i in range(self.Z.size)]
+    def _get_param_names(self):
+        return self.kernel._get_param_names_transformed()+['iip_%i'%i for i in range(self.Z.size)]
     def approximate_likelihood(self):
         self.Kmm = self.kernel.K(self.Z)
@ -91,15 +91,15 @@ class generalized_FITC(model):
     def log_likelihood(self):
         self.posterior_param()
-        self.Youter = np.dot(self.mu_tilde,self.mu_tilde.T)
+        self.YYT = np.dot(self.mu_tilde,self.mu_tilde.T)
         A = -self.hld
-        B = -.5*np.sum(self.Qi*self.Youter)
+        B = -.5*np.sum(self.Qi*self.YYT)
         C = sum(np.log(self.ep_approx.Z_hat))
         D = .5*np.sum(np.log(1./self.ep_approx.tau_tilde + 1./self.ep_approx.tau_))
         E = .5*np.sum((self.ep_approx.v_/self.ep_approx.tau_ - self.mu_tilde.flatten())**2/(1./self.ep_approx.tau_ + 1./self.ep_approx.tau_tilde))
         return A + B + C + D + E
-    def log_likelihood_gradients(self):
+    def _log_likelihood_gradients(self):
         dKmm_dtheta = self.kernel.dK_dtheta(self.Z)
         dKnn_dtheta = self.kernel.dK_dtheta(self.X)
         dKmn_dtheta = self.kernel.dK_dtheta(self.Z,self.X)
@ -214,7 +214,7 @@ class generalized_FITC(model):
         """
         self.epsilon_em = epsilon
         log_likelihood_change = self.epsilon_em + 1.
-        self.parameters_path = [self.kernel.get_param()]
+        self.parameters_path = [self.kernel._get_params()]
         self.approximate_likelihood()
         self.site_approximations_path = [[self.ep_approx.tau_tilde,self.ep_approx.v_tilde]]
         self.inducing_inputs_path = [self.Z]
@ -227,7 +227,7 @@ class generalized_FITC(model):
             log_likelihood_change = log_likelihood_new - self.log_likelihood_path[-1]
             if log_likelihood_change < 0:
                 print 'log_likelihood decrement'
-                self.kernel.expand_param(self.parameters_path[-1])
+                self.kernel._set_params_transformed(self.parameters_path[-1])
                 self.kernM = self.kernel.copy()
                 self.kernM.expand_X(self.inducing_inputs_path[-1])
                 self.__init__(self.kernel,self.likelihood,kernM=self.kernM,powerep=[self.eta,self.delta],epsilon_ep = self.epsilon_ep, epsilon_em = self.epsilon_em)
@ -235,7 +235,7 @@ class generalized_FITC(model):
             else:
                 self.approximate_likelihood()
                 self.log_likelihood_path.append(self.log_likelihood())
-                self.parameters_path.append(self.kernel.get_param())
+                self.parameters_path.append(self.kernel._get_params())
                 self.site_approximations_path.append([self.ep_approx.tau_tilde,self.ep_approx.v_tilde])
                 self.inducing_inputs_path.append(self.Z)
                 iteration += 1

View file

@ -27,16 +27,16 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
         X = self.initialise_latent(init, Q, Y)
         sparse_GP_regression.__init__(self, X, Y, **kwargs)
-    def get_param_names(self):
+    def _get_param_names(self):
         return (sum([['X_%i_%i'%(n,q) for n in range(self.N)] for q in range(self.Q)],[])
-                + sparse_GP_regression.get_param_names(self))
+                + sparse_GP_regression._get_param_names(self))
-    def get_param(self):
-        return np.hstack((self.X.flatten(), sparse_GP_regression.get_param(self)))
+    def _get_params(self):
+        return np.hstack((self.X.flatten(), sparse_GP_regression._get_params(self)))
-    def set_param(self,x):
+    def _set_params(self,x):
         self.X = x[:self.X.size].reshape(self.N,self.Q).copy()
-        sparse_GP_regression.set_param(self, x[self.X.size:])
+        sparse_GP_regression._set_params(self, x[self.X.size:])
     def log_likelihood(self):
         return sparse_GP_regression.log_likelihood(self)
@ -49,8 +49,8 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
         return dL_dX
-    def log_likelihood_gradients(self):
-        return np.hstack((self.dL_dX().flatten(), sparse_GP_regression.log_likelihood_gradients(self)))
+    def _log_likelihood_gradients(self):
+        return np.hstack((self.dL_dX().flatten(), sparse_GP_regression._log_likelihood_gradients(self)))
     def plot(self):
         GPLVM.plot(self)

View file

@ -107,6 +107,20 @@ class sparse_GP_regression(GP_regression):
         self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
         self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - np.dot(self.C, self.psi1VVpsi1), self.Kmmi) + 0.5*self.E # dD
+    def _set_params(self, p):
+        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
+        self.beta = p[self.M*self.Q]
+        self.kern._set_params(p[self.Z.size + 1:])
+        self._computations()
+    def _get_params(self):
+        return np.hstack([self.Z.flatten(),self.beta,self.kern._get_params_transformed()])
+    def _get_param_names(self):
+        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern._get_param_names_transformed()
     def log_likelihood(self):
         """ Compute the (lower bound on the) log marginal likelihood """
         sf2 = self.scale_factor**2
@ -116,17 +130,8 @@
         D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
         return A+B+C+D
-    def set_param(self, p):
-        self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
-        self.beta = p[self.M*self.Q]
-        self.kern.set_param(p[self.Z.size + 1:])
-        self._computations()
-    def get_param(self):
-        return np.hstack([self.Z.flatten(),self.beta,self.kern.extract_param()])
-    def get_param_names(self):
-        return sum([['iip_%i_%i'%(i,j) for i in range(self.Z.shape[0])] for j in range(self.Z.shape[1])],[]) + ['noise_precision']+self.kern.extract_param_names()
+    def _log_likelihood_gradients(self):
+        return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
     def dL_dbeta(self):
         """
@ -172,9 +177,6 @@
         dL_dZ += self.kern.dK_dX(dL_dpsi1,self.Z,self.X)
         return dL_dZ
-    def log_likelihood_gradients(self):
-        return np.hstack([self.dL_dZ().flatten(), self.dL_dbeta(), self.dL_dtheta()])
     def _raw_predict(self, Xnew, slices, full_cov=False):
         """Internal helper function for making predictions, does not account for normalisation"""
@ -201,94 +203,3 @@
             pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
         if self.Q==2:
             pb.plot(self.Z[:,0],self.Z[:,1],'wo')
class sgp_debugB(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*( - self.Kmmi))
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi) # dB
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return B
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dB_dbeta)
class sgp_debugC(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = np.zeros(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.D*(self.LBL_inv))
# Compute dL_dKmm
self.dL_dKmm = -0.5 * self.D * (- self.LBL_inv - 2.*self.beta*mdot(self.LBL_inv, self.psi2, self.Kmmi) + self.Kmmi) # dC
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return C
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dC_dbeta)
class sgp_debugE(sparse_GP_regression):
def _computations(self):
sparse_GP_regression._computations(self)
# Compute dL_dpsi
self.dL_dpsi0 = np.zeros(self.N)
self.dL_dpsi1 = np.zeros_like(self.psi1)
self.dL_dpsi2 = - 0.5 * self.beta * (self.G)
# Compute dL_dKmm
tmp = mdot(self.beta*self.psi2, self.Kmmi, self.psi1VVpsi1)
self.dL_dKmm = -0.5*mdot(self.Kmmi,tmp + tmp.T + self.psi1VVpsi1,self.Kmmi)
#self.dL_dKmm = np.dot(np.dot(self.G,self.beta*self.psi2) - np.dot(self.LBL_inv, self.psi1VVpsi1), self.Kmmi) + 0.5*self.G # dE
def log_likelihood(self):
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.D*self.trace_K
C = -0.5*self.D * self.B_logdet
D = -0.5*self.beta*self.trYYT
E = +0.5*np.sum(self.psi1VVpsi1 * self.LBL_inv)
return E
def dL_dbeta(self):
dA_dbeta = 0.5 * self.N*self.D/self.beta
dB_dbeta = - 0.5 * self.D * self.trace_K
dC_dbeta = - 0.5 * self.D * np.sum(self.Bi*self.A)/self.beta
dD_dbeta = - 0.5 * self.trYYT
tmp = mdot(self.LBi.T, self.LLambdai, self.psi1V)
dE_dbeta = (np.sum(np.square(self.C)) - 0.5 * np.sum(self.A * np.dot(tmp, tmp.T)))/self.beta
return np.squeeze(dE_dbeta)
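The deleted sgp_debug* subclasses isolated individual terms of the bound for gradient debugging. A finite-difference check on the full bound covers similar ground; a minimal sketch (fd_grad is a hypothetical helper, not part of the commit), assuming m is a sparse_GP_regression instance whose _set_params accepts the vector returned by _get_params:

import numpy as np

def fd_grad(m, eps=1e-6):
    p0 = m._get_params()
    g = np.zeros_like(p0)
    for i in range(p0.size):
        for sign in (1., -1.):
            p = p0.copy()
            p[i] += sign * eps
            m._set_params(p)
            g[i] += sign * m.log_likelihood() / (2 * eps)
    m._set_params(p0)                 # restore the original state
    return g
# compare: np.allclose(fd_grad(m), m._log_likelihood_gradients(), atol=1e-4)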

View file

@ -13,7 +13,7 @@ from GP_regression import GP_regression
 class warpedGP(GP_regression):
     """
-    TODO: fucking docstrings!
+    TODO: fecking docstrings!
     @nfusi: I've hacked a little on this, but no guarantees. J.
     """
@ -30,17 +30,17 @@ class warpedGP(GP_regression):
         self.transform_data()
         GP_regression.__init__(self, X, self.Y, **kwargs)
-    def set_param(self, x):
+    def _set_params(self, x):
         self.warping_params = x[:self.warping_function.num_parameters].reshape(self.warp_params_shape).copy()
         self.transform_data()
-        GP_regression.set_param(self, x[self.warping_function.num_parameters:].copy())
+        GP_regression._set_params(self, x[self.warping_function.num_parameters:].copy())
-    def get_param(self):
-        return np.hstack((self.warping_params.flatten().copy(), GP_regression.get_param(self).copy()))
+    def _get_params(self):
+        return np.hstack((self.warping_params.flatten().copy(), GP_regression._get_params(self).copy()))
-    def get_param_names(self):
-        warping_names = self.warping_function.get_param_names()
-        param_names = GP_regression.get_param_names(self)
+    def _get_param_names(self):
+        warping_names = self.warping_function._get_param_names()
+        param_names = GP_regression._get_param_names(self)
         return warping_names + param_names
     def transform_data(self):
@ -48,9 +48,9 @@ class warpedGP(GP_regression):
         # this supports the 'smart' behaviour in GP_regression
         if self.D > self.N:
-            self.Youter = np.dot(self.Y, self.Y.T)
+            self.YYT = np.dot(self.Y, self.Y.T)
         else:
-            self.Youter = None
+            self.YYT = None
         return self.Y
@ -59,8 +59,8 @@ class warpedGP(GP_regression):
         jacobian = self.warping_function.fgrad_y(self.Z, self.warping_params)
         return ll + np.log(jacobian).sum()
-    def log_likelihood_gradients(self):
-        ll_grads = GP_regression.log_likelihood_gradients(self)
+    def _log_likelihood_gradients(self):
+        ll_grads = GP_regression._log_likelihood_gradients(self)
         alpha = np.dot(self.Ki, self.Y.flatten())
         warping_grads = self.warping_function_gradients(alpha)
         return np.hstack((warping_grads.flatten(), ll_grads.flatten()))
@ -81,7 +81,7 @@ class warpedGP(GP_regression):
     def predict(self, X, in_unwarped_space = False, **kwargs):
         mu, var = GP_regression.predict(self, X, **kwargs)
-        # The plot() function calls set_param() before calling predict()
+        # The plot() function calls _set_params() before calling predict()
         # this is causing the observations to be plotted in the transformed
         # space (where Y lives), making the plot look very wrong
         # if the predictions are made in the untransformed space
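The Jacobian term added in log_likelihood above (ll + np.log(jacobian).sum()) is the standard warped-GP change-of-variables correction: with f the warping function applied to the raw targets y, the marginal likelihood being maximised is

.. math::

    \log p(y|X) = \log \mathcal{N}\big(f(y)\,|\,0,\, K\big) + \sum_{n=1}^N \log f'(y_n)

so the GP term is evaluated on the warped targets and each datum contributes the log-derivative of the warp.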

View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class PriorTests(unittest.TestCase):
def test_lognormal(self):
xmin, xmax = 1, 2.5*np.pi
b, C, SNR = 1, 0, 0.1
X = np.linspace(xmin, xmax, 500)
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m.ensure_default_constraints()
lognormal = GPy.priors.log_Gaussian(1, 2)
m.set_prior('rbf', lognormal)
m.randomize()
self.assertTrue(m.checkgrad())
def test_gamma(self):
xmin, xmax = 1, 2.5*np.pi
b, C, SNR = 1, 0, 0.1
X = np.linspace(xmin, xmax, 500)
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m.ensure_default_constraints()
gamma = GPy.priors.gamma(1, 1)
m.set_prior('rbf', gamma)
m.randomize()
self.assertTrue(m.checkgrad())
def test_incompatibility(self):
xmin, xmax = 1, 2.5*np.pi
b, C, SNR = 1, 0, 0.1
X = np.linspace(xmin, xmax, 500)
y = b*X + C + 1*np.sin(X)
y += 0.05*np.random.randn(len(X))
X, y = X[:, None], y[:, None]
m = GPy.models.GP_regression(X, y)
m.ensure_default_constraints()
gaussian = GPy.priors.Gaussian(1, 1)
success = False
# setting a Gaussian prior on non-negative parameters
# should raise an assertionerror.
try:
m.set_prior('rbf', gaussian)
except AssertionError:
success = True
self.assertTrue(success)
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()
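The manual success flag in test_incompatibility can equivalently use unittest's built-in helper; a sketch of the same check (same setup as the test above, abbreviated):

def test_incompatibility_assertRaises(self):
    # assertRaises replaces the manual try/except flag
    X = np.linspace(1, 2.5*np.pi, 500)[:, None]
    y = X + np.sin(X) + 0.05*np.random.randn(500, 1)
    m = GPy.models.GP_regression(X, y)
    m.ensure_default_constraints()
    gaussian = GPy.priors.Gaussian(1, 1)
    self.assertRaises(AssertionError, m.set_prior, 'rbf', gaussian)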

View file

@ -42,51 +42,66 @@ class GradientTests(unittest.TestCase):
# contrain all parameters to be positive # contrain all parameters to be positive
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
def test_gp_regression_rbf_white_kern_1d(self): def test_gp_regression_rbf_1d(self):
''' Testing the GP regression with rbf kernel with white kernel on 1d data ''' ''' Testing the GP regression with rbf kernel with white kernel on 1d data '''
rbf = GPy.kern.rbf(1) rbf = GPy.kern.rbf(1)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=1) self.check_model_with_white(rbf, model_type='GP_regression', dimension=1)
def test_GP_regression_rbf_ARD_white_kern_2D(self): def test_GP_regression_rbf_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data '''
k = GPy.kern.rbf_ARD(2)
self.check_model_with_white(k, model_type='GP_regression', dimension=2)
def test_GP_regression_rbf_white_kern_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data ''' ''' Testing the GP regression with rbf and white kernel on 2d data '''
rbf = GPy.kern.rbf(2) rbf = GPy.kern.rbf(2)
self.check_model_with_white(rbf, model_type='GP_regression', dimension=2) self.check_model_with_white(rbf, model_type='GP_regression', dimension=2)
def test_GP_regression_matern52_kern_1D(self): def test_GP_regression_rbf_ARD_2D(self):
''' Testing the GP regression with rbf and white kernel on 2d data '''
k = GPy.kern.rbf(2,ARD=True)
self.check_model_with_white(k, model_type='GP_regression', dimension=2)
def test_GP_regression_matern52_1D(self):
''' Testing the GP regression with matern52 kernel on 1d data ''' ''' Testing the GP regression with matern52 kernel on 1d data '''
matern52 = GPy.kern.Matern52(1) matern52 = GPy.kern.Matern52(1)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=1) self.check_model_with_white(matern52, model_type='GP_regression', dimension=1)
def test_GP_regression_matern52_kern_2D(self): def test_GP_regression_matern52_2D(self):
''' Testing the GP regression with matern52 kernel on 2d data ''' ''' Testing the GP regression with matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2) matern52 = GPy.kern.Matern52(2)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2) self.check_model_with_white(matern52, model_type='GP_regression', dimension=2)
def test_GP_regression_matern32_kern_1D(self): def test_GP_regression_matern52_ARD_2D(self):
''' Testing the GP regression with ARD matern52 kernel on 2d data '''
matern52 = GPy.kern.Matern52(2,ARD=True)
self.check_model_with_white(matern52, model_type='GP_regression', dimension=2)
def test_GP_regression_matern32_1D(self):
''' Testing the GP regression with matern32 kernel on 1d data ''' ''' Testing the GP regression with matern32 kernel on 1d data '''
matern32 = GPy.kern.Matern32(1) matern32 = GPy.kern.Matern32(1)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=1) self.check_model_with_white(matern32, model_type='GP_regression', dimension=1)
def test_GP_regression_matern32_kern_2D(self): def test_GP_regression_matern32_2D(self):
''' Testing the GP regression with matern32 kernel on 2d data ''' ''' Testing the GP regression with matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2) matern32 = GPy.kern.Matern32(2)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2) self.check_model_with_white(matern32, model_type='GP_regression', dimension=2)
def test_GP_regression_exponential_kern_1D(self): def test_GP_regression_matern32_ARD_2D(self):
''' Testing the GP regression with ARD matern32 kernel on 2d data '''
matern32 = GPy.kern.Matern32(2,ARD=True)
self.check_model_with_white(matern32, model_type='GP_regression', dimension=2)
def test_GP_regression_exponential_1D(self):
''' Testing the GP regression with exponential kernel on 1d data ''' ''' Testing the GP regression with exponential kernel on 1d data '''
exponential = GPy.kern.exponential(1) exponential = GPy.kern.exponential(1)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=1) self.check_model_with_white(exponential, model_type='GP_regression', dimension=1)
def test_GP_regression_exponential_kern_2D(self): def test_GP_regression_exponential_2D(self):
''' Testing the GP regression with exponential kernel on 2d data ''' ''' Testing the GP regression with exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2) exponential = GPy.kern.exponential(2)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2) self.check_model_with_white(exponential, model_type='GP_regression', dimension=2)
def test_GP_regression_exponential_ARD_2D(self):
''' Testing the GP regression with ARD exponential kernel on 2d data '''
exponential = GPy.kern.exponential(2,ARD=True)
self.check_model_with_white(exponential, model_type='GP_regression', dimension=2)
def test_GP_regression_bias_kern_1D(self): def test_GP_regression_bias_kern_1D(self):
''' Testing the GP regression with bias kernel on 1d data ''' ''' Testing the GP regression with bias kernel on 1d data '''
bias = GPy.kern.bias(1) bias = GPy.kern.bias(1)
@ -121,7 +136,7 @@ class GradientTests(unittest.TestCase):
""" Testing GPLVM with rbf + bias and white kernel """ """ Testing GPLVM with rbf + bias and white kernel """
N, Q, D = 50, 1, 2 N, Q, D = 50, 1, 2
X = np.random.rand(N, Q) X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q, 0.5, 0.9) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05) k = GPy.kern.rbf(Q, 0.5, 0.9*np.ones((1,))) + GPy.kern.bias(Q, 0.1) + GPy.kern.white(Q, 0.05)
K = k.K(X) K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T Y = np.random.multivariate_normal(np.zeros(N),K,D).T
m = GPy.models.GPLVM(Y, Q, kernel = k) m = GPy.models.GPLVM(Y, Q, kernel = k)
@ -151,6 +166,7 @@ class GradientTests(unittest.TestCase):
m.approximate_likelihood() m.approximate_likelihood()
self.assertTrue(m.checkgrad()) self.assertTrue(m.checkgrad())
@unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self): def test_generalized_FITC(self):
N = 20 N = 20
X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None] X = np.hstack([np.random.rand(N/2)+1,np.random.rand(N/2)-1])[:,None]

View file

@ -90,7 +90,7 @@ def toy_rbf_1d(seed=default_seed):
N = 500 N = 500
X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn)) X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
X.sort(axis=0) X.sort(axis=0)
rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=0.25) rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(numIn, variance=1e-2) white = GPy.kern.white(numIn, variance=1e-2)
kernel = rbf + white kernel = rbf + white
K = kernel.K(X) K = kernel.K(X)
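For context, a toy dataset like this is completed by drawing the targets from the GP prior; a minimal sketch of that step, reusing the names N and K from the surrounding code (a hypothetical continuation, not necessarily the function's actual body):
# draw one function sample y ~ N(0, K) over the sorted inputs X
y = np.random.multivariate_normal(np.zeros(N), K)[:, None]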

View file

@ -33,7 +33,7 @@ class WarpingFunction(object):
"""inverse function transformation""" """inverse function transformation"""
raise NotImplementedError raise NotImplementedError
def get_param_names(self): def _get_param_names(self):
raise NotImplementedError raise NotImplementedError
def plot(self, psi, xmin, xmax): def plot(self, psi, xmin, xmax):
@ -151,7 +151,7 @@ class TanhWarpingFunction(WarpingFunction):
return gradients return gradients
def get_param_names(self): def _get_param_names(self):
variables = ['a', 'b', 'c'] variables = ['a', 'b', 'c']
names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[]) names = sum([['warp_tanh_%s_t%i' % (variables[n],q) for n in range(3)] for q in range(self.n_terms)],[])
return names return names
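The nested comprehension builds three warp parameter names per tanh term, with the outer loop running over terms; for example, with n_terms=2 it produces:
names = sum([['warp_tanh_%s_t%i' % (v, q) for v in ['a', 'b', 'c']] for q in range(2)], [])
# ['warp_tanh_a_t0', 'warp_tanh_b_t0', 'warp_tanh_c_t0',
#  'warp_tanh_a_t1', 'warp_tanh_b_t1', 'warp_tanh_c_t1']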

View file

@ -1,5 +1,7 @@
GPy GPy
=== ===
Gaussian processes framework in python A Gaussian process framework in Python.
* [Online documentation](https://gpy.readthedocs.org/en/latest/)
* [Unit tests (Travis-CI)](https://travis-ci.org/SheffieldML/GPy)

35
doc/GPy.core.rst Normal file
View file

@ -0,0 +1,35 @@
core Package
============
:mod:`core` Package
-------------------
.. automodule:: GPy.core
:members:
:undoc-members:
:show-inheritance:
:mod:`model` Module
-------------------
.. automodule:: GPy.core.model
:members:
:undoc-members:
:show-inheritance:
:mod:`parameterised` Module
---------------------------
.. automodule:: GPy.core.parameterised
:members:
:undoc-members:
:show-inheritance:
:mod:`priors` Module
--------------------
.. automodule:: GPy.core.priors
:members:
:undoc-members:
:show-inheritance:

35
doc/GPy.inference.rst Normal file
View file

@ -0,0 +1,35 @@
inference Package
=================
:mod:`Expectation_Propagation` Module
-------------------------------------
.. automodule:: GPy.inference.Expectation_Propagation
:members:
:undoc-members:
:show-inheritance:
:mod:`likelihoods` Module
-------------------------
.. automodule:: GPy.inference.likelihoods
:members:
:undoc-members:
:show-inheritance:
:mod:`optimization` Module
--------------------------
.. automodule:: GPy.inference.optimization
:members:
:undoc-members:
:show-inheritance:
:mod:`samplers` Module
----------------------
.. automodule:: GPy.inference.samplers
:members:
:undoc-members:
:show-inheritance:

139
doc/GPy.kern.rst Normal file
View file

@ -0,0 +1,139 @@
kern Package
============
:mod:`kern` Package
-------------------
.. automodule:: GPy.kern
:members:
:undoc-members:
:show-inheritance:
:mod:`Brownian` Module
----------------------
.. automodule:: GPy.kern.Brownian
:members:
:undoc-members:
:show-inheritance:
:mod:`Matern32` Module
----------------------
.. automodule:: GPy.kern.Matern32
:members:
:undoc-members:
:show-inheritance:
:mod:`Matern52` Module
----------------------
.. automodule:: GPy.kern.Matern52
:members:
:undoc-members:
:show-inheritance:
:mod:`bias` Module
------------------
.. automodule:: GPy.kern.bias
:members:
:undoc-members:
:show-inheritance:
:mod:`constructors` Module
--------------------------
.. automodule:: GPy.kern.constructors
:members:
:undoc-members:
:show-inheritance:
:mod:`exponential` Module
-------------------------
.. automodule:: GPy.kern.exponential
:members:
:undoc-members:
:show-inheritance:
:mod:`finite_dimensional` Module
--------------------------------
.. automodule:: GPy.kern.finite_dimensional
:members:
:undoc-members:
:show-inheritance:
:mod:`kern` Module
------------------
.. automodule:: GPy.kern.kern
:members:
:undoc-members:
:show-inheritance:
:mod:`kernpart` Module
----------------------
.. automodule:: GPy.kern.kernpart
:members:
:undoc-members:
:show-inheritance:
:mod:`linear` Module
--------------------
.. automodule:: GPy.kern.linear
:members:
:undoc-members:
:show-inheritance:
:mod:`linear_ARD` Module
------------------------
.. automodule:: GPy.kern.linear_ARD
:members:
:undoc-members:
:show-inheritance:
:mod:`rbf-testing` Module
-------------------------
.. automodule:: GPy.kern.rbf-testing
:members:
:undoc-members:
:show-inheritance:
:mod:`rbf` Module
-----------------
.. automodule:: GPy.kern.rbf
:members:
:undoc-members:
:show-inheritance:
:mod:`spline` Module
--------------------
.. automodule:: GPy.kern.spline
:members:
:undoc-members:
:show-inheritance:
:mod:`sympykern` Module
-----------------------
.. automodule:: GPy.kern.sympykern
:members:
:undoc-members:
:show-inheritance:
:mod:`white` Module
-------------------
.. automodule:: GPy.kern.white
:members:
:undoc-members:
:show-inheritance:

75
doc/GPy.models.rst Normal file
View file

@ -0,0 +1,75 @@
models Package
==============
:mod:`models` Package
---------------------
.. automodule:: GPy.models
:members:
:undoc-members:
:show-inheritance:
:mod:`GPLVM` Module
-------------------
.. automodule:: GPy.models.GPLVM
:members:
:undoc-members:
:show-inheritance:
:mod:`GP_EP` Module
-------------------
.. automodule:: GPy.models.GP_EP
:members:
:undoc-members:
:show-inheritance:
:mod:`GP_regression` Module
---------------------------
.. automodule:: GPy.models.GP_regression
:members:
:undoc-members:
:show-inheritance:
:mod:`generalized_FITC` Module
------------------------------
.. automodule:: GPy.models.generalized_FITC
:members:
:undoc-members:
:show-inheritance:
:mod:`sparse_GPLVM` Module
--------------------------
.. automodule:: GPy.models.sparse_GPLVM
:members:
:undoc-members:
:show-inheritance:
:mod:`sparse_GP_regression` Module
----------------------------------
.. automodule:: GPy.models.sparse_GP_regression
:members:
:undoc-members:
:show-inheritance:
:mod:`uncollapsed_sparse_GP` Module
-----------------------------------
.. automodule:: GPy.models.uncollapsed_sparse_GP
:members:
:undoc-members:
:show-inheritance:
:mod:`warped_GP` Module
-----------------------
.. automodule:: GPy.models.warped_GP
:members:
:undoc-members:
:show-inheritance:

22
doc/GPy.rst Normal file
View file

@ -0,0 +1,22 @@
GPy Package
===========
:mod:`GPy` Package
------------------
.. automodule:: GPy.__init__
:members:
:undoc-members:
:show-inheritance:
Subpackages
-----------
.. toctree::
GPy.core
GPy.inference
GPy.kern
GPy.models
GPy.util

67
doc/GPy.util.rst Normal file
View file

@ -0,0 +1,67 @@
util Package
============
:mod:`util` Package
-------------------
.. automodule:: GPy.util
:members:
:undoc-members:
:show-inheritance:
:mod:`Tango` Module
-------------------
.. automodule:: GPy.util.Tango
:members:
:undoc-members:
:show-inheritance:
:mod:`datasets` Module
----------------------
.. automodule:: GPy.util.datasets
:members:
:undoc-members:
:show-inheritance:
:mod:`linalg` Module
--------------------
.. automodule:: GPy.util.linalg
:members:
:undoc-members:
:show-inheritance:
:mod:`misc` Module
------------------
.. automodule:: GPy.util.misc
:members:
:undoc-members:
:show-inheritance:
:mod:`plot` Module
------------------
.. automodule:: GPy.util.plot
:members:
:undoc-members:
:show-inheritance:
:mod:`squashers` Module
-----------------------
.. automodule:: GPy.util.squashers
:members:
:undoc-members:
:show-inheritance:
:mod:`warping_functions` Module
-------------------------------
.. automodule:: GPy.util.warping_functions
:members:
:undoc-members:
:show-inheritance:

View file

@ -1,7 +1,7 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# #
# GPy documentation build configuration file, created by # GPy documentation build configuration file, created by
# sphinx-quickstart on Wed Jan 9 15:21:20 2013. # sphinx-quickstart on Fri Jan 18 15:30:28 2013.
# #
# This file is execfile()d with the current directory set to its containing dir. # This file is execfile()d with the current directory set to its containing dir.
# #
@ -25,7 +25,15 @@ import sys, os
# Add any Sphinx extension module names here, as strings. They can be extensions # Add any Sphinx extension module names here, as strings. They can be extensions
# coming with Sphinx (named 'sphinx.ext.*') or your custom ones. # coming with Sphinx (named 'sphinx.ext.*') or your custom ones.
extensions = ['sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.pngmath', 'sphinx.ext.mathjax', 'sphinx.ext.viewcode'] extensions = ['sphinx.ext.autodoc', 'sphinx.ext.viewcode']
# ----------------------- READTHEDOCS ------------------
on_rtd = os.environ.get('READTHEDOCS', None) == 'True'
if on_rtd:
sys.path.append("../GPy")
os.system("pwd")
os.system("sphinx-apidoc -f -o . ../GPy")
# Add any paths that contain templates here, relative to this directory. # Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates'] templates_path = ['_templates']
@ -41,16 +49,16 @@ master_doc = 'index'
# General information about the project. # General information about the project.
project = u'GPy' project = u'GPy'
copyright = u'2013, The GPy authors' copyright = u'2013, Author'
# The version info for the project you're documenting, acts as replacement for # The version info for the project you're documenting, acts as replacement for
# |version| and |release|, also used in various other places throughout the # |version| and |release|, also used in various other places throughout the
# built documents. # built documents.
# #
# The short X.Y version. # The short X.Y version.
version = '0.00001' version = ''
# The full version, including alpha/beta/rc tags. # The full version, including alpha/beta/rc tags.
release = '0.00001' release = ''
# The language for content autogenerated by Sphinx. Refer to documentation # The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages. # for a list of supported languages.
@ -184,7 +192,7 @@ latex_elements = {
# (source start file, target name, title, author, documentclass [howto/manual]). # (source start file, target name, title, author, documentclass [howto/manual]).
latex_documents = [ latex_documents = [
('index', 'GPy.tex', u'GPy Documentation', ('index', 'GPy.tex', u'GPy Documentation',
u'The GPy authors', 'manual'), u'Author', 'manual'),
] ]
# The name of an image file (relative to this directory) to place at the top of # The name of an image file (relative to this directory) to place at the top of
@ -214,7 +222,7 @@ latex_documents = [
# (source start file, name, description, authors, manual section). # (source start file, name, description, authors, manual section).
man_pages = [ man_pages = [
('index', 'gpy', u'GPy Documentation', ('index', 'gpy', u'GPy Documentation',
[u'The GPy authors'], 1) [u'Author'], 1)
] ]
# If true, show URL addresses after external links. # If true, show URL addresses after external links.
@ -228,7 +236,7 @@ man_pages = [
# dir menu entry, description, category) # dir menu entry, description, category)
texinfo_documents = [ texinfo_documents = [
('index', 'GPy', u'GPy Documentation', ('index', 'GPy', u'GPy Documentation',
u'The GPy authors', 'GPy', 'One line description of project.', u'Author', 'GPy', 'One line description of project.',
'Miscellaneous'), 'Miscellaneous'),
] ]
@ -240,3 +248,47 @@ texinfo_documents = [
# How to display URL addresses: 'footnote', 'no', or 'inline'. # How to display URL addresses: 'footnote', 'no', or 'inline'.
#texinfo_show_urls = 'footnote' #texinfo_show_urls = 'footnote'
# -- Options for Epub output ---------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = u'GPy'
epub_author = u'Author'
epub_publisher = u'Author'
epub_copyright = u'2013, Author'
# The language of the text. It defaults to the language option
# or en if the language is not set.
#epub_language = ''
# The scheme of the identifier. Typical schemes are ISBN or URL.
#epub_scheme = ''
# The unique identifier of the text. This can be a ISBN number
# or the project homepage.
#epub_identifier = ''
# A unique identification for the text.
#epub_uid = ''
# A tuple containing the cover image and cover page html template filenames.
#epub_cover = ()
# HTML files that should be inserted before the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_pre_files = []
# HTML files that should be inserted after the pages created by sphinx.
# The format is a list of tuples containing the path and title.
#epub_post_files = []
# A list of files that should not be packed into the epub file.
#epub_exclude_files = []
# The depth of the table of contents in toc.ncx.
#epub_tocdepth = 3
# Allow duplicate toc entries.
#epub_tocdup = True

View file

@ -1,5 +1,5 @@
.. GPy documentation master file, created by .. GPy documentation master file, created by
sphinx-quickstart on Wed Jan 9 15:21:20 2013. sphinx-quickstart on Fri Jan 18 17:36:01 2013.
You can adapt this file completely to your liking, but it should at least You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive. contain the root `toctree` directive.
@ -9,8 +9,9 @@ Welcome to GPy's documentation!
Contents: Contents:
.. toctree:: .. toctree::
:maxdepth: 2 :maxdepth: 4
GPy
Indices and tables Indices and tables

190
doc/make.bat Normal file
View file

@ -0,0 +1,190 @@
@ECHO OFF
REM Command file for Sphinx documentation
if "%SPHINXBUILD%" == "" (
set SPHINXBUILD=sphinx-build
)
set BUILDDIR=_build
set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% .
set I18NSPHINXOPTS=%SPHINXOPTS% .
if NOT "%PAPER%" == "" (
set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
)
if "%1" == "" goto help
if "%1" == "help" (
:help
echo.Please use `make ^<target^>` where ^<target^> is one of
echo. html to make standalone HTML files
echo. dirhtml to make HTML files named index.html in directories
echo. singlehtml to make a single large HTML file
echo. pickle to make pickle files
echo. json to make JSON files
echo. htmlhelp to make HTML files and a HTML help project
echo. qthelp to make HTML files and a qthelp project
echo. devhelp to make HTML files and a Devhelp project
echo. epub to make an epub
echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
echo. text to make text files
echo. man to make manual pages
echo. texinfo to make Texinfo files
echo. gettext to make PO message catalogs
echo. changes to make an overview over all changed/added/deprecated items
echo. linkcheck to check all external links for integrity
echo. doctest to run all doctests embedded in the documentation if enabled
goto end
)
if "%1" == "clean" (
for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
del /q /s %BUILDDIR%\*
goto end
)
if "%1" == "html" (
%SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/html.
goto end
)
if "%1" == "dirhtml" (
%SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
goto end
)
if "%1" == "singlehtml" (
%SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
goto end
)
if "%1" == "pickle" (
%SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the pickle files.
goto end
)
if "%1" == "json" (
%SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can process the JSON files.
goto end
)
if "%1" == "htmlhelp" (
%SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run HTML Help Workshop with the ^
.hhp project file in %BUILDDIR%/htmlhelp.
goto end
)
if "%1" == "qthelp" (
%SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished; now you can run "qcollectiongenerator" with the ^
.qhcp project file in %BUILDDIR%/qthelp, like this:
echo.^> qcollectiongenerator %BUILDDIR%\qthelp\GPy.qhcp
echo.To view the help file:
echo.^> assistant -collectionFile %BUILDDIR%\qthelp\GPy.qhc
goto end
)
if "%1" == "devhelp" (
%SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
if errorlevel 1 exit /b 1
echo.
echo.Build finished.
goto end
)
if "%1" == "epub" (
%SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The epub file is in %BUILDDIR%/epub.
goto end
)
if "%1" == "latex" (
%SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
if errorlevel 1 exit /b 1
echo.
echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
goto end
)
if "%1" == "text" (
%SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The text files are in %BUILDDIR%/text.
goto end
)
if "%1" == "man" (
%SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The manual pages are in %BUILDDIR%/man.
goto end
)
if "%1" == "texinfo" (
%SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
goto end
)
if "%1" == "gettext" (
%SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
if errorlevel 1 exit /b 1
echo.
echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
goto end
)
if "%1" == "changes" (
%SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
if errorlevel 1 exit /b 1
echo.
echo.The overview file is in %BUILDDIR%/changes.
goto end
)
if "%1" == "linkcheck" (
%SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
if errorlevel 1 exit /b 1
echo.
echo.Link check complete; look for any errors in the above output ^
or in %BUILDDIR%/linkcheck/output.txt.
goto end
)
if "%1" == "doctest" (
%SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
if errorlevel 1 exit /b 1
echo.
echo.Testing of doctests in the sources finished, look at the ^
results in %BUILDDIR%/doctest/output.txt.
goto end
)
:end

7
doc/modules.rst Normal file
View file

@ -0,0 +1,7 @@
GPy
===
.. toctree::
:maxdepth: 4
GPy

105
doc/tuto_GP_regression.rst Normal file
View file

@ -0,0 +1,105 @@
*************************************
Gaussian process regression tutorial
*************************************
In this tutorial we cover the basics of building a 1-dimensional and a 2-dimensional Gaussian process model, also known as a kriging model.
We first import the libraries we will need: ::
import pylab as pb
pb.ion()
import numpy as np
import GPy
1 dimensional model
===================
For this toy example, we assume we have the following inputs and outputs::
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
Note that the observations Y include some noise.
The first step is to define the covariance kernel we want to use for the model. Here we choose a kernel based on the Gaussian kernel (i.e. rbf or squared exponential) plus some white noise::
Gaussian = GPy.kern.rbf(D=1)
noise = GPy.kern.white(D=1)
kernel = Gaussian + noise
The parameter D stands for the dimension of the input space. Note that many other kernels are implemented, such as the following (a quick check of the combined kernel is sketched after this list):
* linear (``GPy.kern.linear``)
* exponential kernel (``GPy.kern.exponential``)
* Matern 3/2 (``GPy.kern.Matern32``)
* Matern 5/2 (``GPy.kern.Matern52``)
* spline (``GPy.kern.spline``)
* and many others...
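Whichever kernels are chosen, their sum is itself a kernel object, so the combined covariance can be inspected directly. A quick sketch (the printed values depend on the kernels' default parameters)::

    Xtest = np.linspace(0., 1., 4)[:, None]
    print kernel.K(Xtest)   # 4x4 matrix: the rbf covariance plus the noise variance on the diagonal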
The inputs required for building the model are the observations and the kernel::
m = GPy.models.GP_regression(X,Y,kernel)
The functions ``print`` and ``plot`` can help us understand the model we have just built::
print m
m.plot()
The default values of the kernel parameters may not be suited to the data at hand (for example, the confidence intervals seem too wide in the previous figure). A common approach is to find the values of the parameters that maximize the likelihood of the data. There are two steps to doing this with GPy:
* Constrain the parameters of the kernel to ensure it remains a valid covariance structure (for example, we don't want variances to be negative!).
* Run the optimization
There are various ways to constrain the parameters of the kernel. The most basic is to constrain all the parameters to be positive::
m.constrain_positive('')
but it is also possible to bound a parameter within a range or to fix it to a given value. The argument of ``m.constrain_positive`` is a regular expression that matches the names of the parameters to be constrained (as shown by ``print m``). For example, if we want the variance to be positive, the lengthscale to lie in [1,10] and the noise variance to be fixed, we can write::
#m.unconstrain('') # Required if the model has been previously constrained
m.constrain_positive('rbf_variance')
m.constrain_bounded('lengthscale', 1., 10.)
m.constrain_fixed('white',0.0025)
Once the constraints have been imposed, the model can be optimized::
m.optimize()
If we want to perform several restarts to try to improve the result of the optimization, we can use the ``optimize_restarts`` function::
m.optimize_restarts(Nrestarts = 10)
m.plot()
print(m)
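Once optimized, the model can also be queried at new inputs. A minimal sketch, assuming ``predict`` returns the posterior mean and variance as its first two outputs (the exact return signature may differ between versions)::

    Xnew = np.linspace(-3., 3., 100)[:, None]
    out = m.predict(Xnew)
    mean, variance = out[0], out[1]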
2 dimensional example
=====================
Here is a 2 dimensional example::
import pylab as pb
pb.ion()
import numpy as np
import GPy
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GP_regression(X,Y,ker)
# constrain all parameters to be positive # constrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
pb.figure()
m.optimize('tnc', max_f_eval = 1000)
m.plot()
print(m)
The flag ``ARD=True`` in the definition of the Matern kernel specifies that we want one lengthscale parameter per input dimension (i.e. the GP is not isotropic).
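After optimization, the fitted lengthscales can then be compared across input dimensions to judge their relative importance. A minimal sketch using the model's regex-based ``get`` (the exact parameter names are version dependent; check ``print m``)::

    print m.get('lengthscale')   # with ARD, one lengthscale per input dimension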

View file

@ -24,9 +24,9 @@ setup(name = 'GPy',
package_data = {'GPy': ['GPy/examples']}, package_data = {'GPy': ['GPy/examples']},
py_modules = ['GPy.__init__'], py_modules = ['GPy.__init__'],
long_description=read('README.md'), long_description=read('README.md'),
ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py', #ext_modules = [Extension(name = 'GPy.kern.lfmUpsilonf2py',
sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])], # sources = ['GPy/kern/src/lfmUpsilonf2py.f90'])],
install_requires=['numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'], install_requires=['sympy', 'numpy>=1.6', 'scipy>=0.9','matplotlib>=1.1'],
setup_requires=['sphinx'], setup_requires=['sphinx'],
cmdclass = {'build_sphinx': BuildDoc}, cmdclass = {'build_sphinx': BuildDoc},
classifiers=[ classifiers=[