Merged with master

This commit is contained in:
Alan Saul 2013-04-29 18:17:43 +01:00
commit bd346643db
34 changed files with 2916 additions and 1195 deletions

View file

@ -2,17 +2,19 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import optimize
import sys, pdb
import multiprocessing as mp
from GPy.util.misc import opt_wrapper
#import numdifftools as ndt
from parameterised import parameterised, truncate_pad
import priors
from ..util.linalg import jitchol
from ..inference import optimization
from .. import likelihoods
from ..inference import optimization
from ..util.linalg import jitchol
from GPy.util.misc import opt_wrapper
from parameterised import parameterised, truncate_pad
from scipy import optimize
import multiprocessing as mp
import numpy as np
import priors
import re
import sys
import pdb
# import numdifftools as ndt
class model(parameterised):
def __init__(self):
@ -24,14 +26,14 @@ class model(parameterised):
self.preferred_optimizer = 'tnc'
def _get_params(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _set_params(self,x):
def _set_params(self, x):
raise NotImplementedError, "this needs to be implemented to use the model class"
def log_likelihood(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def _log_likelihood_gradients(self):
raise NotImplementedError, "this needs to be implemented to use the model class"
def set_prior(self,which,what):
def set_prior(self, which, what):
"""
Sets priors on the model parameters.
@ -52,84 +54,59 @@ class model(parameterised):
which = self.grep_param_names(which)
#check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
# check tied situation
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
if len(tie_partial_matches):
raise ValueError, "cannot place prior across partial ties"
tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
if len(tie_matches)>1:
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
if len(tie_matches) > 1:
raise ValueError, "cannot place prior across multiple ties"
elif len(tie_matches)==1:
which = which[:1]# just place a prior object on the first parameter
elif len(tie_matches) == 1:
which = which[:1] # just place a prior object on the first parameter
#check constraints are okay
# check constraints are okay
if isinstance(what, (priors.gamma, priors.log_Gaussian)):
assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_negative_indices), "constraint and prior incompatible"
assert not np.any(which[:, None] == self.constrained_bounded_indices), "constraint and prior incompatible"
unconst = np.setdiff1d(which, self.constrained_positive_indices)
if len(unconst):
print "Warning: constraining parameters to be positive:"
print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
print '\n'
self.constrain_positive(unconst)
elif isinstance(what,priors.Gaussian):
assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
elif isinstance(what, priors.Gaussian):
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
else:
raise ValueError, "prior not recognised"
#store the prior in a local list
# store the prior in a local list
for w in which:
self.priors[w] = what
def get(self,name, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s"%name
def set(self,name,val):
"""
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
"""
matches = self.grep_param_names(name)
if len(matches):
x = self._get_params()
x[matches] = val
self._set_params(x)
else:
raise AttributeError, "no parameter matches %s"%name
def get_gradient(self,name, return_names=False):
def get_gradient(self, name, return_names=False):
"""
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._log_likelihood_gradients()[matches]
else:
raise AttributeError, "no parameter matches %s"%name
raise AttributeError, "no parameter matches %s" % name
def log_prior(self):
"""evaluate the prior"""
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
def _log_prior_gradients(self):
"""evaluate the gradients of the priors"""
x = self._get_params()
ret = np.zeros(x.size)
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
return ret
def _transform_gradients(self, g):
@ -138,13 +115,13 @@ class model(parameterised):
"""
x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
[np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.constrained_fixed_indices):
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
return np.delete(g,to_remove)
to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
@ -154,15 +131,15 @@ class model(parameterised):
Randomize the model.
Make this draw from the prior if one exists, else draw from N(0,1)
"""
#first take care of all parameters (from N(0,1))
# first take care of all parameters (from N(0,1))
x = self._get_params_transformed()
x = np.random.randn(x.size)
self._set_params_transformed(x)
#now draw from prior where possible
# now draw from prior where possible
x = self._get_params()
[np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
self._set_params(x)
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
@ -196,10 +173,10 @@ class model(parameterised):
pool = mp.Pool(processes=num_processes)
for i in range(Nrestarts):
self.randomize()
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
jobs.append(job)
pool.close() # signal that no more data coming in
pool.close() # signal that no more data coming in
pool.join() # wait for all the tasks to complete
except KeyboardInterrupt:
print "Ctrl+c received, terminating and joining pool."
@ -215,10 +192,10 @@ class model(parameterised):
self.optimization_runs.append(jobs[i].get())
if verbose:
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
except Exception as e:
if robust:
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
else:
raise e
@ -228,22 +205,22 @@ class model(parameterised):
else:
self._set_params_transformed(initial_parameters)
def ensure_default_constraints(self,warn=False):
def ensure_default_constraints(self, warn=False):
"""
Ensure that any variables which should clearly be positive have been constrained somehow.
"""
positive_strings = ['variance','lengthscale', 'precision']
positive_strings = ['variance', 'lengthscale', 'precision']
param_names = self._get_param_names()
currently_constrained = self.all_constrained_indices()
to_make_positive = []
for s in positive_strings:
for i in self.grep_param_names(s):
if not (i in currently_constrained):
to_make_positive.append(param_names[i])
to_make_positive.append(re.escape(param_names[i]))
if warn:
print "Warning! constraining %s postive"%name
print "Warning! constraining %s positive" % s
if len(to_make_positive):
self.constrain_positive('('+'|'.join(to_make_positive)+')')
self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
@ -261,14 +238,14 @@ class model(parameterised):
self._set_params_transformed(x)
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
return - LL_gradients - prior_gradients
return -LL_gradients - prior_gradients
def objective_and_gradients(self, x):
self._set_params_transformed(x)
obj_f = -self.log_likelihood() - self.log_prior()
obj_f = -self.log_likelihood() - self.log_prior()
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
prior_gradients = self._transform_gradients(self._log_prior_gradients())
obj_grads = - LL_gradients - prior_gradients
obj_grads = -LL_gradients - prior_gradients
return obj_f, obj_grads
def optimize(self, optimizer=None, start=None, **kwargs):
@ -288,13 +265,13 @@ class model(parameterised):
start = self._get_params_transformed()
optimizer = optimization.get_optimizer(optimizer)
opt = optimizer(start, model = self, **kwargs)
opt = optimizer(start, model=self, **kwargs)
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
self.optimization_runs.append(opt)
self._set_params_transformed(opt.x_opt)
def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
sgd.run()
@ -302,8 +279,8 @@ class model(parameterised):
def Laplace_covariance(self):
"""return the covariance matric of a Laplace approximatino at the current (stationary) point"""
#TODO add in the prior contributions for MAP estimation
#TODO fix the hessian for tied, constrained and fixed components
# TODO add in the prior contributions for MAP estimation
# TODO fix the hessian for tied, constrained and fixed components
if hasattr(self, 'log_likelihood_hessian'):
A = -self.log_likelihood_hessian()
@ -317,8 +294,8 @@ class model(parameterised):
A = -h(x)
self._set_params(x)
# check for almost zero components on the diagonal which screw up the cholesky
aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
A[aa,aa] = 0.
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
A[aa, aa] = 0.
return A
def Laplace_evidence(self):
@ -329,11 +306,11 @@ class model(parameterised):
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
except:
return np.nan
return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
def __str__(self):
s = parameterised.__str__(self).split('\n')
#add priors to the string
# add priors to the string
strs = [str(p) if p is not None else '' for p in self.priors]
width = np.array(max([len(p) for p in strs] + [5])) + 4
@ -344,16 +321,16 @@ class model(parameterised):
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
obj_funct += '\n\n'
s[0] = obj_funct + s[0]
s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
s[1] += '-'*(width + 1)
s[0] += "|{h:^{col}}".format(h='Prior', col=width)
s[1] += '-' * (width + 1)
for p in range(2, len(strs)+2):
s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
for p in range(2, len(strs) + 2):
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
return '\n'.join(s)
def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
"""
Check the gradient of the model by comparing to a numerical estimate.
If the verbose flag is passed, invividual components are tested (and printed)
@ -373,27 +350,27 @@ class model(parameterised):
x = self._get_params_transformed().copy()
if not verbose:
#just check the global ratio
dx = step*np.sign(np.random.uniform(-1,1,x.size))
# just check the global ratio
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
#evaulate around the point x
f1, g1 = self.objective_and_gradients(x+dx)
f2, g2 = self.objective_and_gradients(x-dx)
# evaulate around the point x
f1, g1 = self.objective_and_gradients(x + dx)
f2, g2 = self.objective_and_gradients(x - dx)
gradient = self.objective_function_gradients(x)
numerical_gradient = (f1-f2)/(2*dx)
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
numerical_gradient = (f1 - f2) / (2 * dx)
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
if (np.abs(1. - global_ratio) < tolerance) and not np.isnan(global_ratio):
return True
else:
return False
else:
#check the gradient of each parameter individually, and do some pretty printing
# check the gradient of each parameter individually, and do some pretty printing
try:
names = self._get_param_names_transformed()
except NotImplementedError:
names = ['Variable %i'%i for i in range(len(x))]
names = ['Variable %i' % i for i in range(len(x))]
# Prepare for pretty-printing
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
@ -402,9 +379,9 @@ class model(parameterised):
cols = [max_names]
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
cols = np.array(cols) + 5
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
separator = '-' * len(header_string[0])
print '\n'.join([header_string[0], separator])
if target_param is None:
@ -420,11 +397,11 @@ class model(parameterised):
f2, g2 = self.objective_and_gradients(xx)
gradient = self.objective_function_gradients(x)[i]
numerical_gradient = (f1-f2)/(2*step)
ratio = (f1-f2)/(2*step*gradient)
difference = np.abs((f1-f2)/2/step - gradient)
numerical_gradient = (f1 - f2) / (2 * step)
ratio = (f1 - f2) / (2 * step * gradient)
difference = np.abs((f1 - f2) / 2 / step - gradient)
if (np.abs(ratio-1)<tolerance):
if (np.abs(ratio - 1) < tolerance):
formatted_name = "\033[92m {0} \033[0m".format(names[i])
else:
formatted_name = "\033[91m {0} \033[0m".format(names[i])
@ -432,7 +409,7 @@ class model(parameterised):
d = '%.6f' % float(difference)
g = '%.6f' % gradient
ng = '%.6f' % float(numerical_gradient)
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
print grad_string
def input_sensitivity(self):
@ -443,21 +420,21 @@ class model(parameterised):
TODO: proper sensitivity analysis
"""
if not hasattr(self,'kern'):
if not hasattr(self, 'kern'):
raise ValueError, "this model has no kernel"
k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
if (not len(k)==1) or (not k[0].ARD):
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
if (not len(k) == 1) or (not k[0].ARD):
raise ValueError, "cannot determine sensitivity for this kernel"
k = k[0]
if k.name=='rbf':
if k.name == 'rbf':
return k.lengthscale
elif k.name=='linear':
return 1./k.variances
elif k.name == 'linear':
return 1. / k.variances
def pseudo_EM(self,epsilon=.1,**kwargs):
def pseudo_EM(self, epsilon=.1, **kwargs):
"""
TODO: Should this not bein the GP class?
EM - like algorithm for Expectation Propagation and Laplace approximation
@ -471,7 +448,7 @@ class model(parameterised):
:type optimzer: string TODO: valid strings?
"""
assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
ll_change = epsilon + 1.
iteration = 0
last_ll = -np.exp(1000)
@ -491,9 +468,9 @@ class model(parameterised):
ll_change = new_ll - last_ll
if ll_change < 0:
self.likelihood = last_approximation #restore previous likelihood approximation
self._set_params(last_params) #restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
self.likelihood = last_approximation # restore previous likelihood approximation
self._set_params(last_params) # restore model parameters
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
stop = True
else:
self.optimize(**kwargs)
@ -502,5 +479,5 @@ class model(parameterised):
stop = True
iteration += 1
if stop:
print "%s iterations." %iteration
print "%s iterations." % iteration

View file

@ -8,24 +8,25 @@ import copy
import cPickle
import os
from ..util.squashers import sigmoid
import warnings
def truncate_pad(string,width,align='m'):
def truncate_pad(string, width, align='m'):
"""
A helper function to make aligned strings for parameterised.__str__
"""
width=max(width,4)
if len(string)>width:
return string[:width-3]+'...'
elif len(string)==width:
width = max(width, 4)
if len(string) > width:
return string[:width - 3] + '...'
elif len(string) == width:
return string
elif len(string)<width:
diff = width-len(string)
if align=='m':
return ' '*np.floor(diff/2.) + string + ' '*np.ceil(diff/2.)
elif align=='l':
return string + ' '*diff
elif align=='r':
return ' '*diff + string
elif len(string) < width:
diff = width - len(string)
if align == 'm':
return ' ' * np.floor(diff / 2.) + string + ' ' * np.ceil(diff / 2.)
elif align == 'l':
return string + ' ' * diff
elif align == 'r':
return ' ' * diff + string
else:
raise ValueError
@ -37,15 +38,15 @@ class parameterised(object):
self.tied_indices = []
self.constrained_fixed_indices = []
self.constrained_fixed_values = []
self.constrained_positive_indices = np.empty(shape=(0,),dtype=np.int64)
self.constrained_negative_indices = np.empty(shape=(0,),dtype=np.int64)
self.constrained_positive_indices = np.empty(shape=(0,), dtype=np.int64)
self.constrained_negative_indices = np.empty(shape=(0,), dtype=np.int64)
self.constrained_bounded_indices = []
self.constrained_bounded_uppers = []
self.constrained_bounded_lowers = []
def pickle(self,filename,protocol=-1):
f = file(filename,'w')
cPickle.dump(self,f,protocol)
def pickle(self, filename, protocol= -1):
f = file(filename, 'w')
cPickle.dump(self, f, protocol)
f.close()
def copy(self):
@ -55,18 +56,85 @@ class parameterised(object):
return copy.deepcopy(self)
@property
def params(self):
"""
Returns a **copy** of parameters in non transformed space
:see_also: :py:func:`GPy.core.parameterised.params_transformed`
"""
return self._get_params()
@params.setter
def params(self, params):
self._set_params(params)
@property
def params_transformed(self):
"""
Returns a **copy** of parameters in transformed space
:see_also: :py:func:`GPy.core.parameterised.params`
"""
return self._get_params_transformed()
@params_transformed.setter
def params_transformed(self, params):
self._set_params_transformed(params)
_get_set_deprecation = """get and set methods wont be available at next minor release
in the next releases you will get and set with following syntax:
Assume m is a model class:
print m['var'] # > prints all parameters matching 'var'
m['var'] = 2. # > sets all parameters matching 'var' to 2.
m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>
"""
def get(self, name):
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
return self[name]
def set(self, name, val):
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
self[name] = val
def __getitem__(self, name, return_names=False):
"""
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
"""
matches = self.grep_param_names(name)
if len(matches):
if return_names:
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
else:
return self._get_params()[matches]
else:
raise AttributeError, "no parameter matches %s" % name
def __setitem__(self, name, val):
"""
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
"""
matches = self.grep_param_names(name)
if len(matches):
val = np.array(val)
assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
x = self.params
x[matches] = val
self.params = x
# import ipdb;ipdb.set_trace()
# self.params[matches] = val
else:
raise AttributeError, "no parameter matches %s" % name
def tie_params(self, which):
matches = self.grep_param_names(which)
assert matches.size > 0, "need at least something to tie together"
if len(self.tied_indices):
assert not np.any(matches[:,None]==np.hstack(self.tied_indices)), "Some indices are already tied!"
assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
self.tied_indices.append(matches)
#TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self,'prior'):
# TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
if hasattr(self, 'prior'):
pass
self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
def untie_everything(self):
"""Unties all parameters by setting tied_indices to an empty list."""
@ -74,7 +142,7 @@ class parameterised(object):
def all_constrained_indices(self):
"""Return a np array of all the constrained indices"""
ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
if len(ret):
return np.hstack(ret)
else:
@ -117,44 +185,44 @@ class parameterised(object):
which -- np.array(dtype=int), or regular expression object or string
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches))
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if (xx<0) & (i in matches):
for i, xx in enumerate(x):
if (xx < 0) & (i in matches):
x[i] = -xx
self._set_params(x)
def unconstrain(self,which):
def unconstrain(self, which):
"""Unconstrain matching parameters. does not untie parameters"""
matches = self.grep_param_names(which)
#positive/negative
self.constrained_positive_indices = np.delete(self.constrained_positive_indices,np.nonzero(np.sum(self.constrained_positive_indices[:,None]==matches[None,:],1))[0])
self.constrained_negative_indices = np.delete(self.constrained_negative_indices,np.nonzero(np.sum(self.constrained_negative_indices[:,None]==matches[None,:],1))[0])
#bounded
# positive/negative
self.constrained_positive_indices = np.delete(self.constrained_positive_indices, np.nonzero(np.sum(self.constrained_positive_indices[:, None] == matches[None, :], 1))[0])
self.constrained_negative_indices = np.delete(self.constrained_negative_indices, np.nonzero(np.sum(self.constrained_negative_indices[:, None] == matches[None, :], 1))[0])
# bounded
if len(self.constrained_bounded_indices):
self.constrained_bounded_indices = [np.delete(a,np.nonzero(np.sum(a[:,None]==matches[None,:],1))[0]) for a in self.constrained_bounded_indices]
self.constrained_bounded_indices = [np.delete(a, np.nonzero(np.sum(a[:, None] == matches[None, :], 1))[0]) for a in self.constrained_bounded_indices]
if np.hstack(self.constrained_bounded_indices).size:
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u,l,i) for u,l,i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u, l, i) for u, l, i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = list(self.constrained_bounded_uppers), list(self.constrained_bounded_lowers), list(self.constrained_bounded_indices)
else:
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [],[],[]
#fixed:
for i,indices in enumerate(self.constrained_fixed_indices):
self.constrained_fixed_indices[i] = np.delete(indices,np.nonzero(np.sum(indices[:,None]==matches[None,:],1))[0])
#remove empty elements
tmp = [(i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [], [], []
# fixed:
for i, indices in enumerate(self.constrained_fixed_indices):
self.constrained_fixed_indices[i] = np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0])
# remove empty elements
tmp = [(i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
if tmp:
self.constrained_fixed_indices, self.constrained_fixed_values = zip(*tmp)
self.constrained_fixed_indices, self.constrained_fixed_values = list(self.constrained_fixed_indices), list(self.constrained_fixed_values)
else:
self.constrained_fixed_indices, self.constrained_fixed_values = [],[]
self.constrained_fixed_indices, self.constrained_fixed_values = [], []
def constrain_negative(self,which):
def constrain_negative(self, which):
"""
Set negative constraints.
@ -163,12 +231,12 @@ class parameterised(object):
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches))
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if (xx>0.) and (i in matches):
for i, xx in enumerate(x):
if (xx > 0.) and (i in matches):
x[i] = -xx
self._set_params(x)
@ -184,20 +252,20 @@ class parameterised(object):
lower -- (float) the lower bound on the constraint
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
assert lower < upper, "lower bound must be smaller than upper bound!"
self.constrained_bounded_indices.append(matches)
self.constrained_bounded_uppers.append(upper)
self.constrained_bounded_lowers.append(lower)
#check to ensure constraint is in place
# check to ensure constraint is in place
x = self._get_params()
for i,xx in enumerate(x):
if ((xx<=lower)|(xx>=upper)) & (i in matches):
x[i] = sigmoid(xx)*(upper-lower) + lower
for i, xx in enumerate(x):
if ((xx <= lower) | (xx >= upper)) & (i in matches):
x[i] = sigmoid(xx) * (upper - lower) + lower
self._set_params(x)
def constrain_fixed(self, which, value = None):
def constrain_fixed(self, which, value=None):
"""
Arguments
---------
@ -211,14 +279,14 @@ class parameterised(object):
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
"""
matches = self.grep_param_names(which)
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
self.constrained_fixed_indices.append(matches)
if value != None:
self.constrained_fixed_values.append(value)
else:
self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]])
#self.constrained_fixed_values.append(value)
# self.constrained_fixed_values.append(value)
self._set_params_transformed(self._get_params_transformed())
def _get_params_transformed(self):
@ -226,40 +294,40 @@ class parameterised(object):
x = self._get_params()
x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices])
x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices])
[np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(x, i, np.log(np.clip(x[i] - l, 1e-10, np.inf) / np.clip(h - x[i], 1e-10, np.inf))) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
to_remove = self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]
to_remove = self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]
if len(to_remove):
return np.delete(x,np.hstack(to_remove))
return np.delete(x, np.hstack(to_remove))
else:
return x
def _set_params_transformed(self,x):
def _set_params_transformed(self, x):
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
#work out how many places are fixed, and where they are. tricky logic!
# work out how many places are fixed, and where they are. tricky logic!
Nfix_places = 0.
if len(self.tied_indices):
Nfix_places += np.hstack(self.tied_indices).size-len(self.tied_indices)
Nfix_places += np.hstack(self.tied_indices).size - len(self.tied_indices)
if len(self.constrained_fixed_indices):
Nfix_places += np.hstack(self.constrained_fixed_indices).size
if Nfix_places:
fix_places = np.hstack(self.constrained_fixed_indices+[t[1:] for t in self.tied_indices])
fix_places = np.hstack(self.constrained_fixed_indices + [t[1:] for t in self.tied_indices])
else:
fix_places = []
free_places = np.setdiff1d(np.arange(Nfix_places+x.size,dtype=np.int),fix_places)
free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
#put the models values in the vector xx
xx = np.zeros(Nfix_places+free_places.size,dtype=np.float64)
# put the models values in the vector xx
xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
xx[free_places] = x
[np.put(xx,i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
[np.put(xx,i,v) for i,v in [(t[1:],xx[t[0]]) for t in self.tied_indices] ]
[np.put(xx, i, v) for i, v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices])
xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices])
[np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(xx, i, low + sigmoid(xx[i]) * (high - low)) for i, low, high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
self._set_params(xx)
def _get_param_names_transformed(self):
@ -267,33 +335,33 @@ class parameterised(object):
Returns the parameter names as propagated after constraining,
tying or fixing, i.e. a list of the same length as _get_params_transformed()
"""
n = self._get_param_names()
n = self._get_param_names()
#remove/concatenate the tied parameter names
# remove/concatenate the tied parameter names
if len(self.tied_indices):
for t in self.tied_indices:
n[t[0]] = "<tie>".join([n[tt] for tt in t])
remove = np.hstack([t[1:] for t in self.tied_indices])
else:
remove=np.empty(shape=(0,),dtype=np.int)
remove = np.empty(shape=(0,), dtype=np.int)
#also remove the fixed params
# also remove the fixed params
if len(self.constrained_fixed_indices):
remove = np.hstack((remove, np.hstack(self.constrained_fixed_indices)))
#add markers to show that some variables are constrained
# add markers to show that some variables are constrained
for i in self.constrained_positive_indices:
n[i] = n[i]+'(+ve)'
n[i] = n[i] + '(+ve)'
for i in self.constrained_negative_indices:
n[i] = n[i]+'(-ve)'
for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
n[i] = n[i] + '(-ve)'
for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
for ii in i:
n[ii] = n[ii]+'(bounded)'
n[ii] = n[ii] + '(bounded)'
n = [nn for i,nn in enumerate(n) if not i in remove]
n = [nn for i, nn in enumerate(n) if not i in remove]
return n
def __str__(self,nw=30):
def __str__(self, nw=30):
"""
Return a string describing the parameter names and their ties and constraints
"""
@ -302,10 +370,10 @@ class parameterised(object):
if not N:
return "This object has no free parameters."
header = ['Name','Value','Constraints','Ties']
values = self._get_params() #map(str,self._get_params())
#sort out the constraints
constraints = ['']*len(names)
header = ['Name', 'Value', 'Constraints', 'Ties']
values = self._get_params() # map(str,self._get_params())
# sort out the constraints
constraints = [''] * len(names)
for i in self.constrained_positive_indices:
constraints[i] = '(+ve)'
for i in self.constrained_negative_indices:
@ -313,14 +381,14 @@ class parameterised(object):
for i in self.constrained_fixed_indices:
for ii in i:
constraints[ii] = 'Fixed'
for i,u,l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
for i, u, l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
for ii in i:
constraints[ii] = '('+str(l)+', '+str(u)+')'
#sort out the ties
ties = ['']*len(names)
for i,tie in enumerate(self.tied_indices):
constraints[ii] = '(' + str(l) + ', ' + str(u) + ')'
# sort out the ties
ties = [''] * len(names)
for i, tie in enumerate(self.tied_indices):
for j in tie:
ties[j] = '('+str(i)+')'
ties[j] = '(' + str(i) + ')'
values = ['%.4f' % float(v) for v in values]
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
@ -330,10 +398,10 @@ class parameterised(object):
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
columns = cols.sum()
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
header_string = map(lambda x: '|'.join(x), [header_string])
separator = '-'*len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n = names[i], v = values[i], c = constraints[i], t = ties[i], c0 = cols[0], c1 = cols[1], c2 = cols[2], c3 = cols[3]) for i in range(len(values))]
separator = '-' * len(header_string[0])
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
return ('\n'.join([header_string[0], separator]+param_string)) + '\n'
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'

View file

@ -6,6 +6,8 @@ import pylab as pb
from matplotlib import pyplot as plt, pyplot
import GPy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
from GPy.util.datasets import simulation_BGPLVM
default_seed = np.random.seed(123344)
@ -46,7 +48,7 @@ def GPLVM_oil_100(optimize=True):
data = GPy.util.datasets.oil_100()
# create simple GP model
kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
m.data_labels = data['Y'].argmax(axis=1)
@ -99,6 +101,172 @@ def oil_100():
# m.plot_latent(labels=data['Y'].argmax(axis=1))
return m
def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
x = np.linspace(0, 4 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x))
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
sS = np.vectorize(lambda x: np.sin(2 * x))
s1 = s1(x)
s2 = s2(x)
s3 = s3(x)
sS = sS(x)
# s1 -= s1.mean()
# s2 -= s2.mean()
# s3 -= s3.mean()
# sS -= sS.mean()
# s1 /= .5 * (np.abs(s1).max() - np.abs(s1).min())
# s2 /= .5 * (np.abs(s2).max() - np.abs(s2).min())
# s3 /= .5 * (np.abs(s3).max() - np.abs(s3).min())
# sS /= .5 * (np.abs(sS).max() - np.abs(sS).min())
S1 = np.hstack([s1, sS])
S2 = np.hstack([s2, sS])
S3 = np.hstack([s3, sS])
Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .1 * np.random.randn(*Y1.shape)
Y2 += .1 * np.random.randn(*Y2.shape)
Y3 += .1 * np.random.randn(*Y3.shape)
Y1 -= Y1.mean(0)
Y2 -= Y2.mean(0)
Y3 -= Y3.mean(0)
Y1 /= Y1.std(0)
Y2 /= Y2.std(0)
Y3 /= Y3.std(0)
slist = [s1, s2, s3, sS]
Ylist = [Y1, Y2, Y3]
if plot_sim:
import pylab
import itertools
fig = pylab.figure("MRD Simulation", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = sorted(filter(lambda x: x.startswith("s"), locals()))
for S, lab in itertools.izip(slist, labls):
ax.plot(S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y)
ax.set_title("Y{}".format(i + 1))
pylab.draw()
pylab.tight_layout()
return slist, [S1, S2, S3], Ylist
def bgplvm_simulation_matlab_compare():
sim_data = simulation_BGPLVM()
Y = sim_data['Y']
S = sim_data['S']
mu = sim_data['mu']
M, [_, Q] = 20, mu.shape
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
k = kern.rbf(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k,
# X=mu,
# X_variance=S,
_debug=True)
m.ensure_default_constraints()
m.auto_scale_factor = True
m['noise'] = .01 # Y.var() / 100.
m['{}_variance'.format(k.parts[0].name)] = .01
return m
def bgplvm_simulation(burnin='scg', plot_sim=False,
max_burnin=100, true_X=False,
do_opt=True,
max_f_eval=1000):
D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd
from GPy import kern
reload(mrd); reload(kern)
Y = Ylist[0]
k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2)) # + kern.bias(Q)
# k = kern.white(Q, .00001) + kern.bias(Q)
m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
# m.set('noise',)
m.ensure_default_constraints()
m['noise'] = Y.var() / 100.
m['linear_variance'] = .001
# m.auto_scale_factor = True
# m.scale_factor = 1.
if burnin:
print "initializing beta"
cstr = "noise"
m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 70.)
m.optimize(burnin, messages=1, max_f_eval=max_burnin)
print "releasing beta"
cstr = "noise"
m.unconstrain(cstr); m.constrain_positive(cstr)
if true_X:
true_X = np.hstack((slist[0], slist[3], 0. * np.ones((N, Q - 2))))
m.set('X_\d', true_X)
m.constrain_fixed("X_\d")
cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_fixed(cstr, .0001)
m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-7, .1)
# cstr = 'X_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-3, 1.)
# m['X_var'] = np.ones(N * Q) * .5 + np.random.randn(N * Q) * .01
# cstr = "iip"
# m.unconstrain(cstr); m.constrain_fixed(cstr)
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 1.)
# cstr = 'X_\d'
# m.unconstrain(cstr), m.constrain_bounded(cstr, -10., 10.)
#
# cstr = 'noise'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-5, 1.)
#
# cstr = 'white'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-6, 1.)
#
# cstr = 'linear_variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-10, 10.)
# np.seterr(all='call')
# def ipdbonerr(errtype, flags):
# import ipdb; ipdb.set_trace()
# np.seterrcall(ipdbonerr)
if do_opt and burnin:
try:
m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
except:
pass
finally:
return m
return m
def mrd_simulation(plot_sim=False):
# num = 2
# ard1 = np.array([1., 1, 0, 0], dtype=float)
@ -117,32 +285,8 @@ def mrd_simulation(plot_sim=False):
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
# Y2 -= Y2.mean(0)
# make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
D1, D2, D3, N, M, Q = 50, 100, 8, 200, 2, 5
x = np.linspace(0, 8 * np.pi, N)[:, None]
s1 = np.vectorize(lambda x: np.sin(x))
s2 = np.vectorize(lambda x: np.cos(x))
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
sS = np.vectorize(lambda x: x * np.sin(2 * x))
s1 = s1(x)
s2 = s2(x)
s3 = s3(x)
sS = sS(x)
s1 -= s1.mean()
s2 -= s2.mean()
s3 -= s3.mean()
sS -= sS.mean()
s1 /= np.abs(s1).max()
s2 /= np.abs(s2).max()
s3 /= np.abs(s3).max()
sS /= np.abs(sS).max()
S1 = np.hstack([s1, sS])
S2 = np.hstack([s2, sS])
S3 = np.hstack([s3, sS])
D1, D2, D3, N, M, Q = 2000, 34, 8, 500, 3, 6
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
from GPy.models import mrd
from GPy import kern
@ -153,73 +297,40 @@ def mrd_simulation(plot_sim=False):
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
# Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
Y1 += .5 * np.random.randn(*Y1.shape)
Y2 += .5 * np.random.randn(*Y2.shape)
Y3 += .5 * np.random.randn(*Y3.shape)
# Y1 -= Y1.mean(0)
# Y2 -= Y2.mean(0)
# Y3 -= Y3.mean(0)
# Y1 /= Y1.std(0)
# Y2 /= Y2.std(0)
# Y3 /= Y3.std(0)
Slist = [s1, s2, sS]
Ylist = [Y1, Y2]
if plot_sim:
import pylab
import itertools
fig = pylab.figure("MRD Simulation", figsize=(8, 6))
fig.clf()
ax = fig.add_subplot(2, 1, 1)
labls = sorted(filter(lambda x: x.startswith("s"), locals()))
for S, lab in itertools.izip(Slist, labls):
ax.plot(x, S, label=lab)
ax.legend()
for i, Y in enumerate(Ylist):
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
ax.imshow(Y)
ax.set_title("Y{}".format(i + 1))
pylab.draw()
pylab.tight_layout()
Ylist = Ylist[0:2]
# k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
k = kern.linear(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", _debug=False)
m.ensure_default_constraints()
k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .001)
m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)
for i, Y in enumerate(Ylist):
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
# import ipdb;ipdb.set_trace()
cstr = "variance"
m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-15, 1.)
m.ensure_default_constraints()
m.auto_scale_factor = True
# cstr = 'variance'
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
#
# cstr = 'linear_variance'
# m.unconstrain(cstr), m.constrain_positive(cstr)
# print "initializing beta"
# cstr = "noise"
# m.unconstrain(cstr); m.constrain_fixed(cstr)
# m.optimize('scg', messages=1, max_f_eval=200)
#
# m.optimize('scg', messages=1, max_f_eval=100)
# print "releasing beta"
# cstr = "noise"
# m.unconstrain(cstr); m.constrain_positive(cstr)
np.seterr(all='call')
def ipdbonerr(errtype, flags):
import ipdb; ipdb.set_trace()
np.seterrcall(ipdbonerr)
m.auto_scale_factor = True
# fig = pyplot.figure("expected", figsize=(8, 3))
# ax = fig.add_subplot(121)
# ax.bar(np.arange(ard1.size) + .1, ard1)
# ax = fig.add_subplot(122)
# ax.bar(np.arange(ard2.size) + .1, ard2)
return m
return m # , mtest
def mrd_silhouette():

View file

@ -0,0 +1,146 @@
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
#Scaled Conjuagte Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python N. Lawrence and given a pythonic interface by James Hensman
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
import numpy as np
import sys
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
"""
Optimisation through Scaled Conjugate Gradients (SCG)
f: the objective function
gradf : the gradient function (should return a 1D np.ndarray)
x : the initial condition
Returns
x the optimal value for x
flog : a list of all the objective values
"""
sigma0 = 1.0e-4
fold = f(x, *optargs) # Initial function value.
function_eval = 1
fnow = fold
gradnew = gradf(x, *optargs) # Initial gradient.
gradold = gradnew.copy()
d = -gradnew # Initial search direction.
success = True # Force calculation of directional derivs.
nsuccess = 0 # nsuccess counts number of successes.
beta = 1.0 # Initial scale parameter.
betamin = 1.0e-15 # Lower bound on scale.
betamax = 1.0e100 # Upper bound on scale.
status = "Not converged"
flog = [fold]
iteration = 0
# Main optimization loop.
while iteration < maxiters:
# Calculate first and second directional derivatives.
if success:
mu = np.dot(d, gradnew)
if mu >= 0:
d = -gradnew
mu = np.dot(d, gradnew)
kappa = np.dot(d, d)
sigma = sigma0/np.sqrt(kappa)
xplus = x + sigma*d
gplus = gradf(xplus, *optargs)
theta = np.dot(d, (gplus - gradnew))/sigma
# Increase effective curvature and evaluate step size alpha.
delta = theta + beta*kappa
if delta <= 0:
delta = beta*kappa
beta = beta - theta/kappa
alpha = - mu/delta
# Calculate the comparison ratio.
xnew = x + alpha*d
fnew = f(xnew, *optargs)
function_eval += 1
if function_eval >= max_f_eval:
status = "Maximum number of function evaluations exceeded"
return x, flog, function_eval, status
Delta = 2.*(fnew - fold)/(alpha*mu)
if Delta >= 0.:
success = True
nsuccess += 1
x = xnew
fnow = fnew
else:
success = False
fnow = fold
# Store relevant variables
flog.append(fnow) # Current function value
iteration += 1
if display:
print '\r',
print 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta),
# print 'Iteration:', iteration, ' Objective:', fnow, ' Scale:', beta, '\r',
sys.stdout.flush()
if success:
# Test for termination
if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
status='converged'
return x, flog, function_eval, status
else:
# Update variables for new position
fold = fnew
gradold = gradnew
gradnew = gradf(x, *optargs)
# If the gradient is zero then we are done.
if np.dot(gradnew,gradnew) == 0:
return x, flog, function_eval, status
# Adjust beta according to comparison ratio.
if Delta < 0.25:
beta = min(4.0*beta, betamax)
if Delta > 0.75:
beta = max(0.5*beta, betamin)
# Update search direction using Polak-Ribiere formula, or re-start
# in direction of negative gradient after nparams steps.
if nsuccess == x.size:
d = -gradnew
nsuccess = 0
elif success:
gamma = np.dot(gradold - gradnew,gradnew)/(mu)
d = gamma*d - gradnew
# If we get here, then we haven't terminated in the given number of
# iterations.
status = "maxiter exceeded"
return x, flog, function_eval, status

View file

@ -2,5 +2,9 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
try:
from constructors import rbf_sympy, sympykern # these depend on sympy
except:
pass
from kern import kern

View file

@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
from coregionalise import coregionalise as coregionalise_part
from rational_quadratic import rational_quadratic as rational_quadraticpart
from rbfcos import rbfcos as rbfcospart
from independent_outputs import independent_outputs as independent_output_part
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
#using meta-classes to make the objects construct properly wthout them.
@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
part = Brownianpart(D,variance)
return kern(D, [part])
import sympy as sp
from sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
try:
import sympy as sp
from sympykern import spkern
from sympy.parsing.sympy_parser import parse_expr
sympy_available = True
except ImportError:
sympy_available = False
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
"""
Radial Basis Function covariance.
"""
X = [sp.var('x%i'%i) for i in range(D)]
Z = [sp.var('z%i'%i) for i in range(D)]
rbf_variance = sp.var('rbf_variance',positive=True)
if ARD:
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/2.)
else:
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
return kern(D,[spkern(D,f)])
if sympy_available:
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
"""
Radial Basis Function covariance.
"""
X = [sp.var('x%i'%i) for i in range(D)]
Z = [sp.var('z%i'%i) for i in range(D)]
rbf_variance = sp.var('rbf_variance',positive=True)
if ARD:
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/2.)
else:
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
dist = parse_expr(dist_string)
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
return kern(D,[spkern(D,f)])
def sympykern(D,k):
"""
A kernel from a symbolic sympy representation
"""
return kern(D,[spkern(D,k)])
def sympykern(D,k):
"""
A kernel from a symbolic sympy representation
"""
return kern(D,[spkern(D,k)])
del sympy_available
def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
"""
@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
"""
part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
return kern(D,[part])
def independent_outputs(k):
"""
Construct a kernel with independent outputs from an existing kernel
"""
for sl in k.input_slices:
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
parts = [independent_output_part(p) for p in k.parts]
return kern(k.D+1,parts)

View file

@ -5,10 +5,11 @@ from kernpart import kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
import pdb
from scipy import weave
class coregionalise(kernpart):
"""
Kernel for Intrisec Corregionalization Models
Kernel for Intrinsic Corregionalization Models
"""
def __init__(self,Nout,R=1, W=None, kappa=None):
self.D = 1
@ -42,19 +43,70 @@ class coregionalise(kernpart):
def K(self,index,index2,target):
index = np.asarray(index,dtype=np.int)
#here's the old code (numpy)
#if index2 is None:
#index2 = index
#else:
#index2 = np.asarray(index2,dtype=np.int)
#false_target = target.copy()
#ii,jj = np.meshgrid(index,index2)
#ii,jj = ii.T, jj.T
#false_target += self.B[ii,jj]
if index2 is None:
index2 = index
code="""
for(int i=0;i<N; i++){
target[i+i*N] += B[index[i]+Nout*index[i]];
for(int j=0; j<i; j++){
target[j+i*N] += B[index[i]+Nout*index[j]];
target[i+j*N] += target[j+i*N];
}
}
"""
N,B,Nout = index.size, self.B, self.Nout
weave.inline(code,['target','index','N','B','Nout'])
else:
index2 = np.asarray(index2,dtype=np.int)
ii,jj = np.meshgrid(index,index2)
ii,jj = ii.T, jj.T
target += self.B[ii,jj]
code="""
for(int i=0;i<M; i++){
for(int j=0; j<N; j++){
target[i+j*M] += B[Nout*index[j]+index2[i]];
}
}
"""
N,M,B,Nout = index.size,index2.size, self.B, self.Nout
weave.inline(code,['target','index','index2','N','M','B','Nout'])
def Kdiag(self,index,target):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
def dK_dtheta(self,dL_dK,index,index2,target):
index = np.asarray(index,dtype=np.int)
dL_dK_small = np.zeros_like(self.B)
if index2 is None:
index2 = index
else:
index2 = np.asarray(index2,dtype=np.int)
code="""
for(int i=0; i<M; i++){
for(int j=0; j<N; j++){
dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*M];
}
}
"""
N, M, Nout = index.size, index2.size, self.Nout
weave.inline(code, ['N','M','Nout','dL_dK','dL_dK_small','index','index2'])
dkappa = np.diag(dL_dK_small)
dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
target += np.hstack([dW.flatten(),dkappa])
def dK_dtheta_old(self,dL_dK,index,index2,target):
if index2 is None:
index2 = index
else:

View file

@ -0,0 +1,97 @@
# Copyright (c) 2012, James Hesnsman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
def index_to_slices(index):
"""
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
e.g.
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
returns
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
or, a more complicated example
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
returns
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
"""
#contruct the return structure
ind = np.asarray(index,dtype=np.int64)
ret = [[] for i in range(ind.max()+1)]
#find the switchpoints
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
return ret
class independent_outputs(kernpart):
"""
A kernel part shich can reopresent several independent functions.
this kernel 'switches off' parts of the matrix where the output indexes are different.
The index of the functions is given by the last column in the input X
the rest of the columns of X are passed to the kernel for computation (in blocks).
"""
def __init__(self,k):
self.D = k.D + 1
self.Nparam = k.Nparam
self.name = 'iops('+ k.name + ')'
self.k = k
def _get_params(self):
return self.k._get_params()
def _set_params(self,x):
self.k._set_params(x)
self.params = x
def _get_param_names(self):
return self.k._get_param_names()
def K(self,X,X2,target):
#Sort out the slices from the input data
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def Kdiag(self,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
def dK_dtheta(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dK_dX(self,dL_dK,X,X2,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
if X2 is None:
X2,slices2 = X,slices
else:
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
def dKdiag_dX(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
def dKdiag_dtheta(self,dL_dKdiag,X,target):
X,slices = X[:,:-1],index_to_slices(X[:,-1])
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]

View file

@ -9,19 +9,14 @@ from kernpart import kernpart
import itertools
from prod_orthogonal import prod_orthogonal
from prod import prod
from ..util.linalg import symmetrify
class kern(parameterised):
def __init__(self,D,parts=[], input_slices=None):
def __init__(self, D, parts=[], input_slices=None):
"""
This kernel does 'compound' structures.
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
The compund structure enables many features of GPy, including
- Hierarchical models
- Correleated output models
- multi-view learning
Hadamard product and outer-product kernels will require a new class.
This feature is currently WONTFIX. for small number sof inputs, you can use the sympy kernel for this.
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
:param D: The dimensioality of the kernel's input space
:type D: int
@ -37,15 +32,15 @@ class kern(parameterised):
self.D = D
#deal with input_slices
# deal with input_slices
if input_slices is None:
self.input_slices = [slice(None) for p in self.parts]
else:
assert len(input_slices)==len(self.parts)
assert len(input_slices) == len(self.parts)
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
for p in self.parts:
assert isinstance(p,kernpart), "bad kernel part"
assert isinstance(p, kernpart), "bad kernel part"
self.compute_param_slices()
@ -67,22 +62,22 @@ class kern(parameterised):
if p.name == 'linear':
ard_params = p.variances
else:
ard_params = 1./p.lengthscale
ard_params = 1. / p.lengthscale
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
ax.set_xticks(np.arange(len(ard_params)),
["${}$".format(i + 1) for i in range(len(ard_params))])
ax.set_xticks(np.arange(len(ard_params)))
ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
return ax
def _transform_gradients(self,g):
def _transform_gradients(self, g):
x = self._get_params()
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
g[self.constrained_positive_indices] = g[self.constrained_positive_indices] * x[self.constrained_positive_indices]
g[self.constrained_negative_indices] = g[self.constrained_negative_indices] * x[self.constrained_negative_indices]
[np.put(g, i, g[i] * (x[i] - l) * (h - x[i]) / (h - l)) for i, l, h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
if len(self.tied_indices) or len(self.constrained_fixed_indices):
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
return np.delete(g,to_remove)
to_remove = np.hstack((self.constrained_fixed_indices + [t[1:] for t in self.tied_indices]))
return np.delete(g, to_remove)
else:
return g
@ -91,41 +86,13 @@ class kern(parameterised):
self.param_slices = []
count = 0
for p in self.parts:
self.param_slices.append(slice(count,count+p.Nparam))
self.param_slices.append(slice(count, count + p.Nparam))
count += p.Nparam
def _process_slices(self,slices1=None,slices2=None):
"""
Format the slices so that they can easily be used.
Both slices can be any of three things:
- If None, the new points covary through every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
if the second arg is False, return only slices1
returns actual lists of slice objects
"""
if slices1 is None:
slices1 = [slice(None)]*self.Nparts
elif all([type(s_i) is bool for s_i in slices1]):
slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
else:
assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
if slices2 is None:
slices2 = [slice(None)]*self.Nparts
elif slices2 is False:
return slices1
elif all([type(s_i) is bool for s_i in slices2]):
slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
else:
assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
return slices1, slices2
def __add__(self,other):
def __add__(self, other):
assert self.D == other.D
newkern = kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
#transfer constraints:
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
# transfer constraints:
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
@ -136,7 +103,7 @@ class kern(parameterised):
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def add(self,other):
def add(self, other):
"""
Add another kernel to this one. Both kernels are defined on the same _space_
:param other: the other kernel to be added
@ -144,21 +111,21 @@ class kern(parameterised):
"""
return self + other
def add_orthogonal(self,other):
def add_orthogonal(self, other):
"""
Add another kernel to this one. Both kernels are defined on separate spaces
:param other: the other kernel to be added
:type other: GPy.kern
"""
#deal with input slices
# deal with input slices
D = self.D + other.D
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
#transfer constraints:
# transfer constraints:
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
@ -169,13 +136,13 @@ class kern(parameterised):
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
return newkern
def __mul__(self,other):
def __mul__(self, other):
"""
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
"""
return self.prod(other)
def prod(self,other):
def prod(self, other):
"""
multiply two kernels defined on the same spaces.
:param other: the other kernel to be added
@ -184,20 +151,20 @@ class kern(parameterised):
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1+s2]
slices += [s1 + s2]
newkern = kern(K1.D, newkernparts, slices)
newkern._follow_constrains(K1,K2)
newkern._follow_constrains(K1, K2)
return newkern
def prod_orthogonal(self,other):
def prod_orthogonal(self, other):
"""
multiply two kernels. Both kernels are defined on separate spaces.
:param other: the other kernel to be added
@ -206,31 +173,31 @@ class kern(parameterised):
K1 = self.copy()
K2 = other.copy()
newkernparts = [prod_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
slices = []
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
s1, s2 = [False]*K1.D, [False]*K2.D
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
s1, s2 = [False] * K1.D, [False] * K2.D
s1[sl1], s2[sl2] = [True], [True]
slices += [s1+s2]
slices += [s1 + s2]
newkern = kern(K1.D + K2.D, newkernparts, slices)
newkern._follow_constrains(K1,K2)
newkern._follow_constrains(K1, K2)
return newkern
def _follow_constrains(self,K1,K2):
def _follow_constrains(self, K1, K2):
# Build the array that allows to go from the initial indices of the param to the new ones
K1_param = []
n = 0
for k1 in K1.parts:
K1_param += [range(n,n+k1.Nparam)]
K1_param += [range(n, n + k1.Nparam)]
n += k1.Nparam
n = 0
K2_param = []
for k2 in K2.parts:
K2_param += [range(K1.Nparam+n,K1.Nparam+n+k2.Nparam)]
K2_param += [range(K1.Nparam + n, K1.Nparam + n + k2.Nparam)]
n += k2.Nparam
index_param = []
for p1 in K1_param:
@ -254,47 +221,50 @@ class kern(parameterised):
# follow the previous ties
for arr in prev_ties:
for j in arr:
index_param[np.where(index_param==j)[0]] = arr[0]
index_param[np.where(index_param == j)[0]] = arr[0]
# ties and constrains
for i in range(K1.Nparam + K2.Nparam):
index = np.where(index_param==i)[0]
index = np.where(index_param == i)[0]
if index.size > 1:
self.tie_params(index)
for i in prev_constr_pos:
self.constrain_positive(np.where(index_param==i)[0])
self.constrain_positive(np.where(index_param == i)[0])
for i in prev_constr_neg:
self.constrain_neg(np.where(index_param==i)[0])
self.constrain_neg(np.where(index_param == i)[0])
for j, i in enumerate(prev_constr_fix):
self.constrain_fixed(np.where(index_param==i)[0],prev_constr_fix_values[j])
self.constrain_fixed(np.where(index_param == i)[0], prev_constr_fix_values[j])
for j, i in enumerate(prev_constr_bou):
self.constrain_bounded(np.where(index_param==i)[0],prev_constr_bou_low[j],prev_constr_bou_upp[j])
self.constrain_bounded(np.where(index_param == i)[0], prev_constr_bou_low[j], prev_constr_bou_upp[j])
def _get_params(self):
return np.hstack([p._get_params() for p in self.parts])
def _set_params(self,x):
def _set_params(self, x):
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
def _get_param_names(self):
#this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts],dtype=np.str)
counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
# this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
part_names = np.array([k.name for k in self.parts], dtype=np.str)
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[])
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
def K(self,X,X2=None,slices1=None,slices2=None):
assert X.shape[1]==self.D
slices1, slices2 = self._process_slices(slices1,slices2)
def K(self, X, X2=None, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
if X2 is None:
X2 = X
target = np.zeros((X.shape[0],X2.shape[0]))
[p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
target = np.zeros((X.shape[0], X.shape[0]))
[p.K(X[:, i_s], None, target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
else:
target = np.zeros((X.shape[0], X2.shape[0]))
[p.K(X[:, i_s], X2[:,i_s], target=target) for p, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts) if part_i_used]
return target
def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
def dK_dtheta(self, dL_dK, X, X2=None):
"""
:param dL_dK: An array of dL_dK derivaties, dL_dK
:type dL_dK: Np.ndarray (N x M)
@ -302,286 +272,275 @@ class kern(parameterised):
:type X: np.ndarray (N x D)
:param X2: Observed dara inputs (optional, defaults to X)
:type X2: np.ndarray (M x D)
:param slices1: a slice object for each kernel part, describing which data are affected by each kernel part
:type slices1: list of slice objects, or list of booleans
:param slices2: slices for X2
"""
assert X.shape[1]==self.D
slices1, slices2 = self._process_slices(slices1,slices2)
if X2 is None:
X2 = X
assert X.shape[1] == self.D
target = np.zeros(self.Nparam)
[p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
if X2 is None:
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
else:
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None):
def dK_dX(self, dL_dK, X, X2=None):
if X2 is None:
X2 = X
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(X)
[p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
if X2 is None:
[p.dK_dX(dL_dK, X[:, i_s], None, target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
else:
[p.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def Kdiag(self,X,slices=None):
assert X.shape[1]==self.D
slices = self._process_slices(slices,False)
def Kdiag(self, X, which_parts='all'):
if which_parts=='all':
which_parts = [True]*self.Nparts
assert X.shape[1] == self.D
target = np.zeros(X.shape[0])
[p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
[p.Kdiag(X[:, i_s], target=target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
assert X.shape[1]==self.D
assert len(dL_dKdiag.shape)==1
assert dL_dKdiag.size==X.shape[0]
slices = self._process_slices(slices,False)
def dKdiag_dtheta(self, dL_dKdiag, X):
assert X.shape[1] == self.D
assert dL_dKdiag.size == X.shape[0]
target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
return self._transform_gradients(target)
def dKdiag_dX(self, dL_dKdiag, X, slices=None):
assert X.shape[1]==self.D
slices = self._process_slices(slices,False)
def dKdiag_dX(self, dL_dKdiag, X):
assert X.shape[1] == self.D
target = np.zeros_like(X)
[p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def psi0(self,Z,mu,S,slices=None):
slices = self._process_slices(slices,False)
def psi0(self, Z, mu, S):
target = np.zeros(mu.shape[0])
[p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False)
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False)
target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
return target_mu,target_S
def psi1(self,Z,mu,S,slices1=None,slices2=None):
"""Think N,M,Q """
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros((mu.shape[0],Z.shape[0]))
[p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
return target
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,(Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros((self.Nparam))
[p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi2(self,Z,mu,S,slices1=None,slices2=None):
def psi1(self, Z, mu, S):
target = np.zeros((mu.shape[0], Z.shape[0]))
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
target = np.zeros((self.Nparam))
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
return self._transform_gradients(target)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
"""return shapes are N,M,Q"""
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
return target_mu, target_S
def psi2(self, Z, mu, S):
"""
:param Z: np.ndarray of inducing inputs (M x Q)
:param mu, S: np.ndarrays of means and variances (each N x Q)
:returns psi2: np.ndarray (N,M,M)
"""
target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
slices1, slices2 = self._process_slices(slices1,slices2)
[p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
# compute the "cross" terms
#TODO: input_slices needed
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
elif p2.name=='bias' and p1.name=='rbf':
target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
tmp = np.zeros((mu.shape[0],Z.shape[0]))
p2.psi1(Z,mu,S,tmp)
target += p1.variance*(tmp[:,:,None] + tmp[:,None,:])
elif p2.name=='bias' and p1.name=='linear':
tmp = np.zeros((mu.shape[0],Z.shape[0]))
p1.psi1(Z,mu,S,tmp)
target += p2.variance*(tmp[:,:,None] + tmp[:,None,:])
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
elif p2.name == 'bias' and p1.name == 'rbf':
target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, tmp)
target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
elif p2.name == 'bias' and p1.name == 'linear':
tmp = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, tmp)
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
"""Returns shape (N,M,M,Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2)
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
target = np.zeros(self.Nparam)
[p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
#compute the "cross" terms
#TODO: better looping
for i1, i2 in itertools.combinations(range(len(self.parts)),2):
p1,p2 = self.parts[i1], self.parts[i2]
ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
# compute the "cross" terms
# TODO: better looping, input_slices
for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
p1, p2 = self.parts[i1], self.parts[i2]
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2., Z, mu, S, target[ps1])
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2., Z, mu, S, target[ps1])
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p2.psi1(Z, mu, S, psi1)
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
p1.psi1(Z, mu, S, psi1)
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return self._transform_gradients(target)
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
slices1, slices2 = self._process_slices(slices1,slices2)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
target = np.zeros_like(Z)
[p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
[p.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
# compute the "cross" terms
#TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dX(dL_dpsi2.sum(1).T*p1.variance,Z,mu,S,target)
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance,Z,mu,S,target)
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dZ(dL_dpsi2.sum(1).T*p1.variance, Z, mu, S, target)
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T*p2.variance, Z, mu, S, target)
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
p2.dpsi1_dX(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target * 2.
return target
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2):
#white doesn;t combine with anything
if p1.name=='white' or p2.name=='white':
# compute the "cross" terms
#TODO: we need input_slices here.
for p1, p2 in itertools.combinations(self.parts, 2):
# white doesn;t combine with anything
if p1.name == 'white' or p2.name == 'white':
pass
#rbf X bias
elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
#linear X bias
elif p1.name=='bias' and p2.name=='linear':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2., Z, mu, S, target_mu, target_S)
elif p2.name=='bias' and p1.name=='linear':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2., Z, mu, S, target_mu, target_S)
#rbf X linear
elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO
elif p2.name=='linear' and p1.name=='rbf':
raise NotImplementedError #TODO
# rbf X bias
elif p1.name == 'bias' and p2.name == 'rbf':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
elif p2.name == 'bias' and p1.name == 'rbf':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# linear X bias
elif p1.name == 'bias' and p2.name == 'linear':
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
elif p2.name == 'bias' and p1.name == 'linear':
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
# rbf X linear
elif p1.name == 'linear' and p2.name == 'rbf':
raise NotImplementedError # TODO
elif p2.name == 'linear' and p1.name == 'rbf':
raise NotImplementedError # TODO
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return target_mu, target_S
def plot(self, x = None, plot_limits=None,which_functions='all',resolution=None,*args,**kwargs):
if which_functions=='all':
which_functions = [True]*self.Nparts
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
if which_functions == 'all':
which_functions = [True] * self.Nparts
if self.D == 1:
if x is None:
x = np.zeros((1,1))
x = np.zeros((1, 1))
else:
x = np.asarray(x)
assert x.size == 1, "The size of the fixed variable x is not 1"
x = x.reshape((1,1))
x = x.reshape((1, 1))
if plot_limits == None:
xmin, xmax = (x-5).flatten(), (x+5).flatten()
xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
Xnew = np.linspace(xmin,xmax,resolution or 201)[:,None]
Kx = self.K(Xnew,x,slices2=which_functions)
pb.plot(Xnew,Kx,*args,**kwargs)
pb.xlim(xmin,xmax)
Xnew = np.linspace(xmin, xmax, resolution or 201)[:, None]
Kx = self.K(Xnew, x, slices2=which_functions)
pb.plot(Xnew, Kx, *args, **kwargs)
pb.xlim(xmin, xmax)
pb.xlabel("x")
pb.ylabel("k(x,%0.1f)" %x)
pb.ylabel("k(x,%0.1f)" % x)
elif self.D == 2:
if x is None:
x = np.zeros((1,2))
x = np.zeros((1, 2))
else:
x = np.asarray(x)
assert x.size == 2, "The size of the fixed variable x is not 2"
x = x.reshape((1,2))
x = x.reshape((1, 2))
if plot_limits == None:
xmin, xmax = (x-5).flatten(), (x+5).flatten()
xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
elif len(plot_limits) == 2:
xmin, xmax = plot_limits
else:
raise ValueError, "Bad limits for plotting"
resolution = resolution or 51
xx,yy = np.mgrid[xmin[0]:xmax[0]:1j*resolution,xmin[1]:xmax[1]:1j*resolution]
xg = np.linspace(xmin[0],xmax[0],resolution)
yg = np.linspace(xmin[1],xmax[1],resolution)
Xnew = np.vstack((xx.flatten(),yy.flatten())).T
Kx = self.K(Xnew,x,slices2=which_functions)
Kx = Kx.reshape(resolution,resolution).T
pb.contour(xg,yg,Kx,vmin=Kx.min(),vmax=Kx.max(),cmap=pb.cm.jet,*args,**kwargs)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
xg = np.linspace(xmin[0], xmax[0], resolution)
yg = np.linspace(xmin[1], xmax[1], resolution)
Xnew = np.vstack((xx.flatten(), yy.flatten())).T
Kx = self.K(Xnew, x, slices2=which_functions)
Kx = Kx.reshape(resolution, resolution).T
pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
pb.xlabel("x1")
pb.ylabel("x2")
pb.title("k(x1,x2 ; %0.1f,%0.1f)" %(x[0,0],x[0,1]) )
pb.title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1]))
else:
raise NotImplementedError, "Cannot plot a kernel with more than two input dimensions"

View file

@ -4,6 +4,7 @@
from kernpart import kernpart
import numpy as np
from ..util.linalg import tdot
class linear(kernpart):
"""
@ -65,8 +66,11 @@ class linear(kernpart):
def K(self,X,X2,target):
if self.ARD:
XX = X*np.sqrt(self.variances)
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
if X2 is None:
target += tdot(XX)
else:
XX2 = X2*np.sqrt(self.variances)
target += np.dot(XX, XX2.T)
else:
self._K_computations(X, X2)
target += self.variances * self._dot_product
@ -76,8 +80,11 @@ class linear(kernpart):
def dK_dtheta(self,dL_dK,X,X2,target):
if self.ARD:
product = X[:,None,:]*X2[None,:,:]
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
if X2 is None:
[np.add(target[i:i+1],np.sum(dL_dK*tdot(X[:,i:i+1])),target[i:i+1]) for i in range(self.D)]
else:
product = X[:,None,:]*X2[None,:,:]
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
else:
self._K_computations(X, X2)
target += np.sum(self._dot_product*dL_dK)
@ -114,7 +121,7 @@ class linear(kernpart):
def psi1(self,Z,mu,S,target):
"""the variance, it does nothing"""
self.K(mu,Z,target)
self._psi1 = self.K(mu, Z, target)
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
"""the variance, it does nothing"""
@ -133,9 +140,9 @@ class linear(kernpart):
returns N,M,M matrix
"""
self._psi_computations(Z,mu,S)
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
target += psi2.sum(-1)
#TODO: this could be faster using np.tensordot
#psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
#target += psi2.sum(-1)
target += np.tensordot(self.ZZ[None,:,:,:]*np.square(self.variances),self.mu2_S[:, None, None, :],((3),(3))).squeeze().T
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S)
@ -156,28 +163,30 @@ class linear(kernpart):
self._psi_computations(Z,mu,S)
mu2_S = np.sum(self.mu2_S,0)# Q,
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
#TODO: tensordot would gain some time here
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self,X,X2):
if X2 is None:
X2 = X
if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
self._Xcache = X
self._X2cache = X2
self._dot_product = np.dot(X,X2.T)
else:
# print "Cache hit!"
pass # TODO: insert debug message here (logging framework)
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
self._Xcache = X.copy()
if X2 is None:
self._dot_product = tdot(X)
self._X2cache = None
else:
self._X2cache = X2.copy()
self._dot_product = np.dot(X,X2.T)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self._Z = Z
#self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
self.ZZ = np.empty((Z.shape[0],Z.shape[0],Z.shape[1]),order='F')
[tdot(Z[:,i:i+1],self.ZZ[:,:,i].T) for i in xrange(Z.shape[1])]
self._Z = Z.copy()
if not (np.all(mu==self._mu) and np.all(S==self._S)):
self.mu2_S = np.square(mu)+S
self._mu, self._S = mu, S
self._mu, self._S = mu.copy(), S.copy()

View file

@ -40,9 +40,12 @@ class prod(kernpart):
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
if X2 is None:
target1 = np.zeros((X.shape[0],X2.shape[0]))
target2 = np.zeros((X.shape[0],X2.shape[0]))
else:
target1 = np.zeros((X.shape[0],X.shape[0]))
target2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X,X2,target1)
self.k2.K(X,X2,target2)
target += target1 * target2

View file

@ -21,41 +21,35 @@ class prod_orthogonal(kernpart):
self.name = k1.name + '<times>' + k2.name
self.k1 = k1
self.k2 = k2
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
def _get_params(self):
"""return the value of the parameters."""
return self.params
return np.hstack((self.k1._get_params(), self.k2._get_params()))
def _set_params(self,x):
"""set the value of the parameters."""
self.k1._set_params(x[:self.k1.Nparam])
self.k2._set_params(x[self.k1.Nparam:])
self.params = x
def _get_param_names(self):
"""return parameter names."""
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
target1 = np.zeros_like(target)
target2 = np.zeros_like(target)
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
target += target1 * target2
self._K_computations(X,X2)
target += self._K1 * self._K2
def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
self._K_computations(X,X2)
if X2 is None:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], None, target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], None, target[self.k1.Nparam:])
else:
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
@ -75,14 +69,9 @@ class prod_orthogonal(kernpart):
def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0]))
K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
self._K_computations(X,X2)
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0])
@ -93,3 +82,20 @@ class prod_orthogonal(kernpart):
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
def _K_computations(self,X,X2):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params == self._get_params().copy()
if X2 is None:
self._X2 = None
self._K1 = np.zeros((X.shape[0],X.shape[0]))
self._K2 = np.zeros((X.shape[0],X.shape[0]))
self.k1.K(X[:,:self.k1.D],None,self._K1)
self.k2.K(X[:,self.k1.D:],None,self._K2)
else:
self._X2 = X2.copy()
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)

View file

@ -6,6 +6,7 @@ from kernpart import kernpart
import numpy as np
import hashlib
from scipy import weave
from ..util.linalg import tdot
class rbf(kernpart):
"""
@ -74,11 +75,8 @@ class rbf(kernpart):
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
def K(self,X,X2,target):
if X2 is None:
X2 = X
self._K_computations(X,X2)
np.add(self.variance*self._K_dvar, target,target)
target += self.variance*self._K_dvar
def Kdiag(self,X,target):
np.add(target,self.variance,target)
@ -87,6 +85,7 @@ class rbf(kernpart):
self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*dL_dK)
if self.ARD:
if X2 is None: X2 = X
[np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
else:
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
@ -182,29 +181,31 @@ class rbf(kernpart):
#---------------------------------------#
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._X2 = X2.copy()
self._params == self._get_params().copy()
if X2 is None: X2 = X
#never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
#_K_dist = X[:,None,:]-X2[None,:,:]
#_K_dist2 = np.square(_K_dist/self.lengthscale)
X = X/self.lengthscale
X2 = X2/self.lengthscale
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
if X2 is None:
self._X2 = None
X = X/self.lengthscale
Xsquare = np.sum(np.square(X),1)
self._K_dist2 = (-2.*tdot(X) + Xsquare[:,None] + Xsquare[None,:])
else:
self._X2 = X2.copy()
X = X/self.lengthscale
X2 = X2/self.lengthscale
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
self._K_dvar = np.exp(-0.5*self._K_dist2)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
if not np.array_equal(Z, self._Z):
#Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
self._Z = Z
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
#something's changed. recompute EVERYTHING
#psi1

View file

@ -30,17 +30,15 @@ class white(kernpart):
return ['variance']
def K(self,X,X2,target):
if X.shape==X2.shape:
if np.all(X==X2):
np.add(target,np.eye(X.shape[0])*self.variance,target)
if X2 is None:
target += np.eye(X.shape[0])*self.variance
def Kdiag(self,X,target):
target += self.variance
def dK_dtheta(self,dL_dK,X,X2,target):
if X.shape==X2.shape:
if np.all(X==X2):
target += np.trace(dL_dK)
if X2 is None:
target += np.trace(dL_dK)
def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(dL_dKdiag)

View file

@ -2,19 +2,30 @@ import numpy as np
from likelihood import likelihood
class Gaussian(likelihood):
"""
Likelihood class for doing Expectation propagation
:param Y: observed output (Nx1 numpy.darray)
..Note:: Y values allowed depend on the likelihood_function used
:param variance :
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
:type normalize: False|True
"""
def __init__(self,data,variance=1.,normalize=False):
self.is_heteroscedastic = False
self.Nparams = 1
self.Z = 0. # a correction factor which accounts for the approximation made
N, self.D = data.shape
#normaliztion
#normalization
if normalize:
self._mean = data.mean(0)[None,:]
self._std = data.std(0)[None,:]
self._bias = data.mean(0)[None,:]
self._scale = data.std(0)[None,:]
# Don't scale outputs which have zero variance to zero.
self._scale[np.nonzero(self._scale==0.)] = 1.0e-3
else:
self._mean = np.zeros((1,self.D))
self._std = np.ones((1,self.D))
self._bias = np.zeros((1,self.D))
self._scale = np.ones((1,self.D))
self.set_data(data)
@ -24,13 +35,13 @@ class Gaussian(likelihood):
self.data = data
self.N,D = data.shape
assert D == self.D
self.Y = (self.data - self._mean)/self._std
self.Y = (self.data - self._bias)/self._scale
if D > self.N:
self.YYT = np.dot(self.Y,self.Y.T)
self.trYYT = np.trace(self.YYT)
else:
self.YYT = None
self.trYYT = None
self.trYYT = np.sum(np.square(self.Y))
def _get_params(self):
return np.asarray(self._variance)
@ -47,19 +58,19 @@ class Gaussian(likelihood):
"""
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
"""
mean = mu*self._std + self._mean
mean = mu*self._scale + self._bias
if full_cov:
if self.D >1:
raise NotImplementedError, "TODO"
#Note. for D>1, we need to re-normalise all the outputs independently.
# This will mess up computations of diag(true_var), below.
#note that the upper, lower quantiles should be the same shape as mean
true_var = (var + np.eye(var.shape[0])*self._variance)*self._std**2
_5pc = mean + - 2.*np.sqrt(np.diag(true_var))
true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
else:
true_var = (var + self._variance)*self._std**2
_5pc = mean + - 2.*np.sqrt(true_var)
true_var = (var + self._variance)*self._scale**2
_5pc = mean - 2.*np.sqrt(true_var)
_95pc = mean + 2.*np.sqrt(true_var)
return mean, true_var, _5pc, _95pc

View file

@ -9,6 +9,10 @@ from sparse_GP import sparse_GP
from GPy.util.linalg import pdinv
from ..likelihoods import Gaussian
from .. import kern
from numpy.linalg.linalg import LinAlgError
import itertools
from matplotlib.colors import colorConverter
from matplotlib.figure import SubplotParams
class Bayesian_GPLVM(sparse_GP, GPLVM):
"""
@ -22,12 +26,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
:type init: 'PCA'|'random'
"""
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
Z=None, kernel=None, oldpsave=5, _debug=False,
**kwargs):
if X == None:
X = self.initialise_latent(init, Q, Y)
if X_variance is None:
X_variance = np.ones_like(X) * 0.5
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
if Z is None:
Z = np.random.permutation(X.copy())[:M]
@ -36,9 +42,30 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
if kernel is None:
kernel = kern.rbf(Q) + kern.white(Q)
self.oldpsave = oldpsave
self._oldps = []
self._debug = _debug
if self._debug:
self.f_call = 0
self._count = itertools.count()
self._savedklll = []
self._savedparams = []
self._savedgradients = []
self._savederrors = []
self._savedpsiKmm = []
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
@property
def oldps(self):
return self._oldps
@oldps.setter
def oldps(self, p):
if len(self._oldps) == (self.oldpsave + 1):
self._oldps.pop()
# if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
self._oldps.insert(0, p.copy())
def _get_param_names(self):
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
@ -54,17 +81,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
===============================================================
"""
return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
def _set_params(self, x):
N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N, Q).copy()
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
sparse_GP._set_params(self, x[(2 * N * Q):])
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
return x
def _set_params(self, x, save_old=True, save_count=0):
try:
N, Q = self.N, self.Q
self.X = x[:self.X.size].reshape(N, Q).copy()
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
sparse_GP._set_params(self, x[(2 * N * Q):])
self.oldps = x
except (LinAlgError, FloatingPointError, ZeroDivisionError):
print "\rWARNING: Caught LinAlgError, continueing without setting "
if self._debug:
self._savederrors.append(self.f_call)
# if save_count > 10:
# raise
# self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
def dKL_dmuS(self):
dKL_dS = (1. - (1. / self.X_variance)) * 0.5
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
dKL_dmu = self.X
return dKL_dmu, dKL_dS
@ -83,13 +119,40 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
def log_likelihood(self):
return sparse_GP.log_likelihood(self) - self.KL_divergence()
ll = sparse_GP.log_likelihood(self)
kl = self.KL_divergence()
# if ll < -2E4:
# ll = -2E4 + np.random.randn()
# if kl > 5E4:
# kl = 5E4 + np.random.randn()
if self._debug:
self.f_call = self._count.next()
if self.f_call % 1 == 0:
self._savedklll.append([self.f_call, ll, kl])
self._savedparams.append([self.f_call, self._get_params()])
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
# print "\nkl:", kl, "ll:", ll
return ll - kl
def _log_likelihood_gradients(self):
dKL_dmu, dKL_dS = self.dKL_dmuS()
dL_dmu, dL_dS = self.dL_dmuS()
# TODO: find way to make faster
dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
d_dmu = (dL_dmu - dKL_dmu).flatten()
d_dS = (dL_dS - dKL_dS).flatten()
# TEST KL: ====================
# d_dmu = (dKL_dmu).flatten()
# d_dS = (dKL_dS).flatten()
# ========================
# TEST L: ====================
# d_dmu = (dL_dmu).flatten()
# d_dS = (dL_dS).flatten()
# ========================
dbound_dmuS = np.hstack((d_dmu, d_dS))
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
def plot_latent(self, which_indices=None, *args, **kwargs):
@ -104,3 +167,287 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
return ax
def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None):
"""
Plot latent space X in 1D:
-if fig is given, create Q subplots in fig and plot in these
-if axes is given plot Q 1D latent space plots of X into each `axis`
-if neither fig nor axes is given create a figure with fig_num and plot in there
colors:
colors of different latent space dimensions Q
"""
import pylab
if fig is None and axes is None:
fig = pylab.figure(num=fig_num, figsize=(8, min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
else:
colors = iter(colors)
plots = []
for i in range(self.X.shape[1]):
if axes is None:
ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
else:
ax = axes[i]
ax.plot(self.X, c='k', alpha=.3)
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
ax.fill_between(np.arange(self.X.shape[0]),
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
ax.legend(borderaxespad=0.)
if i < self.X.shape[1] - 1:
ax.set_xticklabels('')
pylab.draw()
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def _debug_filter_params(self, x):
start, end = 0, self.X.size,
X = x[start:end].reshape(self.N, self.Q)
start, end = end, end + self.X_variance.size
X_v = x[start:end].reshape(self.N, self.Q)
start, end = end, end + (self.M * self.Q)
Z = x[start:end].reshape(self.M, self.Q)
start, end = end, end + self.Q
theta = x[start:]
return X, X_v, Z, theta
def _debug_get_axis(self, figs):
if figs[-1].axes:
ax1 = figs[-1].axes[0]
ax1.cla()
else:
ax1 = figs[-1].add_subplot(111)
return ax1
def _debug_plot(self):
assert self._debug, "must enable _debug, to debug-plot"
import pylab
# from mpl_toolkits.mplot3d import Axes3D
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
# fig.clf()
# log like
# splotshape = (6, 4)
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
ax1 = self._debug_get_axis(figs)
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
ha='center', va='center')
kllls = np.array(self._savedklll)
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
param_dict = dict(self._savedparams)
gradient_dict = dict(self._savedgradients)
kmm_dict = dict(self._savedpsiKmm)
iters = np.array(param_dict.keys())
self.showing = 0
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
ax2 = self._debug_get_axis(figs)
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
ax3 = self._debug_get_axis(figs)
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
ax4 = self._debug_get_axis(figs)
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
ax5 = self._debug_get_axis(figs)
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
ha='center', va='center')
figs[-1].canvas.draw()
figs[-1].tight_layout(rect=(0, 0, 1, .9))
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
fig = figs[-1]
ax6 = fig.add_subplot(121)
ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
ha='center', va='center')
ax7 = fig.add_subplot(122)
ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
ha='center', va='center')
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
quiver_units = 'xy'
quiver_scale = 1
quiver_scale_units = 'xy'
Xlatentplts = ax2.plot(X, ls="-", marker="x")
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
Ulatent = np.zeros_like(X)
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
Slatentplts = ax3.plot(S, ls="-", marker="x")
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
UZ = np.zeros_like(Z)
Zplts = ax4.plot(Z, ls="-", marker="x")
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale)
xtheta = np.arange(len(theta))
Utheta = np.zeros_like(theta)
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
units=quiver_units, scale_units=quiver_scale_units,
scale=quiver_scale,
edgecolors=('k',), linewidths=[1])
pylab.setp(thetaplts, zorder=0)
pylab.setp(thetagrads, zorder=10)
ax5.set_xticks(np.arange(len(theta)))
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
imkmm = ax6.imshow(kmm_dict[self.showing][0])
from mpl_toolkits.axes_grid1 import make_axes_locatable
divider = make_axes_locatable(ax6)
caxkmm = divider.append_axes("right", "5%", pad="1%")
cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
divider = make_axes_locatable(ax7)
caxkmmdl = divider.append_axes("right", "5%", pad="1%")
cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
# borderaxespad=0, mode="expand")
ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.01, 1, 1.01),
borderaxespad=0, mode="expand")
Lleg = ax1.legend()
Lleg.draggable()
# ax1.add_artist(Qleg)
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
# for err in self._savederrors:
# if err < kllls.shape[0]:
# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
# try:
# for f in figs:
# f.canvas.draw()
# f.tight_layout(box=(0, .15, 1, .9))
# # pylab.draw()
# # pylab.tight_layout(box=(0, .1, 1, .9))
# except:
# pass
# parameter changes
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
button_options = [0, 0] # [0]: clicked -- [1]: dragged
def update_plots(event):
if button_options[0] and not button_options[1]:
# event.button, event.x, event.y, event.xdata, event.ydata)
tmp = np.abs(iters - event.xdata)
closest_hit = iters[tmp == tmp.min()][0]
if closest_hit != self.showing:
self.showing = closest_hit
# print closest_hit, iters, event.xdata
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
indicatorL.set_data(self.showing, kllls[self.showing, 1])
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
for i, Xlatent in enumerate(Xlatentplts):
Xlatent.set_ydata(X[:, i])
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
Xlatentgrads.set_UVC(Ulatent, Xg)
for i, Slatent in enumerate(Slatentplts):
Slatent.set_ydata(S[:, i])
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
Slatentgrads.set_UVC(Ulatent, Sg)
for i, Zlatent in enumerate(Zplts):
Zlatent.set_ydata(Z[:, i])
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
Zgrads.set_UVC(UZ, Zg)
for p, t in zip(thetaplts, theta):
p.set_height(t)
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
thetagrads.set_UVC(Utheta, thetag)
imkmm.set_data(kmm_dict[self.showing][0])
imkmm.autoscale()
cbarkmm.update_normal(imkmm)
imkmmdl.set_data(kmm_dict[self.showing][1])
imkmmdl.autoscale()
cbarkmmdl.update_normal(imkmmdl)
ax2.relim()
ax3.relim()
ax4.relim()
ax5.relim()
ax2.autoscale()
ax3.autoscale()
ax4.autoscale()
ax5.autoscale()
[fig.canvas.draw() for fig in figs]
button_options[0] = 0
button_options[1] = 0
def onclick(event):
if event.inaxes is ax1 and event.button == 1:
button_options[0] = 1
def motion(event):
if button_options[0]:
button_options[1] = 1
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
return ax1, ax2, ax3, ax4, ax5, ax6, ax7

View file

@ -6,9 +6,15 @@ import numpy as np
import pylab as pb
from .. import kern
from ..core import model
<<<<<<< HEAD
from ..util.linalg import pdinv,mdot
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
from ..likelihoods import EP, Laplace
=======
from ..util.linalg import pdinv, mdot
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
from ..likelihoods import EP
>>>>>>> upstream/devel
class GP(model):
"""
@ -19,9 +25,6 @@ class GP(model):
:parm likelihood: a GPy likelihood
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
@ -30,30 +33,29 @@ class GP(model):
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
def __init__(self, X, likelihood, kernel, normalize_X=False):
# parse arguments
self.Xslices = Xslices
self.X = X
assert len(self.X.shape)==2
assert len(self.X.shape) == 2
self.N, self.Q = self.X.shape
assert isinstance(kernel, kern.kern)
self.kern = kernel
#here's some simple normalization for the inputs
# here's some simple normalization for the inputs
if normalize_X:
self._Xmean = X.mean(0)[None,:]
self._Xstd = X.std(0)[None,:]
self._Xmean = X.mean(0)[None, :]
self._Xstd = X.std(0)[None, :]
self.X = (X.copy() - self._Xmean) / self._Xstd
if hasattr(self,'Z'):
if hasattr(self, 'Z'):
self.Z = (self.Z - self._Xmean) / self._Xstd
else:
self._Xmean = np.zeros((1,self.X.shape[1]))
self._Xstd = np.ones((1,self.X.shape[1]))
self._Xmean = np.zeros((1, self.X.shape[1]))
self._Xstd = np.ones((1, self.X.shape[1]))
self.likelihood = likelihood
#assert self.X.shape[0] == self.likelihood.Y.shape[0]
#self.N, self.D = self.likelihood.Y.shape
# assert self.X.shape[0] == self.likelihood.Y.shape[0]
# self.N, self.D = self.likelihood.Y.shape
assert self.X.shape[0] == self.likelihood.data.shape[0]
self.N, self.D = self.likelihood.data.shape
@ -65,24 +67,24 @@ class GP(model):
"""
return np.zeros_like(self.Z)
def _set_params(self,p):
self.kern._set_params_transformed(p[:self.kern.Nparam])
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
def _set_params(self, p):
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
# self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
self.K = self.kern.K(self.X)
self.K += self.likelihood.covariance_matrix
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
#the gradient of the likelihood wrt the covariance matrix
# the gradient of the likelihood wrt the covariance matrix
if self.likelihood.YYT is None:
alpha = np.dot(self.Ki,self.likelihood.Y)
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
alpha = np.dot(self.Ki, self.likelihood.Y)
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
else:
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
self.dL_dK = 0.5*(tmp - self.D*self.Ki)
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
def _get_params(self):
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
@ -94,20 +96,20 @@ class GP(model):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
For a Gaussian likelihood, no iteration is required:
this function does nothing
"""
self.likelihood.fit_full(self.kern.K(self.X))
self._set_params(self._get_params()) # update the GP
self._set_params(self._get_params()) # update the GP
def _model_fit_term(self):
"""
Computes the model fit using YYT if it's available
"""
if self.likelihood.YYT is None:
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
else:
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
def log_likelihood(self):
"""
@ -117,16 +119,14 @@ class GP(model):
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
"""
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
def _log_likelihood_gradients(self):
"""
The gradient of all parameters.
For the kernel parameters, use the chain rule via dL_dK
For the likelihood parameters, pass in alpha = K^-1 y
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
"""
dL_dthetaK = self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X, slices1=self.Xslices, slices2=self.Xslices)
if isinstance(self.likelihood, Laplace):
@ -141,26 +141,31 @@ class GP(model):
else:
dL_dthetaL = self.likelihood._gradients(partial=np.diag(self.dL_dK))
return np.hstack((dL_dthetaK, dL_dthetaL))
#return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False):
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
"""
Internal helper function for making predictions, does not account
for normalization or likelihood
#TODO: which_parts does nothing
"""
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
KiKx = np.dot(self.Ki,Kx)
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
KiKx = np.dot(self.Ki, Kx)
if full_cov:
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
var = Kxx - np.dot(KiKx.T,Kx)
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
var = Kxx - np.dot(KiKx.T, Kx)
else:
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
var = var[:,None]
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
var = var[:, None]
return mu, var
def predict(self,Xnew, slices=None, full_cov=False):
def predict(self, Xnew, which_parts='all', full_cov=False):
"""
Predict the function(s) at the new point(s) Xnew.
@ -168,35 +173,30 @@ class GP(model):
---------
:param Xnew: The points at which to make a prediction
:type Xnew: np.ndarray, Nnew x self.Q
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
:type slices: (None, list of slice objects, list of ints)
:param which_parts: specifies which outputs kernel(s) to use in prediction
:type which_parts: ('all', list of bools)
:param full_cov: whether to return the folll covariance matrix, or just the diagonal
:type full_cov: bool
:rtype: posterior mean, a Numpy array, Nnew x self.D
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
- If None, the new points covary throigh every kernel part (default)
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
- If a list of booleans, specifying which kernel parts are active
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
This is to allow for different normalizations of the output dimensions.
"""
#normalize X values
# normalize X values
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
mu, var = self._raw_predict(Xnew, slices, full_cov)
mu, var = self._raw_predict(Xnew, which_parts, full_cov)
#now push through likelihood TODO
# now push through likelihood
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
return mean, var, _025pm, _975pm
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
"""
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
@ -204,8 +204,8 @@ class GP(model):
:param which_data: which if the training data to plot (default all)
:type which_data: 'all' or a slice object to slice self.X, self.Y
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaluts to data limits
:param which_functions: which of the kernel functions to plot (additively)
:type which_functions: list of bools
:param which_parts: which of the kernel functions to plot (additively)
:type which_parts: 'all', or list of bools
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
Plot the posterior of the GP.
@ -216,86 +216,86 @@ class GP(model):
Can plot only part of the data and part of the posterior functions using which_data and which_functions
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
"""
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
if which_data == 'all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
if samples == 0:
m,v = self._raw_predict(Xnew, slices=which_functions)
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
else:
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
for i in range(samples):
pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
pb.xlim(xmin,xmax)
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
pb.xlim(xmin, xmax)
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.ylim(ymin, ymax)
if hasattr(self, 'Z'):
pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
elif self.X.shape[1] == 2:
resolution = resolution or 50
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
m,v = self._raw_predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
m, v = self._raw_predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
def plot(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, levels=20):
"""
TODO: Docstrings!
:param levels: for 2D plotting, the number of contour levels to use
"""
# TODO include samples
if which_functions=='all':
which_functions = [True]*self.kern.Nparts
if which_data=='all':
if which_data == 'all':
which_data = slice(None)
if self.X.shape[1] == 1:
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
gpplot(Xnew,m, lower, upper)
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
pb.xlim(xmin,xmax)
pb.ylim(ymin,ymax)
if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
gpplot(Xnew, m, lower, upper)
pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
if self.has_uncertain_inputs:
pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
elif self.X.shape[1]==2: #FIXME
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
pb.xlim(xmin, xmax)
pb.ylim(ymin, ymax)
if hasattr(self, 'Z'):
Zu = self.Z * self._Xstd + self._Xmean
pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
# pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
elif self.X.shape[1] == 2: # FIXME
resolution = resolution or 50
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
m = m.reshape(resolution,resolution).T
pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
m = m.reshape(resolution, resolution).T
pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
Yf = self.likelihood.Y.flatten()
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1])
if hasattr(self,'Z'):
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0], xmax[0])
pb.ylim(xmin[1], xmax[1])
if hasattr(self, 'Z'):
pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -1,4 +1,4 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
@ -24,12 +24,12 @@ class GPLVM(GP):
:type init: 'PCA'|'random'
"""
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs):
if X is None:
X = self.initialise_latent(init, Q, Y)
if kernel is None:
kernel = kern.rbf(Q) + kern.bias(Q)
likelihood = Gaussian(Y)
likelihood = Gaussian(Y, normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, **kwargs)
def initialise_latent(self, init, Q, Y):
@ -91,8 +91,8 @@ class GPLVM(GP):
Xtest_full[:, :2] = Xtest
mu, var, low, up = self.predict(Xtest_full)
var = var[:, :1]
ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
ax.imshow(var.reshape(resolution, resolution).T,
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
for i,ul in enumerate(np.unique(labels)):
if type(ul) is np.string_:

View file

@ -11,26 +11,24 @@ class GP_regression(GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defalts
This is a thin wrapper around the models.GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
:param kernel: a GPy kernel, defaults to rbf+white
:param kernel: a GPy kernel, defaults to rbf
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)

View file

@ -9,7 +9,6 @@ from sparse_GP_regression import sparse_GP_regression
from GPLVM import GPLVM
from warped_GP import warpedGP
from sparse_GPLVM import sparse_GPLVM
from uncollapsed_sparse_GP import uncollapsed_sparse_GP
from Bayesian_GPLVM import Bayesian_GPLVM
from mrd import MRD
from generalized_FITC import generalized_FITC

View file

@ -23,20 +23,19 @@ class generalized_FITC(sparse_GP):
:type X_variance: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.Z = Z
self.M = self.Z.shape[0]
self._precision = likelihood.precision
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False)
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
def _set_params(self, p):
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
@ -145,7 +144,7 @@ class generalized_FITC(sparse_GP):
D = 0.5*np.trace(self.Cpsi1VVpsi1)
return A+C+D
def _raw_predict(self, Xnew, slices, full_cov=False):
def _raw_predict(self, Xnew, which_parts, full_cov=False):
if self.likelihood.is_heteroscedastic:
"""
Make a prediction for the generalized FITC model
@ -174,16 +173,16 @@ class generalized_FITC(sparse_GP):
self.mu_H = mu_H
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
Kx = self.kern.K(self.Z, Xnew)
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
KR0T = np.dot(Kx.T,self.Lmi.T)
mu_star = np.dot(KR0T,mu_H)
if full_cov:
Kxx = self.kern.K(Xnew)
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
else:
Kxx = self.kern.Kdiag(Xnew)
Kxx_ = self.kern.K(Xnew)
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
return mu_star[:,None],var
else:

View file

@ -271,90 +271,52 @@ class MRD(model):
self.Z = Z
return Z
def plot_X_1d(self, colors=None):
fig = pylab.figure(num="MRD X 1d", figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
fig.clf()
ax1 = fig.add_subplot(self.X.shape[1], 1, 1)
if colors is None:
colors = ax1._get_lines.color_cycle
ax1.plot(self.X, c='k', alpha=.3)
plots = ax1.plot(self.X.T[0], c=colors.next())
ax1.fill_between(numpy.arange(self.X.shape[0]),
self.X.T[0] - 2 * numpy.sqrt(self.gref.X_variance.T[0]),
self.X.T[0] + 2 * numpy.sqrt(self.gref.X_variance.T[0]),
facecolor=plots[-1].get_color(),
alpha=.3)
ax1.text(1, 1, r"$\mathbf{{X_{}}}".format(1),
horizontalalignment='right',
verticalalignment='top',
transform=ax1.transAxes)
for i in range(self.X.shape[1] - 1):
ax = fig.add_subplot(self.X.shape[1], 1, i + 2)
ax.plot(self.X, c='k', alpha=.3)
plots.extend(ax.plot(self.X.T[i + 1], c=colors.next()))
ax.fill_between(numpy.arange(self.X.shape[0]),
self.X.T[i + 1] - 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
self.X.T[i + 1] + 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
facecolor=plots[-1].get_color(),
alpha=.3)
if i < self.X.shape[1] - 2:
ax.set_xticklabels('')
ax1.set_xticklabels('')
# ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
# bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
# 1., 1. + .01 * self.X.shape[1]), loc=3,
# ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
def _handle_plotting(self, fig_num, axes, plotf):
if axes is None:
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
for i, g in enumerate(self.bgplvms):
if axes is None:
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
else:
ax = axes[i]
plotf(i, g, ax)
pylab.draw()
fig.tight_layout(h_pad=.01, rect=(0, 0, 1, .95))
if axes is None:
fig.tight_layout()
return fig
else:
return pylab.gcf()
def plot_X(self, fig_num="MRD Predictions", axes=None):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
return fig
def plot_X(self):
fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
ax.imshow(g.X)
pylab.draw()
fig.tight_layout()
def plot_predict(self, fig_num="MRD Predictions", axes=None):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0]))
return fig
def plot_predict(self):
fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
ax.imshow(g.predict(g.X)[0])
pylab.draw()
fig.tight_layout()
def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs))
return fig
def plot_scales(self, *args, **kwargs):
fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
g.kern.plot_ARD(ax=ax, *args, **kwargs)
pylab.draw()
fig.tight_layout()
return fig
def plot_latent(self, *args, **kwargs):
fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
fig.clf()
for i, g in enumerate(self.bgplvms):
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
g.plot_latent(ax=ax, *args, **kwargs)
pylab.draw()
fig.tight_layout()
def plot_latent(self, fig_num="MRD Latent Spaces", axes=None, *args, **kwargs):
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
return fig
def _debug_plot(self):
self.plot_X()
self.plot_X_1d()
self.plot_latent()
self.plot_scales()
fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
fig.clf()
axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
self.plot_X(axes=axes)
axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
self.plot_latent(axes=axes)
axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
self.plot_scales(axes=axes)
pylab.draw()
fig.tight_layout()
def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10):
iters = 0
optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
self._debug_plot()

View file

@ -3,16 +3,12 @@
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot, tdot
from ..util.plot import gpplot
from .. import kern
from GP import GP
from scipy import linalg
#Still TODO:
# make use of slices properly (kernel can now do this)
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
class sparse_GP(GP):
"""
Variational sparse GP model
@ -27,19 +23,16 @@ class sparse_GP(GP):
:type X_variance: np.ndarray (N x Q) | None
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
:type M: int
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_(X|Y): bool
"""
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
self.auto_scale_factor = False
self.Z = Z
self.Zslices = Zslices
self.Xslices = Xslices
self.M = Z.shape[0]
self.likelihood = likelihood
@ -50,10 +43,7 @@ class sparse_GP(GP):
self.has_uncertain_inputs=True
self.X_variance = X_variance
if not self.likelihood.is_heteroscedastic:
self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
#normalize X uncertainty also
if self.has_uncertain_inputs:
@ -68,13 +58,12 @@ class sparse_GP(GP):
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
self.psi0 = self.kern.Kdiag(self.X)
self.psi1 = self.kern.K(self.Z,self.X)
self.psi2 = None
def _computations(self):
#TODO: find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough)
sf = self.scale_factor
sf2 = sf**2
@ -86,13 +75,15 @@ class sparse_GP(GP):
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
else:
if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
#self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2_beta_scaled = tdot(tmp)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
@ -101,39 +92,48 @@ class sparse_GP(GP):
#Compute A = L^-1 psi2 beta L^-T
#self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asarray(tmp.T,order='F'),lower=1)[0]
self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0]
self.B = np.eye(self.M)/sf2 + self.A
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
self.psi1V = np.dot(self.psi1, self.V)
#tmp = np.dot(self.Lmi.T, self.LBi.T)
tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
self.Cpsi1V = np.dot(self.C,self.psi1V)
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.Bi),lower=1,trans=1)[0]
self.C = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0]
#back substutue C into psi1V
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
#self.Cpsi1V = np.dot(self.C,self.psi1V)
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
#self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
self.E = tdot(self.Cpsi1V/sf)
# Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
if self.likelihood.is_heteroscedastic:
if self.has_uncertain_inputs:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
#self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
self.dL_dpsi2 = 0.5*self.likelihood.precision[:,None,None]*(self.D*(self.Kmmi - self.C/sf2) -self.E)[None,:,:]
else:
self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
#self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
#self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
#self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi1 += np.dot(self.Kmmi - self.C/sf2 -self.E,self.psi1*self.likelihood.precision.reshape(1,self.N))
self.dL_dpsi2 = None
else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
#self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
#self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
self.dL_dpsi2 = 0.5*self.likelihood.precision*(self.D*(self.Kmmi - self.C/sf2) -self.E)
if self.has_uncertain_inputs:
#repeat for each of the N psi_2 matrices
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
@ -146,9 +146,11 @@ class sparse_GP(GP):
#self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
#self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
#self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.B),lower=1,trans=1)[0]
self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
self.dL_dKmm += 0.5*(self.D*self.C/sf2 + self.E) +tmp # d(C+D)
#the partial derivative vector for the likelihood
if self.likelihood.Nparams ==0:
@ -162,7 +164,7 @@ class sparse_GP(GP):
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
else:
#likelihood is not heterscedatic
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
@ -194,6 +196,7 @@ class sparse_GP(GP):
# self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
# else:
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
#self.scale_factor = 1.
self._computations()
def _get_params(self):
@ -239,24 +242,24 @@ class sparse_GP(GP):
"""
The derivative of the bound wrt the inducing inputs Z
"""
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
if self.has_uncertain_inputs:
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_variance) # 'stripes'
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
else:
dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
return dL_dZ
def _raw_predict(self, Xnew, slices, full_cov=False):
def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(self.Z, Xnew)
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
if full_cov:
Kxx = self.kern.K(Xnew)
Kxx = self.kern.K(Xnew,which_parts=which_parts)
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
else:
Kxx = self.kern.Kdiag(Xnew)
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None]

View file

@ -13,7 +13,7 @@ class sparse_GP_regression(sparse_GP):
"""
Gaussian Process model for regression
This is a thin wrapper around the GP class, with a set of sensible defalts
This is a thin wrapper around the sparse_GP class, with a set of sensible defalts
:param X: input observations
:param Y: observed values
@ -22,25 +22,25 @@ class sparse_GP_regression(sparse_GP):
:type normalize_X: False|True
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
:type normalize_Y: False|True
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
:rtype: model object
.. Note:: Multiple independent outputs are allowed using columns of Y
"""
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
#kern defaults to rbf
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
#kern defaults to rbf (plus white for stability)
if kernel is None:
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
#Z defaults to a subset of the data
if Z is None:
Z = np.random.permutation(X.copy())[:M]
i = np.random.permutation(X.shape[0])[:M]
Z = X[i].copy()
else:
assert Z.shape[1]==X.shape[1]
#likelihood defaults to Gaussian
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)

View file

@ -1,151 +0,0 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
import pylab as pb
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
from .. import kern
from ..likelihoods import likelihood
from sparse_GP import sparse_GP
class uncollapsed_sparse_GP(sparse_GP):
"""
Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly
:param X: inputs
:type X: np.ndarray (N x Q)
:param likelihood: GPy likelihood class, containing observed data
:param q_u: canonical parameters of the distribution squasehd into a 1D array
:type q_u: np.ndarray
:param kernel : the kernel/covariance function. See link kernels
:type kernel: a GPy kernel
:param Z: inducing inputs (optional, see note)
:type Z: np.ndarray (M x Q) | None
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
:param normalize_X : whether to normalize the data before computing (predictions will be in original scales)
:type normalize_X: bool
"""
def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs):
self.M = Z.shape[0]
if q_u is None:
q_u = np.hstack((np.random.randn(self.M*likelihood.D),-0.5*np.eye(self.M).flatten()))
self.likelihood = likelihood
self.set_vb_param(q_u)
sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs)
def _computations(self):
# kernel computations, using BGPLVM notation
self.Kmm = self.kern.K(self.Z)
if self.has_uncertain_inputs:
raise NotImplementedError
else:
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
self.psi1 = self.kern.K(self.Z,self.X)
if self.likelihood.is_heteroscedastic:
raise NotImplementedError
else:
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
self.V = self.likelihood.precision*self.Y
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
self.psi1V = np.dot(self.psi1, self.V)
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
self.B = np.eye(self.M) + self.A
self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
self.trace_K = self.psi0 - np.trace(self.A)/self.beta
self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
# Compute dL_dpsi
self.dL_dpsi0 = - 0.5 * self.likelihood.D * self.beta * np.ones(self.N)
self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
self.dL_dpsi2 = 0.5 * self.beta * self.likelihood.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
# Compute dL_dKmm
tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
tmp += tmp.T
tmp += self.likelihood.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)
#Compute the gradient of the log likelihood wrt noise variance
#TODO: suport heteroscedatic noise
dbeta = 0.5 * self.N*self.likelihood.D/self.beta
dbeta += - 0.5 * self.likelihood.D * self.trace_K
dbeta += - 0.5 * self.likelihood.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
dbeta += - 0.5 * self.trYYT
dbeta += np.sum(np.dot(self.Y.T,self.projected_mean))
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
def log_likelihood(self):
"""
Compute the (lower bound on the) log marginal likelihood
"""
A = -0.5*self.N*self.likelihood.D*(np.log(2.*np.pi) - np.log(self.beta))
B = -0.5*self.beta*self.likelihood.D*self.trace_K
C = -0.5*self.likelihood.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
D = -0.5*self.beta*self.trYYT
E = np.sum(np.dot(self.V.T,self.projected_mean))
return A+B+C+D+E
def _raw_predict(self, Xnew, slices,full_cov=False):
"""Internal helper function for making predictions, does not account for normalization"""
Kx = self.kern.K(Xnew,self.Z)
mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
if full_cov:
Kxx = self.kern.K(Xnew)
var = Kxx - mdot(Kx,tmp,Kx.T)
else:
Kxx = self.kern.Kdiag(Xnew)
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
return mu,var
def set_vb_param(self,vb_param):
"""set the distribution q(u) from the canonical parameters"""
self.q_u_prec = -2.*vb_param[-self.M**2:].reshape(self.M, self.M)
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
self.q_u_logdet = -tmp
self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.likelihood.D].reshape(self.M,self.likelihood.D))
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.likelihood.D)
self.q_u_canonical = (np.dot(self.q_u_prec, self.q_u_mean),-0.5*self.q_u_prec)
#TODO: computations now?
def get_vb_param(self):
"""
Return the canonical parameters of the distribution q(u)
"""
return np.hstack([e.flatten() for e in self.q_u_canonical])
def vb_grad_natgrad(self):
"""
Compute the gradients of the lower bound wrt the canonical and
Expectation parameters of u.
Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
"""
dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
#dL_dSim =
#dL_dmhSi =
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
def plot(self, *args, **kwargs):
"""
add the distribution q(u) to the plot from sparse_GP
"""
sparse_GP.plot(self,*args,**kwargs)
if self.Q==1:
pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2.*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')

View file

@ -14,7 +14,7 @@ from .. import likelihoods
from .. import kern
class warpedGP(GP):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False, Xslices=None):
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
if kernel is None:
kernel = kern.rbf(X.shape[1])
@ -28,7 +28,7 @@ class warpedGP(GP):
self.predict_in_warped_space = False
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
def _set_params(self, x):
self.warping_params = x[:self.warping_function.num_parameters]

View file

@ -60,7 +60,7 @@ class BGPLVMTests(unittest.TestCase):
#@unittest.skip('psi2 cross terms are NotImplemented for this combination')
def test_linear_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
N, M, Q, D = 30, 5, 4, 30
X = np.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)

View file

@ -0,0 +1,83 @@
'''
Created on 26 Apr 2013
@author: maxz
'''
import unittest
import GPy
import numpy as np
import pylab
__test__ = False
class Test(unittest.TestCase):
D = 9
M = 5
Nsamples = 3e6
def setUp(self):
self.kerns = (
GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
GPy.kern.linear(self.D), GPy.kern.linear(self.D, ARD=True),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
GPy.kern.bias(self.D), GPy.kern.white(self.D),
)
self.q_x_mean = np.random.randn(self.D)
self.q_x_variance = np.exp(np.random.randn(self.D))
self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean
self.Z = np.random.randn(self.M, self.D)
self.q_x_mean.shape = (1, self.D)
self.q_x_variance.shape = (1, self.D)
def test_psi0(self):
for kern in self.kerns:
psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
Kdiag = kern.Kdiag(self.q_x_samples)
self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)
# print kern.parts[0].name, np.allclose(psi0, np.mean(Kdiag))
def test_psi1(self):
for kern in self.kerns:
Nsamples = 100
psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.N, self.M))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z)
K_ += K
diffs.append(((psi1 - (K_ / (i + 1))) ** 2).mean())
K_ /= self.Nsamples / Nsamples
# pylab.figure("+".join([p.name for p in kern.parts]) + "psi1")
# pylab.plot(diffs)
self.assertTrue(np.allclose(psi1.flatten() , K.mean(0), rtol=1e-1))
def test_psi2(self):
for kern in self.kerns:
Nsamples = 100
psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
K_ = np.zeros((self.M, self.M))
diffs = []
for i, q_x_sample_stripe in enumerate(np.array_split(self.q_x_samples, self.Nsamples / Nsamples)):
K = kern.K(q_x_sample_stripe, self.Z)
K = (K[:, :, None] * K[:, None, :]).mean(0)
K_ += K
diffs.append(((psi2 - (K_ / (i + 1))) ** 2).mean())
K_ /= self.Nsamples / Nsamples
try:
# pylab.figure("+".join([p.name for p in kern.parts]) + "psi2")
# pylab.plot(diffs)
self.assertTrue(np.allclose(psi2.squeeze(), K_,
rtol=1e-1, atol=.1),
msg="{}: not matching".format("+".join([p.name for p in kern.parts])))
except:
print "{}: not matching".format(kern.parts[0].name)
if __name__ == "__main__":
import sys;sys.argv = ['',
'Test.test_psi0',
'Test.test_psi1',
'Test.test_psi2']
unittest.main()

View file

@ -0,0 +1,153 @@
'''
Created on 22 Apr 2013
@author: maxz
'''
import unittest
import numpy
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
import GPy
import itertools
from GPy.core import model
class PsiStatModel(model):
def __init__(self, which, X, X_variance, Z, M, kernel):
self.which = which
self.X = X
self.X_variance = X_variance
self.Z = Z
self.N, self.Q = X.shape
self.M, Q = Z.shape
assert self.Q == Q, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
self.kern = kernel
super(PsiStatModel, self).__init__()
self.psi_ = self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance)
def _get_param_names(self):
Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.Q))]
Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.Q))]
return Xnames + Znames + self.kern._get_param_names()
def _get_params(self):
return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])
def _set_params(self, x, save_old=True, save_count=0):
start, end = 0, self.X.size
self.X = x[start:end].reshape(self.N, self.Q)
start, end = end, end + self.X_variance.size
self.X_variance = x[start: end].reshape(self.N, self.Q)
start, end = end, end + self.Z.size
self.Z = x[start: end].reshape(self.M, self.Q)
self.kern._set_params(x[end:])
def log_likelihood(self):
return self.kern.__getattribute__(self.which)(self.Z, self.X, self.X_variance).sum()
def _log_likelihood_gradients(self):
psimu, psiS = self.kern.__getattribute__("d" + self.which + "_dmuS")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
try:
psiZ = self.kern.__getattribute__("d" + self.which + "_dZ")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)
except AttributeError:
psiZ = numpy.zeros(self.M * self.Q)
thetagrad = self.kern.__getattribute__("d" + self.which + "_dtheta")(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance).flatten()
return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
class Test(unittest.TestCase):
Q = 5
N = 50
M = 10
D = 10
X = numpy.random.randn(N, Q)
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D))
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q)]
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q),
GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
def testPsi0(self):
for k in self.kernels:
m = PsiStatModel('psi0', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi0".format("+".join(map(lambda x: x.name, k.parts)))
# def testPsi1(self):
# for k in self.kernels:
# m = PsiStatModel('psi1', X=self.X, X_variance=self.X_var, Z=self.Z,
# M=self.M, kernel=k)
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin(self):
k = self.kernels[0]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_lin_bia(self):
k = self.kernels[3]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf(self):
k = self.kernels[1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_rbf_bia(self):
k = self.kernels[-1]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
def testPsi2_bia(self):
k = self.kernels[2]
m = PsiStatModel('psi2', X=self.X, X_variance=self.X_var, Z=self.Z,
M=self.M, kernel=k)
assert m.checkgrad(), "{} x psi2".format("+".join(map(lambda x: x.name, k.parts)))
if __name__ == "__main__":
import sys
interactive = 'i' in sys.argv
if interactive:
N, M, Q, D = 30, 5, 4, 30
X = numpy.random.rand(N, Q)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
Y -= Y.mean(axis=0)
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
m.ensure_default_constraints()
m.randomize()
# self.assertTrue(m.checkgrad())
Q = 5
N = 50
M = 10
D = 10
X = numpy.random.randn(N, Q)
X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
Z = numpy.random.permutation(X)[:M]
Y = X.dot(numpy.random.randn(Q, D))
kernel = GPy.kern.bias(Q)
kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
GPy.kern.linear(Q) + GPy.kern.bias(Q),
GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
# for k in kernels:
# m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=k)
# assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
#
# m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=GPy.kern.linear(Q))
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
# m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z,
# M=M, kernel=kernel)
m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q))
m3 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.linear(Q) + GPy.kern.bias(Q))
m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z,
M=M, kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
else:
unittest.main()

View file

@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
bias = GPy.kern.bias(2)
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
def test_GP_regression_linear_kern_1D_ARD(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=1)
def test_GP_regression_linear_kern_2D_ARD(self):
''' Testing the GP regression with linear kernel on 2d data '''
linear = GPy.kern.linear(2,ARD=True)
self.check_model_with_white(linear, model_type='GP_regression', dimension=2)
def test_GP_regression_linear_kern_1D(self):
''' Testing the GP regression with linear kernel on 1d data '''
linear = GPy.kern.linear(1)

View file

@ -4,14 +4,14 @@ import numpy as np
import GPy
import scipy.sparse
import scipy.io
data_path = os.path.join(os.path.dirname(__file__),'datasets')
default_seed =10000
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
default_seed = 10000
# Some general utilities.
def sample_class(f):
p = 1./(1.+np.exp(-f))
c = np.random.binomial(1,p)
c = np.where(c,1,-1)
p = 1. / (1. + np.exp(-f))
c = np.random.binomial(1, p)
c = np.where(c, 1, -1)
return c
def della_gatta_TRP63_gene_expression(gene_number=None):
@ -25,6 +25,15 @@ def della_gatta_TRP63_gene_expression(gene_number=None):
Y = Y[:, None]
return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
def simulation_BGPLVM():
mat_data = scipy.io.loadmat(os.path.join(data_path, 'BGPLVMSimulation.mat'))
Y = np.array(mat_data['Y'], dtype=float)
S = np.array(mat_data['initS'], dtype=float)
mu = np.array(mat_data['initMu'], dtype=float)
return {'Y': Y, 'S': S,
'mu' : mu,
'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
# The data sets
def oil():
@ -32,7 +41,7 @@ def oil():
X = np.fromfile(fid, sep='\t').reshape((-1, 12))
fid.close()
fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
Y = np.fromfile(fid, sep='\t').reshape((-1, 3))*2.-1.
Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1.
fid.close()
return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
@ -74,9 +83,9 @@ def silhouette():
inMean = np.mean(mat_data['Y'])
inScales = np.sqrt(np.var(mat_data['Y']))
X = mat_data['Y'] - inMean
X = X/inScales
X = X / inScales
Xtest = mat_data['Y_test'] - inMean
Xtest = Xtest/inScales
Xtest = Xtest / inScales
Y = mat_data['Z']
Ytest = mat_data['Z_test']
return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
@ -102,13 +111,13 @@ def toy_rbf_1d(seed=default_seed):
np.random.seed(seed=seed)
numIn = 1
N = 500
X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
X.sort(axis=0)
rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
white = GPy.kern.white(numIn, variance=1e-2)
kernel = rbf + white
K = kernel.K(X)
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N,1))
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
def toy_rbf_1d_50(seed=default_seed):
@ -124,15 +133,15 @@ def toy_rbf_1d_50(seed=default_seed):
def toy_linear_1d_classification(seed=default_seed):
np.random.seed(seed=seed)
x1 = np.random.normal(-3,5,20)
x2 = np.random.normal(3,5,20)
X = (np.r_[x1,x2])[:,None]
x1 = np.random.normal(-3, 5, 20)
x2 = np.random.normal(3, 5, 20)
X = (np.r_[x1, x2])[:, None]
return {'X': X, 'Y': sample_class(2.*X), 'F': 2.*X}
def rogers_girolami_olympics():
olympic_data = scipy.io.loadmat(os.path.join(data_path, 'olympics.mat'))['male100']
X = olympic_data[:, 0][:, None]
Y= olympic_data[:, 1][:, None]
Y = olympic_data[:, 1][:, None]
return {'X': X, 'Y': Y, 'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}
# def movielens_small(partNo=1,seed=default_seed):
# np.random.seed(seed=seed)
@ -169,7 +178,7 @@ def rogers_girolami_olympics():
def crescent_data(num_data=200,seed=default_seed):
def crescent_data(num_data=200, seed=default_seed):
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
:param num_data_part: number of data to be sampled (default is 200).
:type num_data: int
@ -178,7 +187,7 @@ def crescent_data(num_data=200,seed=default_seed):
np.random.seed(seed=seed)
sqrt2 = np.sqrt(2)
# Rotation matrix
R = np.array([[sqrt2/2, -sqrt2/2], [sqrt2/2, sqrt2/2]])
R = np.array([[sqrt2 / 2, -sqrt2 / 2], [sqrt2 / 2, sqrt2 / 2]])
# Scaling matrices
scales = []
scales.append(np.array([[3, 0], [0, 1]]))
@ -195,9 +204,9 @@ def crescent_data(num_data=200,seed=default_seed):
num_data_part = []
num_data_total = 0
for i in range(0, 4):
num_data_part.append(round(((i+1)*num_data)/4.))
num_data_part.append(round(((i + 1) * num_data) / 4.))
num_data_part[i] -= num_data_total
#print num_data_part[i]
# print num_data_part[i]
part = np.random.normal(size=(num_data_part[i], 2))
part = np.dot(np.dot(part, scales[i]), R) + means[i]
Xparts.append(part)
@ -205,15 +214,103 @@ def crescent_data(num_data=200,seed=default_seed):
X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
Y = np.vstack((np.ones((num_data_part[0]+num_data_part[1], 1)), -np.ones((num_data_part[2]+num_data_part[3], 1))))
Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
def creep_data():
all_data = np.loadtxt(os.path.join(data_path, 'creep', 'taka'))
y = all_data[:, 1:2].copy()
features = [0]
features.extend(range(2, 31))
X = all_data[:,features].copy()
X = all_data[:, features].copy()
return {'X': X, 'y' : y}
def cmu_mocap_49_balance():
"""Load CMU subject 49's one legged balancing motion that was used by Alvarez, Luengo and Lawrence at AISTATS 2009."""
train_motions = ['18', '19']
test_motions = ['20']
data = cmu_mocap('49', train_motions, test_motions, sample_every=4)
data['info'] = "One legged balancing motions from CMU data base subject 49. As used in Alvarez, Luengo and Lawrence at AISTATS 2009. It consists of " + data['info']
return data
def cmu_mocap_35_walk_jog():
"""Load CMU subject 35's walking and jogging motions, the same data that was used by Taylor, Roweis and Hinton at NIPS 2007. but without their preprocessing. Also used by Lawrence at AISTATS 2007."""
train_motions = ['01', '02', '03', '04', '05', '06',
'07', '08', '09', '10', '11', '12',
'13', '14', '15', '16', '17', '19',
'20', '21', '22', '23', '24', '25',
'26', '28', '30', '31', '32', '33', '34']
test_motions = ['18', '29']
data = cmu_mocap('35', train_motions, test_motions, sample_every=4)
data['info'] = "Walk and jog data from CMU data base subject 35. As used in Tayor, Roweis and Hinton at NIPS 2007, but without their pre-processing (i.e. as used by Lawrence at AISTATS 2007). It consists of " + data['info']
return data
def cmu_mocap(subject, train_motions, test_motions=[], sample_every=4):
"""Load a given subject's training and test motions from the CMU motion capture data."""
# Load in subject skeleton.
subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject)
skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf'))
# Set up labels for each sequence
exlbls = np.eye(len(train_motions))
# Load sequences
tot_length = 0
temp_Y = []
temp_lbls = []
for i in range(len(train_motions)):
temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + train_motions[i] + '.amc'))
temp_Y.append(temp_chan[::sample_every, :])
temp_lbls.append(np.tile(exlbls[i, :], (temp_Y[i].shape[0], 1)))
tot_length += temp_Y[i].shape[0]
Y = np.zeros((tot_length, temp_Y[0].shape[1]))
lbls = np.zeros((tot_length, temp_lbls[0].shape[1]))
end_ind = 0
for i in range(len(temp_Y)):
start_ind = end_ind
end_ind += temp_Y[i].shape[0]
Y[start_ind:end_ind, :] = temp_Y[i]
lbls[start_ind:end_ind, :] = temp_lbls[i]
if len(test_motions)>0:
temp_Ytest = []
temp_lblstest = []
testexlbls = np.eye(len(test_motions))
tot_test_length = 0
for i in range(len(test_motions)):
temp_chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + test_motions[i] + '.amc'))
temp_Ytest.append(temp_chan[::sample_every, :])
temp_lblstest.append(np.tile(testexlbls[i, :], (temp_Ytest[i].shape[0], 1)))
tot_test_length += temp_Ytest[i].shape[0]
# Load test data
Ytest = np.zeros((tot_test_length, temp_Ytest[0].shape[1]))
lblstest = np.zeros((tot_test_length, temp_lblstest[0].shape[1]))
end_ind = 0
for i in range(len(temp_Ytest)):
start_ind = end_ind
end_ind += temp_Ytest[i].shape[0]
Ytest[start_ind:end_ind, :] = temp_Ytest[i]
lblstest[start_ind:end_ind, :] = temp_lblstest[i]
else:
Ytest = None
lblstest = None
info = 'Subject: ' + subject + '. Training motions: '
for motion in train_motions:
info += motion + ', '
info = info[:-2]
if len(test_motions)>0:
info += '. Test motions: '
for motion in test_motions:
info += motion + ', '
info = info[:-2] + '.'
else:
info += '.'
if sample_every != 1:
info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.'
return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel}

View file

@ -1,9 +1,12 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#tdot function courtesy of Ian Murray:
# Iain Murray, April 2013. iain contactable via iainmurray.net
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
import numpy as np
from scipy import linalg, optimize
from scipy import linalg, optimize, weave
import pylab as pb
import Tango
import sys
@ -11,9 +14,17 @@ import re
import pdb
import cPickle
import types
import ctypes
from ctypes import byref, c_char, c_int, c_double # TODO
#import scipy.lib.lapack.flapack
import scipy as sp
try:
_blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
_blas_available = True
except:
_blas_available = False
def det_ln_diag(A):
"""
log determinant of a diagonal matrix
@ -36,34 +47,34 @@ def trace_dot(a,b):
return np.sum(a*b)
def mdot(*args):
"""Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one
from left to right using dot().
Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers.
"""
if len(args)==1:
return args[0]
elif len(args)==2:
return _mdot_r(args[0],args[1])
else:
return _mdot_r(args[:-1],args[-1])
"""Multiply all the arguments using matrix product rules.
The output is equivalent to multiplying the arguments one by one
from left to right using dot().
Precedence can be controlled by creating tuples of arguments,
for instance mdot(a,((b,c),d)) multiplies a (a*((b*c)*d)).
Note that this means the output of dot(a,b) and mdot(a,b) will differ if
a or b is a pure tuple of numbers.
"""
if len(args)==1:
return args[0]
elif len(args)==2:
return _mdot_r(args[0],args[1])
else:
return _mdot_r(args[:-1],args[-1])
def _mdot_r(a,b):
"""Recursive helper for mdot"""
if type(a)==types.TupleType:
if len(a)>1:
a = mdot(*a)
else:
a = a[0]
if type(b)==types.TupleType:
if len(b)>1:
b = mdot(*b)
else:
b = b[0]
return np.dot(a,b)
"""Recursive helper for mdot"""
if type(a)==types.TupleType:
if len(a)>1:
a = mdot(*a)
else:
a = a[0]
if type(b)==types.TupleType:
if len(b)>1:
b = mdot(*b)
else:
b = b[0]
return np.dot(a,b)
def jitchol(A,maxtries=5):
A = np.asfortranarray(A)
@ -112,7 +123,7 @@ def jitchol_old(A,maxtries=5):
raise linalg.LinAlgError,"not positive definite, even with jitter."
def pdinv(A):
def pdinv(A, *args):
"""
:param A: A DxD pd numpy array
@ -125,7 +136,7 @@ def pdinv(A):
:rval logdet: the log of the determinant of A
:rtype logdet: float64
"""
L = jitchol(A)
L = jitchol(A, *args)
logdet = 2.*np.sum(np.log(np.diag(L)))
Li = chol_inv(L)
Ai = linalg.lapack.flapack.dpotri(L)[0]
@ -187,6 +198,96 @@ def PCA(Y, Q):
Z = linalg.svd(Y-Y.mean(axis=0), full_matrices = False)
[X, W] = [Z[0][:,0:Q], np.dot(np.diag(Z[1]), Z[2]).T[:,0:Q]]
v = X.std(axis=0)
X /= v;
W *= v;
X /= v
W *= v
return X, W.T
def tdot_numpy(mat,out=None):
return np.dot(mat,mat.T,out)
def tdot_blas(mat, out=None):
"""returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles."""
if (mat.dtype != 'float64') or (len(mat.shape) != 2):
return np.dot(mat, mat.T)
nn = mat.shape[0]
if out is None:
out = np.zeros((nn,nn))
else:
assert(out.dtype == 'float64')
assert(out.shape == (nn,nn))
# FIXME: should allow non-contiguous out, and copy output into it:
assert(8 in out.strides)
# zeroing needed because of dumb way I copy across triangular answer
out[:] = 0.0
## Call to DSYRK from BLAS
# If already in Fortran order (rare), and has the right sorts of strides I
# could avoid the copy. I also thought swapping to cblas API would allow use
# of C order. However, I tried that and had errors with large matrices:
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
mat = np.asfortranarray(mat)
TRANS = c_char('n')
N = c_int(mat.shape[0])
K = c_int(mat.shape[1])
LDA = c_int(mat.shape[0])
UPLO = c_char('l')
ALPHA = c_double(1.0)
A = mat.ctypes.data_as(ctypes.c_void_p)
BETA = c_double(0.0)
C = out.ctypes.data_as(ctypes.c_void_p)
LDC = c_int(np.max(out.strides) / 8)
_blaslib.dsyrk_(byref(UPLO), byref(TRANS), byref(N), byref(K),
byref(ALPHA), A, byref(LDA), byref(BETA), C, byref(LDC))
symmetrify(out,upper=True)
return out
def tdot(*args, **kwargs):
if _blas_available:
return tdot_blas(*args,**kwargs)
else:
return tdot_numpy(*args,**kwargs)
def symmetrify(A,upper=False):
"""
Take the square matrix A and make it symmetrical by copting elements from the lower half to the upper
works IN PLACE.
"""
N,M = A.shape
assert N==M
c_contig_code = """
for (int i=1; i<N; i++){
for (int j=0; j<i; j++){
A[i+j*N] = A[i*N+j];
}
}
"""
f_contig_code = """
for (int i=1; i<N; i++){
for (int j=0; j<i; j++){
A[i*N+j] = A[i+j*N];
}
}
"""
if A.flags['C_CONTIGUOUS'] and upper:
weave.inline(f_contig_code,['A','N'])
elif A.flags['C_CONTIGUOUS'] and not upper:
weave.inline(c_contig_code,['A','N'])
elif A.flags['F_CONTIGUOUS'] and upper:
weave.inline(c_contig_code,['A','N'])
elif A.flags['F_CONTIGUOUS'] and not upper:
weave.inline(f_contig_code,['A','N'])
else:
tmp = np.tril(A)
A[:] = 0.0
A += tmp
A += np.tril(tmp,-1).T
def symmetrify_murray(A):
A += A.T
nn = A.shape[0]
A[[range(nn),range(nn)]] /= 2.0

View file

@ -1,6 +1,621 @@
import os
import numpy as np
import math
class vertex:
def __init__(self, name, id, parents=[], children=[], meta = {}):
self.name = name
self.id = id
self.parents = parents
self.children = children
self.meta = meta
def __str__(self):
return self.name + '(' + str(self.id) + ').'
class tree:
def __init__(self):
self.vertices = []
self.vertices.append(vertex(name='root', id=0))
def __str__(self):
index = self.find_root()
return self.branch_str(index)
def branch_str(self, index, indent=''):
out = indent + str(self.vertices[index]) + '\n'
for child in self.vertices[index].children:
out+=self.branch_str(child, indent+' ')
return out
def find_children(self):
"""Take a tree and set the children according to the parents.
Takes a tree structure which lists the parents of each vertex
and computes the children for each vertex and places them in."""
for i in range(len(self.vertices)):
self.vertices[i].children = []
for i in range(len(self.vertices)):
for parent in self.vertices[i].parents:
if i not in self.vertices[parent].children:
self.vertices[parent].children.append(i)
def find_parents(self):
"""Take a tree and set the parents according to the children
Takes a tree structure which lists the children of each vertex
and computes the parents for each vertex and places them in."""
for i in range(len(self.vertices)):
self.vertices[i].parents = []
for i in range(len(self.vertices)):
for child in self.vertices[i].children:
if i not in self.vertices[child].parents:
self.vertices[child].parents.append(i)
def find_root(self):
"""Finds the index of the root node of the tree."""
self.find_parents()
index = 0
while len(self.vertices[index].parents)>0:
index = self.vertices[index].parents[0]
return index
def get_index_by_id(self, id):
"""Give the index associated with a given vertex id."""
for i in range(len(self.vertices)):
if self.vertices[i].id == id:
return i
raise Error, 'Reverse look up of id failed.'
def get_index_by_name(self, name):
"""Give the index associated with a given vertex name."""
for i in range(len(self.vertices)):
if self.vertices[i].name == name:
return i
raise Error, 'Reverse look up of name failed.'
def order_vertices(self):
"""Order vertices in the graph such that parents always have a lower index than children."""
ordered = False
while ordered == False:
for i in range(len(self.vertices)):
ordered = True
for parent in self.vertices[i].parents:
if parent>i:
ordered = False
self.swap_vertices(i, parent)
def swap_vertices(self, i, j):
"""Swap two vertices in the tree structure array.
swap_vertex swaps the location of two vertices in a tree structure array.
ARG tree : the tree for which two vertices are to be swapped.
ARG i : the index of the first vertex to be swapped.
ARG j : the index of the second vertex to be swapped.
RETURN tree : the tree structure with the two vertex locations
swapped.
"""
store_vertex_i = self.vertices[i]
store_vertex_j = self.vertices[j]
self.vertices[j] = store_vertex_i
self.vertices[i] = store_vertex_j
for k in range(len(self.vertices)):
for swap_list in [self.vertices[k].children, self.vertices[k].parents]:
if i in swap_list:
swap_list[swap_list.index(i)] = -1
if j in swap_list:
swap_list[swap_list.index(j)] = i
if -1 in swap_list:
swap_list[swap_list.index(-1)] = j
def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
"""Compute the rotation matrix for an angle in each direction.
This is a helper function for computing the rotation matrix for a given set of angles in a given order.
ARG xangle : rotation for x-axis.
ARG yangle : rotation for y-axis.
ARG zangle : rotation for z-axis.
ARG order : the order for the rotations."""
if degrees:
xangle = math.radians(xangle)
yangle = math.radians(yangle)
zangle = math.radians(zangle)
# Here we assume we rotate z, then x then y.
c1 = math.cos(xangle) # The x angle
c2 = math.cos(yangle) # The y angle
c3 = math.cos(zangle) # the z angle
s1 = math.sin(xangle)
s2 = math.sin(yangle)
s3 = math.sin(zangle)
# see http://en.wikipedia.org/wiki/Rotation_matrix for
# additional info.
if order=='zxy':
rot_mat = np.array([[c2*c3-s1*s2*s3, c2*s3+s1*s2*c3, -s2*c1],[-c1*s3, c1*c3, s1],[s2*c3+c2*s1*s3, s2*s3-c2*s1*c3, c2*c1]])
else:
rot_mat = np.eye(3)
for i in range(len(order)):
if order[i]=='x':
rot_mat = np.dot(np.array([[1, 0, 0], [0, c1, s1], [0, -s1, c1]]),rot_mat)
elif order[i] == 'y':
rot_mat = np.dot(np.array([[c2, 0, -s2], [0, 1, 0], [s2, 0, c2]]),rot_mat)
elif order[i] == 'z':
rot_mat = np.dot(np.array([[c3, s3, 0], [-s3, c3, 0], [0, 0, 1]]),rot_mat)
return rot_mat
# Motion capture data routines.
class skeleton(tree):
def __init__(self):
tree.__init__(self)
def connection_matrix(self):
connection = np.zeros((len(self.vertices), len(self.vertices)), dtype=bool)
for i in range(len(self.vertices)):
for j in range(len(self.vertices[i].children)):
connection[i, self.vertices[i].children[j]] = True
return connection
def to_xyz(self, channels):
raise NotImplementedError, "this needs to be implemented to use the skeleton class"
def finalize(self):
"""After loading in a skeleton ensure parents are correct, vertex orders are correct and rotation matrices are correct."""
self.find_parents()
self.order_vertices()
self.set_rotation_matrices()
def smooth_angle_channels(self, channels):
"""Remove discontinuities in angle channels so that they don't cause artifacts in algorithms that rely on the smoothness of the functions."""
for vertex in self.vertices:
for col in vertex.meta['rot_ind']:
if col:
for k in range(1, channels.shape[0]):
diff=channels[k, col]-channels[k-1, col]
if abs(diff+360.)<abs(diff):
channels[k:, col]=channels[k:, col]+360.
elif abs(diff-360.)<abs(diff):
channels[k:, col]=channels[k:, col]-360.
# class bvh_skeleton(skeleton):
# def __init__(self):
# skeleton.__init__(self)
# def to_xyz(self, channels):
class acclaim_skeleton(skeleton):
def __init__(self, file_name=None):
skeleton.__init__(self)
self.documentation = []
self.angle = 'deg'
self.length = 1.0
self.mass = 1.0
self.type = 'acclaim'
self.vertices[0] = vertex(name='root', id=0,
parents = [0], children=[],
meta = {'orientation': [],
'axis': [0., 0., 0.],
'axis_order': [],
'C': np.eye(3),
'Cinv': np.eye(3),
'channels': [],
'bodymass': [],
'confmass': [],
'order': [],
'rot_ind': [],
'pos_ind': [],
'limits': [],
'xyz': np.array([0., 0., 0.]),
'rot': np.eye(3)})
if file_name:
self.load_skel(file_name)
def to_xyz(self, channels):
rot_val = list(self.vertices[0].meta['orientation'])
for i in range(len(self.vertices[0].meta['rot_ind'])):
rind = self.vertices[0].meta['rot_ind'][i]
if rind != -1:
rot_val[i] += channels[rind]
self.vertices[0].meta['rot'] = rotation_matrix(rot_val[0],
rot_val[1],
rot_val[2],
self.vertices[0].meta['axis_order'],
degrees=True)
# vertex based store of the xyz location
self.vertices[0].meta['xyz'] = list(self.vertices[0].meta['offset'])
for i in range(len(self.vertices[0].meta['pos_ind'])):
pind = self.vertices[0].meta['pos_ind'][i]
if pind != -1:
self.vertices[0].meta['xyz'][i] += channels[pind]
for i in range(len(self.vertices[0].children)):
ind = self.vertices[0].children[i]
self.get_child_xyz(ind, channels)
xyz = []
for vertex in self.vertices:
xyz.append(vertex.meta['xyz'])
return np.array(xyz)
def get_child_xyz(self, ind, channels):
parent = self.vertices[ind].parents[0]
children = self.vertices[ind].children
rot_val = np.zeros(3)
for j in range(len(self.vertices[ind].meta['rot_ind'])):
rind = self.vertices[ind].meta['rot_ind'][j]
if rind != -1:
rot_val[j] = channels[rind]
else:
rot_val[j] = 0
tdof = rotation_matrix(rot_val[0], rot_val[1], rot_val[2],
self.vertices[ind].meta['order'],
degrees=True)
torient = rotation_matrix(self.vertices[ind].meta['axis'][0],
self.vertices[ind].meta['axis'][1],
self.vertices[ind].meta['axis'][2],
self.vertices[ind].meta['axis_order'],
degrees=True)
torient_inv = rotation_matrix(-self.vertices[ind].meta['axis'][0],
-self.vertices[ind].meta['axis'][1],
-self.vertices[ind].meta['axis'][2],
self.vertices[ind].meta['axis_order'][::-1],
degrees=True)
self.vertices[ind].meta['rot'] = np.dot(np.dot(np.dot(torient_inv,tdof),torient),self.vertices[parent].meta['rot'])
self.vertices[ind].meta['xyz'] = self.vertices[parent].meta['xyz'] + np.dot(self.vertices[ind].meta['offset'],self.vertices[ind].meta['rot'])
for i in range(len(children)):
cind = children[i]
self.get_child_xyz(cind, channels)
def load_channels(self, file_name):
fid=open(file_name, 'r')
channels = self.read_channels(fid)
fid.close()
return channels
def load_skel(self, file_name):
"""Loads an ASF file into a skeleton structure.
loads skeleton structure from an acclaim skeleton file.
ARG file_name : the file name to load in.
RETURN skel : the skeleton for the file."""
fid = open(file_name, 'r')
self.read_skel(fid)
fid.close()
self.name = file_name
def read_bonedata(self, fid):
"""Read bone data from an acclaim skeleton file stream."""
bone_count = 0
lin = self.read_line(fid)
while lin[0]!=':':
parts = lin.split()
if parts[0] == 'begin':
bone_count += 1
self.vertices.append(vertex(name = '', id=np.NaN,
meta={'name': [],
'id': [],
'offset': [],
'orientation': [],
'axis': [0., 0., 0.],
'axis_order': [],
'C': np.eye(3),
'Cinv': np.eye(3),
'channels': [],
'bodymass': [],
'confmass': [],
'order': [],
'rot_ind': [],
'pos_ind': [],
'limits': [],
'xyz': np.array([0., 0., 0.]),
'rot': np.eye(3)}))
lin = self.read_line(fid)
elif parts[0]=='id':
self.vertices[bone_count].id = int(parts[1])
lin = self.read_line(fid)
self.vertices[bone_count].children = []
elif parts[0]=='name':
self.vertices[bone_count].name = parts[1]
lin = self.read_line(fid)
elif parts[0]=='direction':
direction = np.array([float(parts[1]), float(parts[2]), float(parts[3])])
lin = self.read_line(fid)
elif parts[0]=='length':
lgth = float(parts[1])
lin = self.read_line(fid)
elif parts[0]=='axis':
self.vertices[bone_count].meta['axis'] = np.array([float(parts[1]),
float(parts[2]),
float(parts[3])])
# order is reversed compared to bvh
self.vertices[bone_count].meta['axis_order'] = parts[-1][::-1].lower()
lin = self.read_line(fid)
elif parts[0]=='dof':
order = []
for i in range(1, len(parts)):
if parts[i]== 'rx':
chan = 'Xrotation'
order.append('x')
elif parts[i] =='ry':
chan = 'Yrotation'
order.append('y')
elif parts[i] == 'rz':
chan = 'Zrotation'
order.append('z')
elif parts[i] == 'tx':
chan = 'Xposition'
elif parts[i] == 'ty':
chan = 'Yposition'
elif parts[i] == 'tz':
chan = 'Zposition'
elif parts[i] == 'l':
chan = 'length'
self.vertices[bone_count].meta['channels'].append(chan)
# order is reversed compared to bvh
self.vertices[bone_count].meta['order'] = order[::-1]
lin = self.read_line(fid)
elif parts[0]=='limits':
self.vertices[bone_count].meta['limits'] = [[float(parts[1][1:]), float(parts[2][:-1])]]
lin = self.read_line(fid)
while lin !='end':
parts = lin.split()
self.vertices[bone_count].meta['limits'].append([float(parts[0][1:]), float(parts[1][:-1])])
lin = self.read_line(fid)
self.vertices[bone_count].meta['limits'] = np.array(self.vertices[bone_count].meta['limits'])
elif parts[0]=='end':
self.vertices[bone_count].meta['offset'] = direction*lgth
lin = self.read_line(fid)
return lin
def read_channels(self, fid):
"""Read channels from an acclaim file."""
bones = [[] for i in self.vertices]
num_channels = 0
for vertex in self.vertices:
num_channels = num_channels + len(vertex.meta['channels'])
lin = self.read_line(fid)
while lin != ':DEGREES':
lin = self.read_line(fid)
counter = 0
lin = self.read_line(fid)
while lin:
parts = lin.split()
if len(parts)==1:
frame_no = int(parts[0])
if frame_no:
counter += 1
if counter != frame_no:
raise Error, 'Unexpected frame number.'
else:
raise Error, 'Single bone name ...'
else:
ind = self.get_index_by_name(parts[0])
bones[ind].append(np.array([float(channel) for channel in parts[1:]]))
lin = self.read_line(fid)
num_frames = counter
channels = np.zeros((num_frames, num_channels))
end_val = 0
for i in range(len(self.vertices)):
vertex = self.vertices[i]
if len(vertex.meta['channels'])>0:
start_val = end_val
end_val = end_val + len(vertex.meta['channels'])
for j in range(num_frames):
channels[j, start_val:end_val] = bones[i][j]
self.resolve_indices(i, start_val)
self.smooth_angle_channels(channels)
return channels
def read_documentation(self, fid):
"""Read documentation from an acclaim skeleton file stream."""
lin = self.read_line(fid)
while lin[0] != ':':
self.documentation.append(lin)
lin = self.read_line(fid)
return lin
def read_hierarchy(self, fid):
"""Read hierarchy information from acclaim skeleton file stream."""
lin = self.read_line(fid)
while lin != 'end':
parts = lin.split()
if lin != 'begin':
ind = self.get_index_by_name(parts[0])
for i in range(1, len(parts)):
self.vertices[ind].children.append(self.get_index_by_name(parts[i]))
lin = self.read_line(fid)
lin = self.read_line(fid)
return lin
def read_line(self, fid):
"""Read a line from a file string and check it isn't either empty or commented before returning."""
lin = '#'
while lin[0] == '#':
lin = fid.readline().strip()
if lin == '':
return lin
return lin
def read_root(self, fid):
"""Read the root node from an acclaim skeleton file stream."""
lin = self.read_line(fid)
while lin[0] != ':':
parts = lin.split()
if parts[0]=='order':
order = []
for i in range(1, len(parts)):
if parts[i].lower()=='rx':
chan = 'Xrotation'
order.append('x')
elif parts[i].lower()=='ry':
chan = 'Yrotation'
order.append('y')
elif parts[i].lower()=='rz':
chan = 'Zrotation'
order.append('z')
elif parts[i].lower()=='tx':
chan = 'Xposition'
elif parts[i].lower()=='ty':
chan = 'Yposition'
elif parts[i].lower()=='tz':
chan = 'Zposition'
elif parts[i].lower()=='l':
chan = 'length'
self.vertices[0].meta['channels'].append(chan)
# order is reversed compared to bvh
self.vertices[0].meta['order'] = order[::-1]
elif parts[0]=='axis':
# order is reversed compared to bvh
self.vertices[0].meta['axis_order'] = parts[1][::-1].lower()
elif parts[0]=='position':
self.vertices[0].meta['offset'] = [float(parts[1]),
float(parts[2]),
float(parts[3])]
elif parts[0]=='orientation':
self.vertices[0].meta['orientation'] = [float(parts[1]),
float(parts[2]),
float(parts[3])]
print self.vertices[0].meta['orientation']
lin = self.read_line(fid)
return lin
def read_skel(self, fid):
"""Loads an acclaim skeleton format from a file stream."""
lin = self.read_line(fid)
while lin:
if lin[0]==':':
if lin[1:]== 'name':
lin = self.read_line(fid)
self.name = lin
elif lin[1:]=='units':
lin = self.read_units(fid)
elif lin[1:]=='documentation':
lin = self.read_documentation(fid)
elif lin[1:]=='root':
lin = self.read_root(fid)
elif lin[1:]=='bonedata':
lin = self.read_bonedata(fid)
elif lin[1:]=='hierarchy':
lin = self.read_hierarchy(fid)
elif lin[1:8]=='version':
lin = self.read_line(fid)
continue
else:
if not lin:
self.finalize()
return
lin = self.read_line(fid)
else:
raise Error, 'Unrecognised file format'
self.finalize()
def read_units(self, fid):
"""Read units from an acclaim skeleton file stream."""
lin = self.read_line(fid)
while lin[0] != ':':
parts = lin.split()
if parts[0]=='mass':
self.mass = float(parts[1])
elif parts[0]=='length':
self.length = float(parts[1])
elif parts[0]=='angle':
self.angle = parts[1]
lin = self.read_line(fid)
return lin
def resolve_indices(self, index, start_val):
"""Get indices for the skeleton from the channels when loading in channel data."""
channels = self.vertices[index].meta['channels']
base_channel = start_val
rot_ind = -np.ones(3, dtype=int)
pos_ind = -np.ones(3, dtype=int)
for i in range(len(channels)):
if channels[i]== 'Xrotation':
rot_ind[0] = base_channel + i
elif channels[i]=='Yrotation':
rot_ind[1] = base_channel + i
elif channels[i]=='Zrotation':
rot_ind[2] = base_channel + i
elif channels[i]=='Xposition':
pos_ind[0] = base_channel + i
elif channels[i]=='Yposition':
pos_ind[1] = base_channel + i
elif channels[i]=='Zposition':
pos_ind[2] = base_channel + i
self.vertices[index].meta['rot_ind'] = list(rot_ind)
self.vertices[index].meta['pos_ind'] = list(pos_ind)
def set_rotation_matrices(self):
"""Set the meta information at each vertex to contain the correct matrices C and Cinv as prescribed by the rotations and rotation orders."""
for i in range(len(self.vertices)):
self.vertices[i].meta['C'] = rotation_matrix(self.vertices[i].meta['axis'][0],
self.vertices[i].meta['axis'][1],
self.vertices[i].meta['axis'][2],
self.vertices[i].meta['axis_order'],
degrees=True)
# Todo: invert this by applying angle operations in reverse order
self.vertices[i].meta['Cinv'] = np.linalg.inv(self.vertices[i].meta['C'])
# Utilities for loading in x,y,z data.
def load_text_data(dataset, directory, centre=True):
"""Load in a data set of marker points from the Ohio State University C3D motion capture files (http://accad.osu.edu/research/mocap/mocap_data.htm)."""
@ -72,3 +687,4 @@ def read_connections(file_name, point_names):
skel = acclaim_skeleton()

View file

@ -184,71 +184,115 @@ class image_show(data_show):
#if self.invert:
# self.vals = -self.vals
class stick_show(data_show):
"""Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
class mocap_data_show(data_show):
"""Base class for visualizing motion capture data."""
def __init__(self, vals, axes=None, connect=None):
if axes==None:
fig = plt.figure()
axes = fig.add_subplot(111, projection='3d')
data_show.__init__(self, vals, axes)
self.vals = vals.reshape((3, vals.shape[1]/3)).T
self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
self.axes.set_aspect(1)
self.axes.autoscale(enable=False)
self.connect = connect
if not self.connect==None:
x = []
y = []
z = []
self.I, self.J = np.nonzero(self.connect)
for i in range(len(self.I)):
x.append(self.vals[self.I[i], 0])
x.append(self.vals[self.J[i], 0])
x.append(np.NaN)
y.append(self.vals[self.I[i], 1])
y.append(self.vals[self.J[i], 1])
y.append(np.NaN)
z.append(self.vals[self.I[i], 2])
z.append(self.vals[self.J[i], 2])
z.append(np.NaN)
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
self.process_values(vals)
self.initialize_axes()
self.draw_vertices()
self.finalize_axes()
self.draw_edges()
self.axes.figure.canvas.draw()
def modify(self, vals):
self.points_handle.remove()
self.line_handle[0].remove()
self.vals = vals.reshape((3, vals.shape[1]/3)).T
def draw_vertices(self):
self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
def draw_edges(self):
self.line_handle = []
if not self.connect==None:
x = []
y = []
z = []
self.I, self.J = np.nonzero(self.connect)
for i in range(len(self.I)):
x.append(self.vals[self.I[i], 0])
x.append(self.vals[self.J[i], 0])
for i, j in zip(self.I, self.J):
x.append(self.vals[i, 0])
x.append(self.vals[j, 0])
x.append(np.NaN)
y.append(self.vals[self.I[i], 1])
y.append(self.vals[self.J[i], 1])
y.append(self.vals[i, 1])
y.append(self.vals[j, 1])
y.append(np.NaN)
z.append(self.vals[self.I[i], 2])
z.append(self.vals[self.J[i], 2])
z.append(self.vals[i, 2])
z.append(self.vals[j, 2])
z.append(np.NaN)
self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')
def modify(self, vals):
self.process_values(vals)
self.initialize_axes_modify()
self.draw_vertices()
self.finalize_axes_modify()
self.draw_edges()
self.axes.figure.canvas.draw()
def process_values(self, vals):
raise NotImplementedError, "this needs to be implemented to use the data_show class"
def initialize_axes(self):
"""Set up the axes with the right limits and scaling."""
self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])
def initialize_axes_modify(self):
self.points_handle.remove()
self.line_handle[0].remove()
def finalize_axes(self):
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
self.axes.set_aspect(1)
self.axes.autoscale(enable=False)
def finalize_axes_modify(self):
self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim)
class stick_show(mocap_data_show):
"""Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
def __init__(self, vals, axes=None, connect=None):
mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals):
self.vals = vals.reshape((3, vals.shape[1]/3)).T
class skeleton_show(mocap_data_show):
"""data_show class for visualizing motion capture data encoded as a skeleton with angles."""
def __init__(self, vals, skel, padding=0, axes=None):
self.skel = skel
self.padding = padding
connect = skel.connection_matrix()
mocap_data_show.__init__(self, vals, axes, connect)
def process_values(self, vals):
if self.padding>0:
channels = np.zeros((vals.shape[0], vals.shape[1]+self.padding))
channels[:, 0:vals.shape[0]] = vals
else:
channels = vals
vals_mat = self.skel.to_xyz(channels.flatten())
self.vals = vals_mat
# Flip the Y and Z axes
self.vals[:, 0] = vals_mat[:, 0]
self.vals[:, 1] = vals_mat[:, 2]
self.vals[:, 2] = vals_mat[:, 1]
def wrap_around(vals, lim, connect):
quot = lim[1] - lim[0]
vals = rem(vals, quot)+lim[0]
nVals = floor(vals/quot)
for i in range(connect.shape[0]):
for j in find(connect[i, :]):
if nVals[i] != nVals[j]:
connect[i, j] = False
return vals, connect