mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-26 21:36:23 +02:00
Merge branch 'devel' of github.com:SheffieldML/GPy into devel
This commit is contained in:
commit
dc1e747702
44 changed files with 4026 additions and 1575 deletions
|
|
@ -12,9 +12,10 @@ before_install:
|
||||||
- sudo apt-get install -qq python-matplotlib
|
- sudo apt-get install -qq python-matplotlib
|
||||||
|
|
||||||
install:
|
install:
|
||||||
|
- pip install --upgrade numpy==1.7.1
|
||||||
- pip install sphinx
|
- pip install sphinx
|
||||||
- pip install nose
|
- pip install nose
|
||||||
- pip install . --use-mirrors
|
- pip install . --use-mirrors
|
||||||
# command to run tests, e.g. python setup.py test
|
# command to run tests, e.g. python setup.py test
|
||||||
script:
|
script:
|
||||||
- nosetests GPy/testing
|
- nosetests GPy/testing
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,19 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
from scipy import optimize
|
|
||||||
import sys, pdb
|
|
||||||
import multiprocessing as mp
|
|
||||||
from GPy.util.misc import opt_wrapper
|
|
||||||
#import numdifftools as ndt
|
|
||||||
from parameterised import parameterised, truncate_pad
|
|
||||||
import priors
|
|
||||||
from ..util.linalg import jitchol
|
|
||||||
from ..inference import optimization
|
|
||||||
from .. import likelihoods
|
from .. import likelihoods
|
||||||
|
from ..inference import optimization
|
||||||
|
from ..util.linalg import jitchol
|
||||||
|
from GPy.util.misc import opt_wrapper
|
||||||
|
from parameterised import parameterised
|
||||||
|
from scipy import optimize
|
||||||
|
import multiprocessing as mp
|
||||||
|
import numpy as np
|
||||||
|
import priors
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import pdb
|
||||||
|
# import numdifftools as ndt
|
||||||
|
|
||||||
class model(parameterised):
|
class model(parameterised):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
@ -24,14 +26,14 @@ class model(parameterised):
|
||||||
self.preferred_optimizer = 'tnc'
|
self.preferred_optimizer = 'tnc'
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||||
def _set_params(self,x):
|
def _set_params(self, x):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
raise NotImplementedError, "this needs to be implemented to use the model class"
|
raise NotImplementedError, "this needs to be implemented to use the model class"
|
||||||
|
|
||||||
def set_prior(self,which,what):
|
def set_prior(self, which, what):
|
||||||
"""
|
"""
|
||||||
Sets priors on the model parameters.
|
Sets priors on the model parameters.
|
||||||
|
|
||||||
|
|
@ -52,117 +54,90 @@ class model(parameterised):
|
||||||
|
|
||||||
which = self.grep_param_names(which)
|
which = self.grep_param_names(which)
|
||||||
|
|
||||||
#check tied situation
|
# check tied situation
|
||||||
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie)==set(which))]
|
tie_partial_matches = [tie for tie in self.tied_indices if (not set(tie).isdisjoint(set(which))) & (not set(tie) == set(which))]
|
||||||
if len(tie_partial_matches):
|
if len(tie_partial_matches):
|
||||||
raise ValueError, "cannot place prior across partial ties"
|
raise ValueError, "cannot place prior across partial ties"
|
||||||
tie_matches = [tie for tie in self.tied_indices if set(which)==set(tie) ]
|
tie_matches = [tie for tie in self.tied_indices if set(which) == set(tie) ]
|
||||||
if len(tie_matches)>1:
|
if len(tie_matches) > 1:
|
||||||
raise ValueError, "cannot place prior across multiple ties"
|
raise ValueError, "cannot place prior across multiple ties"
|
||||||
elif len(tie_matches)==1:
|
elif len(tie_matches) == 1:
|
||||||
which = which[:1]# just place a prior object on the first parameter
|
which = which[:1] # just place a prior object on the first parameter
|
||||||
|
|
||||||
|
|
||||||
#check constraints are okay
|
# check constraints are okay
|
||||||
if isinstance(what, (priors.gamma, priors.log_Gaussian)):
|
if isinstance(what, (priors.gamma, priors.log_Gaussian)):
|
||||||
assert not np.any(which[:,None]==self.constrained_negative_indices), "constraint and prior incompatible"
|
constrained_positive_indices = [i for i, t in zip(self.constrained_indices, self.constraints) if t.domain == 'positive']
|
||||||
assert not np.any(which[:,None]==self.constrained_bounded_indices), "constraint and prior incompatible"
|
if len(constrained_positive_indices):
|
||||||
unconst = np.setdiff1d(which, self.constrained_positive_indices)
|
constrained_positive_indices = np.hstack(constrained_positive_indices)
|
||||||
|
else:
|
||||||
|
constrained_positive_indices = np.zeros(shape=(0,))
|
||||||
|
bad_constraints = np.setdiff1d(self.all_constrained_indices(), constrained_positive_indices)
|
||||||
|
assert not np.any(which[:, None] == bad_constraints), "constraint and prior incompatible"
|
||||||
|
unconst = np.setdiff1d(which, constrained_positive_indices)
|
||||||
if len(unconst):
|
if len(unconst):
|
||||||
print "Warning: constraining parameters to be positive:"
|
print "Warning: constraining parameters to be positive:"
|
||||||
print '\n'.join([n for i,n in enumerate(self._get_param_names()) if i in unconst])
|
print '\n'.join([n for i, n in enumerate(self._get_param_names()) if i in unconst])
|
||||||
print '\n'
|
print '\n'
|
||||||
self.constrain_positive(unconst)
|
self.constrain_positive(unconst)
|
||||||
elif isinstance(what,priors.Gaussian):
|
elif isinstance(what, priors.Gaussian):
|
||||||
assert not np.any(which[:,None]==self.all_constrained_indices()), "constraint and prior incompatible"
|
assert not np.any(which[:, None] == self.all_constrained_indices()), "constraint and prior incompatible"
|
||||||
else:
|
else:
|
||||||
raise ValueError, "prior not recognised"
|
raise ValueError, "prior not recognised"
|
||||||
|
|
||||||
|
# store the prior in a local list
|
||||||
#store the prior in a local list
|
|
||||||
for w in which:
|
for w in which:
|
||||||
self.priors[w] = what
|
self.priors[w] = what
|
||||||
|
|
||||||
def get(self,name, return_names=False):
|
def get_gradient(self, name, return_names=False):
|
||||||
"""
|
|
||||||
Get a model parameter by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(name)
|
|
||||||
if len(matches):
|
|
||||||
if return_names:
|
|
||||||
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
|
|
||||||
else:
|
|
||||||
return self._get_params()[matches]
|
|
||||||
else:
|
|
||||||
raise AttributeError, "no parameter matches %s"%name
|
|
||||||
|
|
||||||
def set(self,name,val):
|
|
||||||
"""
|
|
||||||
Set model parameter(s) by name. The name is provided as a regular expression. All parameters matching that regular expression are set to ghe given value.
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(name)
|
|
||||||
if len(matches):
|
|
||||||
x = self._get_params()
|
|
||||||
x[matches] = val
|
|
||||||
self._set_params(x)
|
|
||||||
else:
|
|
||||||
raise AttributeError, "no parameter matches %s"%name
|
|
||||||
|
|
||||||
def get_gradient(self,name, return_names=False):
|
|
||||||
"""
|
"""
|
||||||
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
Get model gradient(s) by name. The name is applied as a regular expression and all parameters that match that regular expression are returned.
|
||||||
"""
|
"""
|
||||||
matches = self.grep_param_names(name)
|
matches = self.grep_param_names(name)
|
||||||
if len(matches):
|
if len(matches):
|
||||||
if return_names:
|
if return_names:
|
||||||
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
|
return self._log_likelihood_gradients()[matches], np.asarray(self._get_param_names())[matches].tolist()
|
||||||
else:
|
else:
|
||||||
return self._log_likelihood_gradients()[matches]
|
return self._log_likelihood_gradients()[matches]
|
||||||
else:
|
else:
|
||||||
raise AttributeError, "no parameter matches %s"%name
|
raise AttributeError, "no parameter matches %s" % name
|
||||||
|
|
||||||
def log_prior(self):
|
def log_prior(self):
|
||||||
"""evaluate the prior"""
|
"""evaluate the prior"""
|
||||||
return np.sum([p.lnpdf(x) for p, x in zip(self.priors,self._get_params()) if p is not None])
|
return np.sum([p.lnpdf(x) for p, x in zip(self.priors, self._get_params()) if p is not None])
|
||||||
|
|
||||||
def _log_prior_gradients(self):
|
def _log_prior_gradients(self):
|
||||||
"""evaluate the gradients of the priors"""
|
"""evaluate the gradients of the priors"""
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
ret = np.zeros(x.size)
|
ret = np.zeros(x.size)
|
||||||
[np.put(ret,i,p.lnpdf_grad(xx)) for i,(p,xx) in enumerate(zip(self.priors,x)) if not p is None]
|
[np.put(ret, i, p.lnpdf_grad(xx)) for i, (p, xx) in enumerate(zip(self.priors, x)) if not p is None]
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
def _transform_gradients(self, g):
|
def _transform_gradients(self, g):
|
||||||
"""
|
|
||||||
Takes a list of gradients and return an array of transformed gradients (positive/negative/tied/and so on)
|
|
||||||
"""
|
|
||||||
|
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
|
for index, constraint in zip(self.constrained_indices, self.constraints):
|
||||||
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
|
g[index] = g[index] * constraint.gradfactor(x[index])
|
||||||
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
|
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
||||||
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
|
if len(self.tied_indices) or len(self.fixed_indices):
|
||||||
if len(self.tied_indices) or len(self.constrained_fixed_indices):
|
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
|
||||||
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
|
return np.delete(g, to_remove)
|
||||||
return np.delete(g,to_remove)
|
|
||||||
else:
|
else:
|
||||||
return g
|
return g
|
||||||
|
|
||||||
|
|
||||||
def randomize(self):
|
def randomize(self):
|
||||||
"""
|
"""
|
||||||
Randomize the model.
|
Randomize the model.
|
||||||
Make this draw from the prior if one exists, else draw from N(0,1)
|
Make this draw from the prior if one exists, else draw from N(0,1)
|
||||||
"""
|
"""
|
||||||
#first take care of all parameters (from N(0,1))
|
# first take care of all parameters (from N(0,1))
|
||||||
x = self._get_params_transformed()
|
x = self._get_params_transformed()
|
||||||
x = np.random.randn(x.size)
|
x = np.random.randn(x.size)
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
#now draw from prior where possible
|
# now draw from prior where possible
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
[np.put(x,i,p.rvs(1)) for i,p in enumerate(self.priors) if not p is None]
|
[np.put(x, i, p.rvs(1)) for i, p in enumerate(self.priors) if not p is None]
|
||||||
self._set_params(x)
|
self._set_params(x)
|
||||||
self._set_params_transformed(self._get_params_transformed())#makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
self._set_params_transformed(self._get_params_transformed()) # makes sure all of the tied parameters get the same init (since there's only one prior object...)
|
||||||
|
|
||||||
|
|
||||||
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
def optimize_restarts(self, Nrestarts=10, robust=False, verbose=True, parallel=False, num_processes=None, **kwargs):
|
||||||
|
|
@ -196,10 +171,10 @@ class model(parameterised):
|
||||||
pool = mp.Pool(processes=num_processes)
|
pool = mp.Pool(processes=num_processes)
|
||||||
for i in range(Nrestarts):
|
for i in range(Nrestarts):
|
||||||
self.randomize()
|
self.randomize()
|
||||||
job = pool.apply_async(opt_wrapper, args = (self,), kwds = kwargs)
|
job = pool.apply_async(opt_wrapper, args=(self,), kwds=kwargs)
|
||||||
jobs.append(job)
|
jobs.append(job)
|
||||||
|
|
||||||
pool.close() # signal that no more data coming in
|
pool.close() # signal that no more data coming in
|
||||||
pool.join() # wait for all the tasks to complete
|
pool.join() # wait for all the tasks to complete
|
||||||
except KeyboardInterrupt:
|
except KeyboardInterrupt:
|
||||||
print "Ctrl+c received, terminating and joining pool."
|
print "Ctrl+c received, terminating and joining pool."
|
||||||
|
|
@ -215,10 +190,10 @@ class model(parameterised):
|
||||||
self.optimization_runs.append(jobs[i].get())
|
self.optimization_runs.append(jobs[i].get())
|
||||||
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print("Optimization restart {0}/{1}, f = {2}".format(i+1, Nrestarts, self.optimization_runs[-1].f_opt))
|
print("Optimization restart {0}/{1}, f = {2}".format(i + 1, Nrestarts, self.optimization_runs[-1].f_opt))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if robust:
|
if robust:
|
||||||
print("Warning - optimization restart {0}/{1} failed".format(i+1, Nrestarts))
|
print("Warning - optimization restart {0}/{1} failed".format(i + 1, Nrestarts))
|
||||||
else:
|
else:
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
|
@ -228,22 +203,22 @@ class model(parameterised):
|
||||||
else:
|
else:
|
||||||
self._set_params_transformed(initial_parameters)
|
self._set_params_transformed(initial_parameters)
|
||||||
|
|
||||||
def ensure_default_constraints(self,warn=False):
|
def ensure_default_constraints(self):
|
||||||
"""
|
"""
|
||||||
Ensure that any variables which should clearly be positive have been constrained somehow.
|
Ensure that any variables which should clearly be positive have been constrained somehow.
|
||||||
"""
|
"""
|
||||||
positive_strings = ['variance','lengthscale', 'precision']
|
positive_strings = ['variance', 'lengthscale', 'precision', 'kappa']
|
||||||
param_names = self._get_param_names()
|
param_names = self._get_param_names()
|
||||||
currently_constrained = self.all_constrained_indices()
|
currently_constrained = self.all_constrained_indices()
|
||||||
to_make_positive = []
|
to_make_positive = []
|
||||||
for s in positive_strings:
|
for s in positive_strings:
|
||||||
for i in self.grep_param_names(s):
|
for i in self.grep_param_names(s):
|
||||||
if not (i in currently_constrained):
|
if not (i in currently_constrained):
|
||||||
to_make_positive.append(param_names[i])
|
#to_make_positive.append(re.escape(param_names[i]))
|
||||||
if warn:
|
to_make_positive.append(i)
|
||||||
print "Warning! constraining %s postive"%name
|
|
||||||
if len(to_make_positive):
|
if len(to_make_positive):
|
||||||
self.constrain_positive('('+'|'.join(to_make_positive)+')')
|
#self.constrain_positive('(' + '|'.join(to_make_positive) + ')')
|
||||||
|
self.constrain_positive(np.asarray(to_make_positive))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -261,14 +236,14 @@ class model(parameterised):
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
||||||
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
||||||
return - LL_gradients - prior_gradients
|
return -LL_gradients - prior_gradients
|
||||||
|
|
||||||
def objective_and_gradients(self, x):
|
def objective_and_gradients(self, x):
|
||||||
self._set_params_transformed(x)
|
self._set_params_transformed(x)
|
||||||
obj_f = -self.log_likelihood() - self.log_prior()
|
obj_f = -self.log_likelihood() - self.log_prior()
|
||||||
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
LL_gradients = self._transform_gradients(self._log_likelihood_gradients())
|
||||||
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
prior_gradients = self._transform_gradients(self._log_prior_gradients())
|
||||||
obj_grads = - LL_gradients - prior_gradients
|
obj_grads = -LL_gradients - prior_gradients
|
||||||
return obj_f, obj_grads
|
return obj_f, obj_grads
|
||||||
|
|
||||||
def optimize(self, optimizer=None, start=None, **kwargs):
|
def optimize(self, optimizer=None, start=None, **kwargs):
|
||||||
|
|
@ -278,7 +253,7 @@ class model(parameterised):
|
||||||
|
|
||||||
:max_f_eval: maximum number of function evaluations
|
:max_f_eval: maximum number of function evaluations
|
||||||
:messages: whether to display during optimisation
|
:messages: whether to display during optimisation
|
||||||
:param optimzer: whice optimizer to use (defaults to self.preferred optimizer)
|
:param optimzer: which optimizer to use (defaults to self.preferred optimizer)
|
||||||
:type optimzer: string TODO: valid strings?
|
:type optimzer: string TODO: valid strings?
|
||||||
"""
|
"""
|
||||||
if optimizer is None:
|
if optimizer is None:
|
||||||
|
|
@ -288,13 +263,13 @@ class model(parameterised):
|
||||||
start = self._get_params_transformed()
|
start = self._get_params_transformed()
|
||||||
|
|
||||||
optimizer = optimization.get_optimizer(optimizer)
|
optimizer = optimization.get_optimizer(optimizer)
|
||||||
opt = optimizer(start, model = self, **kwargs)
|
opt = optimizer(start, model=self, **kwargs)
|
||||||
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
|
opt.run(f_fp=self.objective_and_gradients, f=self.objective_function, fp=self.objective_function_gradients)
|
||||||
self.optimization_runs.append(opt)
|
self.optimization_runs.append(opt)
|
||||||
|
|
||||||
self._set_params_transformed(opt.x_opt)
|
self._set_params_transformed(opt.x_opt)
|
||||||
|
|
||||||
def optimize_SGD(self, momentum = 0.1, learning_rate = 0.01, iterations = 20, **kwargs):
|
def optimize_SGD(self, momentum=0.1, learning_rate=0.01, iterations=20, **kwargs):
|
||||||
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
|
# assert self.Y.shape[1] > 1, "SGD only works with D > 1"
|
||||||
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
|
sgd = SGD.StochasticGD(self, iterations, learning_rate, momentum, **kwargs)
|
||||||
sgd.run()
|
sgd.run()
|
||||||
|
|
@ -302,8 +277,8 @@ class model(parameterised):
|
||||||
|
|
||||||
def Laplace_covariance(self):
|
def Laplace_covariance(self):
|
||||||
"""return the covariance matric of a Laplace approximatino at the current (stationary) point"""
|
"""return the covariance matric of a Laplace approximatino at the current (stationary) point"""
|
||||||
#TODO add in the prior contributions for MAP estimation
|
# TODO add in the prior contributions for MAP estimation
|
||||||
#TODO fix the hessian for tied, constrained and fixed components
|
# TODO fix the hessian for tied, constrained and fixed components
|
||||||
if hasattr(self, 'log_likelihood_hessian'):
|
if hasattr(self, 'log_likelihood_hessian'):
|
||||||
A = -self.log_likelihood_hessian()
|
A = -self.log_likelihood_hessian()
|
||||||
|
|
||||||
|
|
@ -317,8 +292,8 @@ class model(parameterised):
|
||||||
A = -h(x)
|
A = -h(x)
|
||||||
self._set_params(x)
|
self._set_params(x)
|
||||||
# check for almost zero components on the diagonal which screw up the cholesky
|
# check for almost zero components on the diagonal which screw up the cholesky
|
||||||
aa = np.nonzero((np.diag(A)<1e-6) & (np.diag(A)>0.))[0]
|
aa = np.nonzero((np.diag(A) < 1e-6) & (np.diag(A) > 0.))[0]
|
||||||
A[aa,aa] = 0.
|
A[aa, aa] = 0.
|
||||||
return A
|
return A
|
||||||
|
|
||||||
def Laplace_evidence(self):
|
def Laplace_evidence(self):
|
||||||
|
|
@ -329,11 +304,11 @@ class model(parameterised):
|
||||||
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
|
hld = np.sum(np.log(np.diag(jitchol(A)[0])))
|
||||||
except:
|
except:
|
||||||
return np.nan
|
return np.nan
|
||||||
return 0.5*self._get_params().size*np.log(2*np.pi) + self.log_likelihood() - hld
|
return 0.5 * self._get_params().size * np.log(2 * np.pi) + self.log_likelihood() - hld
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
s = parameterised.__str__(self).split('\n')
|
s = parameterised.__str__(self).split('\n')
|
||||||
#add priors to the string
|
# add priors to the string
|
||||||
strs = [str(p) if p is not None else '' for p in self.priors]
|
strs = [str(p) if p is not None else '' for p in self.priors]
|
||||||
width = np.array(max([len(p) for p in strs] + [5])) + 4
|
width = np.array(max([len(p) for p in strs] + [5])) + 4
|
||||||
|
|
||||||
|
|
@ -344,16 +319,16 @@ class model(parameterised):
|
||||||
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
|
obj_funct += ', Log prior: {0:.3e}, LL+prior = {0:.3e}'.format(log_prior, log_like + log_prior)
|
||||||
obj_funct += '\n\n'
|
obj_funct += '\n\n'
|
||||||
s[0] = obj_funct + s[0]
|
s[0] = obj_funct + s[0]
|
||||||
s[0] += "|{h:^{col}}".format(h = 'Prior', col = width)
|
s[0] += "|{h:^{col}}".format(h='Prior', col=width)
|
||||||
s[1] += '-'*(width + 1)
|
s[1] += '-' * (width + 1)
|
||||||
|
|
||||||
for p in range(2, len(strs)+2):
|
for p in range(2, len(strs) + 2):
|
||||||
s[p] += '|{prior:^{width}}'.format(prior = strs[p-2], width = width)
|
s[p] += '|{prior:^{width}}'.format(prior=strs[p - 2], width=width)
|
||||||
|
|
||||||
return '\n'.join(s)
|
return '\n'.join(s)
|
||||||
|
|
||||||
|
|
||||||
def checkgrad(self, target_param = None, verbose=False, step=1e-6, tolerance = 1e-3):
|
def checkgrad(self, target_param=None, verbose=False, step=1e-6, tolerance=1e-3):
|
||||||
"""
|
"""
|
||||||
Check the gradient of the model by comparing to a numerical estimate.
|
Check the gradient of the model by comparing to a numerical estimate.
|
||||||
If the verbose flag is passed, invividual components are tested (and printed)
|
If the verbose flag is passed, invividual components are tested (and printed)
|
||||||
|
|
@ -373,27 +348,24 @@ class model(parameterised):
|
||||||
x = self._get_params_transformed().copy()
|
x = self._get_params_transformed().copy()
|
||||||
|
|
||||||
if not verbose:
|
if not verbose:
|
||||||
#just check the global ratio
|
# just check the global ratio
|
||||||
dx = step*np.sign(np.random.uniform(-1,1,x.size))
|
dx = step * np.sign(np.random.uniform(-1, 1, x.size))
|
||||||
|
|
||||||
#evaulate around the point x
|
# evaulate around the point x
|
||||||
f1, g1 = self.objective_and_gradients(x+dx)
|
f1, g1 = self.objective_and_gradients(x + dx)
|
||||||
f2, g2 = self.objective_and_gradients(x-dx)
|
f2, g2 = self.objective_and_gradients(x - dx)
|
||||||
gradient = self.objective_function_gradients(x)
|
gradient = self.objective_function_gradients(x)
|
||||||
|
|
||||||
numerical_gradient = (f1-f2)/(2*dx)
|
numerical_gradient = (f1 - f2) / (2 * dx)
|
||||||
global_ratio = (f1-f2)/(2*np.dot(dx,gradient))
|
global_ratio = (f1 - f2) / (2 * np.dot(dx, gradient))
|
||||||
|
|
||||||
if (np.abs(1.-global_ratio)<tolerance) and not np.isnan(global_ratio):
|
return (np.abs(1. - global_ratio) < tolerance) or (np.abs(gradient - numerical_gradient).mean() - 1) < tolerance
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
#check the gradient of each parameter individually, and do some pretty printing
|
# check the gradient of each parameter individually, and do some pretty printing
|
||||||
try:
|
try:
|
||||||
names = self._get_param_names_transformed()
|
names = self._get_param_names_transformed()
|
||||||
except NotImplementedError:
|
except NotImplementedError:
|
||||||
names = ['Variable %i'%i for i in range(len(x))]
|
names = ['Variable %i' % i for i in range(len(x))]
|
||||||
|
|
||||||
# Prepare for pretty-printing
|
# Prepare for pretty-printing
|
||||||
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
|
header = ['Name', 'Ratio', 'Difference', 'Analytical', 'Numerical']
|
||||||
|
|
@ -402,9 +374,9 @@ class model(parameterised):
|
||||||
cols = [max_names]
|
cols = [max_names]
|
||||||
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
|
cols.extend([max(float_len, len(header[i])) for i in range(1, len(header))])
|
||||||
cols = np.array(cols) + 5
|
cols = np.array(cols) + 5
|
||||||
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
|
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
|
||||||
header_string = map(lambda x: '|'.join(x), [header_string])
|
header_string = map(lambda x: '|'.join(x), [header_string])
|
||||||
separator = '-'*len(header_string[0])
|
separator = '-' * len(header_string[0])
|
||||||
print '\n'.join([header_string[0], separator])
|
print '\n'.join([header_string[0], separator])
|
||||||
|
|
||||||
if target_param is None:
|
if target_param is None:
|
||||||
|
|
@ -420,11 +392,11 @@ class model(parameterised):
|
||||||
f2, g2 = self.objective_and_gradients(xx)
|
f2, g2 = self.objective_and_gradients(xx)
|
||||||
gradient = self.objective_function_gradients(x)[i]
|
gradient = self.objective_function_gradients(x)[i]
|
||||||
|
|
||||||
numerical_gradient = (f1-f2)/(2*step)
|
numerical_gradient = (f1 - f2) / (2 * step)
|
||||||
ratio = (f1-f2)/(2*step*gradient)
|
ratio = (f1 - f2) / (2 * step * gradient)
|
||||||
difference = np.abs((f1-f2)/2/step - gradient)
|
difference = np.abs((f1 - f2) / 2 / step - gradient)
|
||||||
|
|
||||||
if (np.abs(ratio-1)<tolerance):
|
if (np.abs(1. - ratio) < tolerance) or np.abs(difference) < tolerance:
|
||||||
formatted_name = "\033[92m {0} \033[0m".format(names[i])
|
formatted_name = "\033[92m {0} \033[0m".format(names[i])
|
||||||
else:
|
else:
|
||||||
formatted_name = "\033[91m {0} \033[0m".format(names[i])
|
formatted_name = "\033[91m {0} \033[0m".format(names[i])
|
||||||
|
|
@ -432,32 +404,33 @@ class model(parameterised):
|
||||||
d = '%.6f' % float(difference)
|
d = '%.6f' % float(difference)
|
||||||
g = '%.6f' % gradient
|
g = '%.6f' % gradient
|
||||||
ng = '%.6f' % float(numerical_gradient)
|
ng = '%.6f' % float(numerical_gradient)
|
||||||
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name,r,d,g, ng, c0 = cols[0]+9, c1 = cols[1], c2 = cols[2], c3 = cols[3], c4 = cols[4])
|
grad_string = "{0:^{c0}}|{1:^{c1}}|{2:^{c2}}|{3:^{c3}}|{4:^{c4}}".format(formatted_name, r, d, g, ng, c0=cols[0] + 9, c1=cols[1], c2=cols[2], c3=cols[3], c4=cols[4])
|
||||||
print grad_string
|
print grad_string
|
||||||
|
|
||||||
def input_sensitivity(self):
|
def input_sensitivity(self):
|
||||||
"""
|
"""
|
||||||
return an array describing the sesitivity of the model to each input
|
return an array describing the sesitivity of the model to each input
|
||||||
|
|
||||||
NB. Right now, we're basing this on the lengthscales (or variances) of the kernel.
|
NB. Right now, we're basing this on the lengthscales (or
|
||||||
TODO: proper sensitivity analysis
|
variances) of the kernel. TODO: proper sensitivity analysis
|
||||||
"""
|
where we integrate across the model inputs and evaluate the
|
||||||
|
effect on the variance of the model output. """
|
||||||
|
|
||||||
if not hasattr(self,'kern'):
|
if not hasattr(self, 'kern'):
|
||||||
raise ValueError, "this model has no kernel"
|
raise ValueError, "this model has no kernel"
|
||||||
|
|
||||||
k = [p for p in self.kern.parts if p.name in ['rbf','linear']]
|
k = [p for p in self.kern.parts if p.name in ['rbf', 'linear']]
|
||||||
if (not len(k)==1) or (not k[0].ARD):
|
if (not len(k) == 1) or (not k[0].ARD):
|
||||||
raise ValueError, "cannot determine sensitivity for this kernel"
|
raise ValueError, "cannot determine sensitivity for this kernel"
|
||||||
k = k[0]
|
k = k[0]
|
||||||
|
|
||||||
if k.name=='rbf':
|
if k.name == 'rbf':
|
||||||
return k.lengthscale
|
return k.lengthscale
|
||||||
elif k.name=='linear':
|
elif k.name == 'linear':
|
||||||
return 1./k.variances
|
return 1. / k.variances
|
||||||
|
|
||||||
|
|
||||||
def pseudo_EM(self,epsilon=.1,**kwargs):
|
def pseudo_EM(self, epsilon=.1, **kwargs):
|
||||||
"""
|
"""
|
||||||
TODO: Should this not bein the GP class?
|
TODO: Should this not bein the GP class?
|
||||||
EM - like algorithm for Expectation Propagation and Laplace approximation
|
EM - like algorithm for Expectation Propagation and Laplace approximation
|
||||||
|
|
@ -471,7 +444,7 @@ class model(parameterised):
|
||||||
:type optimzer: string TODO: valid strings?
|
:type optimzer: string TODO: valid strings?
|
||||||
|
|
||||||
"""
|
"""
|
||||||
assert isinstance(self.likelihood,likelihoods.EP), "EPEM is only available for EP likelihoods"
|
assert isinstance(self.likelihood, likelihoods.EP), "EPEM is only available for EP likelihoods"
|
||||||
ll_change = epsilon + 1.
|
ll_change = epsilon + 1.
|
||||||
iteration = 0
|
iteration = 0
|
||||||
last_ll = -np.exp(1000)
|
last_ll = -np.exp(1000)
|
||||||
|
|
@ -491,9 +464,9 @@ class model(parameterised):
|
||||||
ll_change = new_ll - last_ll
|
ll_change = new_ll - last_ll
|
||||||
|
|
||||||
if ll_change < 0:
|
if ll_change < 0:
|
||||||
self.likelihood = last_approximation #restore previous likelihood approximation
|
self.likelihood = last_approximation # restore previous likelihood approximation
|
||||||
self._set_params(last_params) #restore model parameters
|
self._set_params(last_params) # restore model parameters
|
||||||
print "Log-likelihood decrement: %s \nLast likelihood update discarded." %ll_change
|
print "Log-likelihood decrement: %s \nLast likelihood update discarded." % ll_change
|
||||||
stop = True
|
stop = True
|
||||||
else:
|
else:
|
||||||
self.optimize(**kwargs)
|
self.optimize(**kwargs)
|
||||||
|
|
@ -502,5 +475,5 @@ class model(parameterised):
|
||||||
stop = True
|
stop = True
|
||||||
iteration += 1
|
iteration += 1
|
||||||
if stop:
|
if stop:
|
||||||
print "%s iterations." %iteration
|
print "%s iterations." % iteration
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,26 +8,8 @@ import copy
|
||||||
import cPickle
|
import cPickle
|
||||||
import os
|
import os
|
||||||
from ..util.squashers import sigmoid
|
from ..util.squashers import sigmoid
|
||||||
|
import warnings
|
||||||
def truncate_pad(string,width,align='m'):
|
import transformations
|
||||||
"""
|
|
||||||
A helper function to make aligned strings for parameterised.__str__
|
|
||||||
"""
|
|
||||||
width=max(width,4)
|
|
||||||
if len(string)>width:
|
|
||||||
return string[:width-3]+'...'
|
|
||||||
elif len(string)==width:
|
|
||||||
return string
|
|
||||||
elif len(string)<width:
|
|
||||||
diff = width-len(string)
|
|
||||||
if align=='m':
|
|
||||||
return ' '*np.floor(diff/2.) + string + ' '*np.ceil(diff/2.)
|
|
||||||
elif align=='l':
|
|
||||||
return string + ' '*diff
|
|
||||||
elif align=='r':
|
|
||||||
return ' '*diff + string
|
|
||||||
else:
|
|
||||||
raise ValueError
|
|
||||||
|
|
||||||
class parameterised(object):
|
class parameterised(object):
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
@ -35,50 +17,108 @@ class parameterised(object):
|
||||||
This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
|
This is the base class for model and kernel. Mostly just handles tieing and constraining of parameters
|
||||||
"""
|
"""
|
||||||
self.tied_indices = []
|
self.tied_indices = []
|
||||||
self.constrained_fixed_indices = []
|
self.fixed_indices = []
|
||||||
self.constrained_fixed_values = []
|
self.fixed_values = []
|
||||||
self.constrained_positive_indices = np.empty(shape=(0,),dtype=np.int64)
|
self.constrained_indices = []
|
||||||
self.constrained_negative_indices = np.empty(shape=(0,),dtype=np.int64)
|
self.constraints = []
|
||||||
self.constrained_bounded_indices = []
|
|
||||||
self.constrained_bounded_uppers = []
|
|
||||||
self.constrained_bounded_lowers = []
|
|
||||||
|
|
||||||
def pickle(self,filename,protocol=-1):
|
def pickle(self, filename, protocol= -1):
|
||||||
f = file(filename,'w')
|
f = file(filename, 'w')
|
||||||
cPickle.dump(self,f,protocol)
|
cPickle.dump(self, f, protocol)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
"""
|
"""Returns a (deep) copy of the current model """
|
||||||
Returns a (deep) copy of the current model
|
|
||||||
"""
|
|
||||||
|
|
||||||
return copy.deepcopy(self)
|
return copy.deepcopy(self)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def params(self):
|
||||||
|
"""
|
||||||
|
Returns a **copy** of parameters in non transformed space
|
||||||
|
|
||||||
|
:see_also: :py:func:`GPy.core.parameterised.params_transformed`
|
||||||
|
"""
|
||||||
|
return self._get_params()
|
||||||
|
|
||||||
|
@params.setter
|
||||||
|
def params(self, params):
|
||||||
|
self._set_params(params)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def params_transformed(self):
|
||||||
|
"""
|
||||||
|
Returns a **copy** of parameters in transformed space
|
||||||
|
|
||||||
|
:see_also: :py:func:`GPy.core.parameterised.params`
|
||||||
|
"""
|
||||||
|
return self._get_params_transformed()
|
||||||
|
|
||||||
|
@params_transformed.setter
|
||||||
|
def params_transformed(self, params):
|
||||||
|
self._set_params_transformed(params)
|
||||||
|
|
||||||
|
_get_set_deprecation = """get and set methods wont be available at next minor release
|
||||||
|
in the next releases you will get and set with following syntax:
|
||||||
|
Assume m is a model class:
|
||||||
|
print m['var'] # > prints all parameters matching 'var'
|
||||||
|
m['var'] = 2. # > sets all parameters matching 'var' to 2.
|
||||||
|
m['var'] = <array-like> # > sets parameters matching 'var' to <array-like>
|
||||||
|
"""
|
||||||
|
def get(self, name):
|
||||||
|
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
|
||||||
|
return self[name]
|
||||||
|
|
||||||
|
def set(self, name, val):
|
||||||
|
warnings.warn(self._get_set_deprecation, FutureWarning, stacklevel=2)
|
||||||
|
self[name] = val
|
||||||
|
|
||||||
|
def __getitem__(self, name, return_names=False):
|
||||||
|
"""
|
||||||
|
Get a model parameter by name. The name is applied as a regular
|
||||||
|
expression and all parameters that match that regular expression are
|
||||||
|
returned.
|
||||||
|
"""
|
||||||
|
matches = self.grep_param_names(name)
|
||||||
|
if len(matches):
|
||||||
|
if return_names:
|
||||||
|
return self._get_params()[matches], np.asarray(self._get_param_names())[matches].tolist()
|
||||||
|
else:
|
||||||
|
return self._get_params()[matches]
|
||||||
|
else:
|
||||||
|
raise AttributeError, "no parameter matches %s" % name
|
||||||
|
|
||||||
|
def __setitem__(self, name, val):
|
||||||
|
"""
|
||||||
|
Set model parameter(s) by name. The name is provided as a regular
|
||||||
|
expression. All parameters matching that regular expression are set to
|
||||||
|
the given value.
|
||||||
|
"""
|
||||||
|
matches = self.grep_param_names(name)
|
||||||
|
if len(matches):
|
||||||
|
val = np.array(val)
|
||||||
|
assert (val.size == 1) or val.size == len(matches), "Shape mismatch: {}:({},)".format(val.size, len(matches))
|
||||||
|
x = self.params
|
||||||
|
x[matches] = val
|
||||||
|
self.params = x
|
||||||
|
else:
|
||||||
|
raise AttributeError, "no parameter matches %s" % name
|
||||||
|
|
||||||
def tie_params(self, which):
|
def tie_params(self, which):
|
||||||
matches = self.grep_param_names(which)
|
matches = self.grep_param_names(which)
|
||||||
assert matches.size > 0, "need at least something to tie together"
|
assert matches.size > 0, "need at least something to tie together"
|
||||||
if len(self.tied_indices):
|
if len(self.tied_indices):
|
||||||
assert not np.any(matches[:,None]==np.hstack(self.tied_indices)), "Some indices are already tied!"
|
assert not np.any(matches[:, None] == np.hstack(self.tied_indices)), "Some indices are already tied!"
|
||||||
self.tied_indices.append(matches)
|
self.tied_indices.append(matches)
|
||||||
#TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
|
# TODO only one of the priors will be evaluated. Give a warning message if the priors are not identical
|
||||||
if hasattr(self,'prior'):
|
if hasattr(self, 'prior'):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
self._set_params_transformed(self._get_params_transformed())# sets tied parameters to single value
|
self._set_params_transformed(self._get_params_transformed()) # sets tied parameters to single value
|
||||||
|
|
||||||
def untie_everything(self):
|
def untie_everything(self):
|
||||||
"""Unties all parameters by setting tied_indices to an empty list."""
|
"""Unties all parameters by setting tied_indices to an empty list."""
|
||||||
self.tied_indices = []
|
self.tied_indices = []
|
||||||
|
|
||||||
def all_constrained_indices(self):
|
|
||||||
"""Return a np array of all the constrained indices"""
|
|
||||||
ret = [np.hstack(i) for i in [self.constrained_bounded_indices, self.constrained_positive_indices, self.constrained_negative_indices, self.constrained_fixed_indices] if len(i)]
|
|
||||||
if len(ret):
|
|
||||||
return np.hstack(ret)
|
|
||||||
else:
|
|
||||||
return []
|
|
||||||
def grep_param_names(self, expr):
|
def grep_param_names(self, expr):
|
||||||
"""
|
"""
|
||||||
Arguments
|
Arguments
|
||||||
|
|
@ -91,7 +131,7 @@ class parameterised(object):
|
||||||
|
|
||||||
Notes
|
Notes
|
||||||
-----
|
-----
|
||||||
Other objects are passed through - i.e. integers which were'nt meant for grepping
|
Other objects are passed through - i.e. integers which weren't meant for grepping
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if type(expr) in [str, np.string_, np.str]:
|
if type(expr) in [str, np.string_, np.str]:
|
||||||
|
|
@ -103,101 +143,78 @@ class parameterised(object):
|
||||||
return expr
|
return expr
|
||||||
|
|
||||||
def Nparam_transformed(self):
|
def Nparam_transformed(self):
|
||||||
ties = 0
|
removed = 0
|
||||||
for ar in self.tied_indices:
|
for tie in self.tied_indices:
|
||||||
ties += ar.size - 1
|
removed += tie.size - 1
|
||||||
return self.Nparam - len(self.constrained_fixed_indices) - ties
|
|
||||||
|
|
||||||
def constrain_positive(self, which):
|
for fix in self.fixed_indices:
|
||||||
"""
|
removed += fix.size
|
||||||
Set positive constraints.
|
|
||||||
|
|
||||||
Arguments
|
return len(self._get_params()) - removed
|
||||||
---------
|
|
||||||
which -- np.array(dtype=int), or regular expression object or string
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(which)
|
|
||||||
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
|
|
||||||
self.constrained_positive_indices = np.hstack((self.constrained_positive_indices, matches))
|
|
||||||
#check to ensure constraint is in place
|
|
||||||
x = self._get_params()
|
|
||||||
for i,xx in enumerate(x):
|
|
||||||
if (xx<0) & (i in matches):
|
|
||||||
x[i] = -xx
|
|
||||||
self._set_params(x)
|
|
||||||
|
|
||||||
|
def unconstrain(self, which):
|
||||||
def unconstrain(self,which):
|
|
||||||
"""Unconstrain matching parameters. does not untie parameters"""
|
"""Unconstrain matching parameters. does not untie parameters"""
|
||||||
matches = self.grep_param_names(which)
|
matches = self.grep_param_names(which)
|
||||||
#positive/negative
|
|
||||||
self.constrained_positive_indices = np.delete(self.constrained_positive_indices,np.nonzero(np.sum(self.constrained_positive_indices[:,None]==matches[None,:],1))[0])
|
# transformed constraints:
|
||||||
self.constrained_negative_indices = np.delete(self.constrained_negative_indices,np.nonzero(np.sum(self.constrained_negative_indices[:,None]==matches[None,:],1))[0])
|
for match in matches:
|
||||||
#bounded
|
self.constrained_indices = [i[i<>match] for i in self.constrained_indices]
|
||||||
if len(self.constrained_bounded_indices):
|
|
||||||
self.constrained_bounded_indices = [np.delete(a,np.nonzero(np.sum(a[:,None]==matches[None,:],1))[0]) for a in self.constrained_bounded_indices]
|
#remove empty constraints
|
||||||
if np.hstack(self.constrained_bounded_indices).size:
|
tmp = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
|
||||||
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = zip(*[(u,l,i) for u,l,i in zip(self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices) if i.size])
|
|
||||||
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = list(self.constrained_bounded_uppers), list(self.constrained_bounded_lowers), list(self.constrained_bounded_indices)
|
|
||||||
else:
|
|
||||||
self.constrained_bounded_uppers, self.constrained_bounded_lowers, self.constrained_bounded_indices = [],[],[]
|
|
||||||
#fixed:
|
|
||||||
for i,indices in enumerate(self.constrained_fixed_indices):
|
|
||||||
self.constrained_fixed_indices[i] = np.delete(indices,np.nonzero(np.sum(indices[:,None]==matches[None,:],1))[0])
|
|
||||||
#remove empty elements
|
|
||||||
tmp = [(i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values) if len(i)]
|
|
||||||
if tmp:
|
if tmp:
|
||||||
self.constrained_fixed_indices, self.constrained_fixed_values = zip(*tmp)
|
self.constrained_indices, self.constraints = zip(*[(i,t) for i,t in zip(self.constrained_indices,self.constraints) if len(i)])
|
||||||
self.constrained_fixed_indices, self.constrained_fixed_values = list(self.constrained_fixed_indices), list(self.constrained_fixed_values)
|
self.constrained_indices, self.constraints = list(self.constrained_indices), list(self.constraints)
|
||||||
|
|
||||||
|
# fixed:
|
||||||
|
self.fixed_values = [np.delete(values, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices,values in zip(self.fixed_indices,self.fixed_values)]
|
||||||
|
self.fixed_indices = [np.delete(indices, np.nonzero(np.sum(indices[:, None] == matches[None, :], 1))[0]) for indices in self.fixed_indices]
|
||||||
|
|
||||||
|
# remove empty elements
|
||||||
|
tmp = [(i, v) for i, v in zip(self.fixed_indices, self.fixed_values) if len(i)]
|
||||||
|
if tmp:
|
||||||
|
self.fixed_indices, self.fixed_values = zip(*tmp)
|
||||||
|
self.fixed_indices, self.fixed_values = list(self.fixed_indices), list(self.fixed_values)
|
||||||
else:
|
else:
|
||||||
self.constrained_fixed_indices, self.constrained_fixed_values = [],[]
|
self.fixed_indices, self.fixed_values = [], []
|
||||||
|
|
||||||
|
def constrain_negative(self, which):
|
||||||
|
""" Set negative constraints. """
|
||||||
|
self.constrain(which, transformations.negative_exponent())
|
||||||
|
|
||||||
|
def constrain_positive(self, which):
|
||||||
|
""" Set positive constraints. """
|
||||||
|
self.constrain(which, transformations.logexp())
|
||||||
|
|
||||||
def constrain_negative(self,which):
|
def constrain_bounded(self, which,lower, upper):
|
||||||
"""
|
""" Set bounded constraints. """
|
||||||
Set negative constraints.
|
self.constrain(which, transformations.logistic(lower, upper))
|
||||||
|
|
||||||
:param which: which variables to constrain
|
def all_constrained_indices(self):
|
||||||
:type which: regular expression string
|
if len(self.constrained_indices) or len(self.fixed_indices):
|
||||||
|
return np.hstack(self.constrained_indices + self.fixed_indices)
|
||||||
|
else:
|
||||||
|
return np.empty(shape=(0,))
|
||||||
|
|
||||||
|
def constrain(self,which,transform):
|
||||||
|
assert isinstance(transform,transformations.transformation)
|
||||||
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(which)
|
matches = self.grep_param_names(which)
|
||||||
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
|
overlap = set(matches).intersection(set(self.all_constrained_indices()))
|
||||||
self.constrained_negative_indices = np.hstack((self.constrained_negative_indices, matches))
|
if overlap:
|
||||||
#check to ensure constraint is in place
|
self.unconstrain(np.asarray(list(overlap)))
|
||||||
|
print 'Warning: re-constraining these parameters'
|
||||||
|
pn = self._get_param_names()
|
||||||
|
for i in overlap:
|
||||||
|
print pn[i]
|
||||||
|
|
||||||
|
self.constrained_indices.append(matches)
|
||||||
|
self.constraints.append(transform)
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
for i,xx in enumerate(x):
|
x[matches] = transform.initialize(x[matches])
|
||||||
if (xx>0.) and (i in matches):
|
|
||||||
x[i] = -xx
|
|
||||||
self._set_params(x)
|
self._set_params(x)
|
||||||
|
|
||||||
|
def constrain_fixed(self, which, value=None):
|
||||||
|
|
||||||
def constrain_bounded(self, which, lower, upper):
|
|
||||||
"""Set bounded constraints.
|
|
||||||
|
|
||||||
Arguments
|
|
||||||
---------
|
|
||||||
which -- np.array(dtype=int), or regular expression object or string
|
|
||||||
upper -- (float) the upper bound on the constraint
|
|
||||||
lower -- (float) the lower bound on the constraint
|
|
||||||
"""
|
|
||||||
matches = self.grep_param_names(which)
|
|
||||||
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
|
|
||||||
assert lower < upper, "lower bound must be smaller than upper bound!"
|
|
||||||
self.constrained_bounded_indices.append(matches)
|
|
||||||
self.constrained_bounded_uppers.append(upper)
|
|
||||||
self.constrained_bounded_lowers.append(lower)
|
|
||||||
#check to ensure constraint is in place
|
|
||||||
x = self._get_params()
|
|
||||||
for i,xx in enumerate(x):
|
|
||||||
if ((xx<=lower)|(xx>=upper)) & (i in matches):
|
|
||||||
x[i] = sigmoid(xx)*(upper-lower) + lower
|
|
||||||
self._set_params(x)
|
|
||||||
|
|
||||||
|
|
||||||
def constrain_fixed(self, which, value = None):
|
|
||||||
"""
|
"""
|
||||||
Arguments
|
Arguments
|
||||||
---------
|
---------
|
||||||
|
|
@ -211,55 +228,50 @@ class parameterised(object):
|
||||||
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
|
To fix multiple parameters to the same value, simply pass a regular expression which matches both parameter names, or pass both of the indexes
|
||||||
"""
|
"""
|
||||||
matches = self.grep_param_names(which)
|
matches = self.grep_param_names(which)
|
||||||
assert not np.any(matches[:,None]==self.all_constrained_indices()), "Some indices are already constrained"
|
assert not np.any(matches[:, None] == self.all_constrained_indices()), "Some indices are already constrained"
|
||||||
self.constrained_fixed_indices.append(matches)
|
self.fixed_indices.append(matches)
|
||||||
if value != None:
|
if value != None:
|
||||||
self.constrained_fixed_values.append(value)
|
self.fixed_values.append(value)
|
||||||
else:
|
else:
|
||||||
self.constrained_fixed_values.append(self._get_params()[self.constrained_fixed_indices[-1]])
|
self.fixed_values.append(self._get_params()[self.fixed_indices[-1]])
|
||||||
|
|
||||||
#self.constrained_fixed_values.append(value)
|
# self.fixed_values.append(value)
|
||||||
self._set_params_transformed(self._get_params_transformed())
|
self._set_params_transformed(self._get_params_transformed())
|
||||||
|
|
||||||
def _get_params_transformed(self):
|
def _get_params_transformed(self):
|
||||||
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
|
"""use self._get_params to get the 'true' parameters of the model, which are then tied, constrained and fixed"""
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
x[self.constrained_positive_indices] = np.log(x[self.constrained_positive_indices])
|
[np.put(x,i,t.finv(x[i])) for i,t in zip(self.constrained_indices,self.constraints)]
|
||||||
x[self.constrained_negative_indices] = np.log(-x[self.constrained_negative_indices])
|
|
||||||
[np.put(x,i,np.log(np.clip(x[i]-l,1e-10,np.inf)/np.clip(h-x[i],1e-10,np.inf))) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
|
|
||||||
|
|
||||||
to_remove = self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]
|
to_remove = self.fixed_indices + [t[1:] for t in self.tied_indices]
|
||||||
if len(to_remove):
|
if len(to_remove):
|
||||||
return np.delete(x,np.hstack(to_remove))
|
return np.delete(x, np.hstack(to_remove))
|
||||||
else:
|
else:
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
def _set_params_transformed(self, x):
|
||||||
def _set_params_transformed(self,x):
|
|
||||||
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
|
""" takes the vector x, which is then modified (by untying, reparameterising or inserting fixed values), and then call self._set_params"""
|
||||||
|
|
||||||
#work out how many places are fixed, and where they are. tricky logic!
|
# work out how many places are fixed, and where they are. tricky logic!
|
||||||
Nfix_places = 0.
|
fix_places = self.fixed_indices + [t[1:] for t in self.tied_indices]
|
||||||
if len(self.tied_indices):
|
if len(fix_places):
|
||||||
Nfix_places += np.hstack(self.tied_indices).size-len(self.tied_indices)
|
fix_places = np.hstack(fix_places)
|
||||||
if len(self.constrained_fixed_indices):
|
Nfix_places = fix_places.size
|
||||||
Nfix_places += np.hstack(self.constrained_fixed_indices).size
|
|
||||||
if Nfix_places:
|
|
||||||
fix_places = np.hstack(self.constrained_fixed_indices+[t[1:] for t in self.tied_indices])
|
|
||||||
else:
|
else:
|
||||||
fix_places = []
|
Nfix_places = 0
|
||||||
|
|
||||||
free_places = np.setdiff1d(np.arange(Nfix_places+x.size,dtype=np.int),fix_places)
|
free_places = np.setdiff1d(np.arange(Nfix_places + x.size, dtype=np.int), fix_places)
|
||||||
|
|
||||||
#put the models values in the vector xx
|
# put the models values in the vector xx
|
||||||
xx = np.zeros(Nfix_places+free_places.size,dtype=np.float64)
|
xx = np.zeros(Nfix_places + free_places.size, dtype=np.float64)
|
||||||
|
|
||||||
xx[free_places] = x
|
xx[free_places] = x
|
||||||
[np.put(xx,i,v) for i,v in zip(self.constrained_fixed_indices, self.constrained_fixed_values)]
|
[np.put(xx, i, v) for i, v in zip(self.fixed_indices, self.fixed_values)]
|
||||||
[np.put(xx,i,v) for i,v in [(t[1:],xx[t[0]]) for t in self.tied_indices] ]
|
[np.put(xx, i, v) for i, v in [(t[1:], xx[t[0]]) for t in self.tied_indices] ]
|
||||||
xx[self.constrained_positive_indices] = np.exp(xx[self.constrained_positive_indices])
|
|
||||||
xx[self.constrained_negative_indices] = -np.exp(xx[self.constrained_negative_indices])
|
[np.put(xx,i,t.f(xx[i])) for i,t in zip(self.constrained_indices, self.constraints)]
|
||||||
[np.put(xx,i,low+sigmoid(xx[i])*(high-low)) for i,low,high in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
|
if hasattr(self,'debug'):
|
||||||
|
stop
|
||||||
self._set_params(xx)
|
self._set_params(xx)
|
||||||
|
|
||||||
def _get_param_names_transformed(self):
|
def _get_param_names_transformed(self):
|
||||||
|
|
@ -267,33 +279,29 @@ class parameterised(object):
|
||||||
Returns the parameter names as propagated after constraining,
|
Returns the parameter names as propagated after constraining,
|
||||||
tying or fixing, i.e. a list of the same length as _get_params_transformed()
|
tying or fixing, i.e. a list of the same length as _get_params_transformed()
|
||||||
"""
|
"""
|
||||||
n = self._get_param_names()
|
n = self._get_param_names()
|
||||||
|
|
||||||
#remove/concatenate the tied parameter names
|
# remove/concatenate the tied parameter names
|
||||||
if len(self.tied_indices):
|
if len(self.tied_indices):
|
||||||
for t in self.tied_indices:
|
for t in self.tied_indices:
|
||||||
n[t[0]] = "<tie>".join([n[tt] for tt in t])
|
n[t[0]] = "<tie>".join([n[tt] for tt in t])
|
||||||
remove = np.hstack([t[1:] for t in self.tied_indices])
|
remove = np.hstack([t[1:] for t in self.tied_indices])
|
||||||
else:
|
else:
|
||||||
remove=np.empty(shape=(0,),dtype=np.int)
|
remove = np.empty(shape=(0,), dtype=np.int)
|
||||||
|
|
||||||
#also remove the fixed params
|
# also remove the fixed params
|
||||||
if len(self.constrained_fixed_indices):
|
if len(self.fixed_indices):
|
||||||
remove = np.hstack((remove, np.hstack(self.constrained_fixed_indices)))
|
remove = np.hstack((remove, np.hstack(self.fixed_indices)))
|
||||||
|
|
||||||
#add markers to show that some variables are constrained
|
# add markers to show that some variables are constrained
|
||||||
for i in self.constrained_positive_indices:
|
for i,t in zip(self.constrained_indices,self.constraints):
|
||||||
n[i] = n[i]+'(+ve)'
|
|
||||||
for i in self.constrained_negative_indices:
|
|
||||||
n[i] = n[i]+'(-ve)'
|
|
||||||
for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers):
|
|
||||||
for ii in i:
|
for ii in i:
|
||||||
n[ii] = n[ii]+'(bounded)'
|
n[ii] = n[ii] + t.__str__()
|
||||||
|
|
||||||
n = [nn for i,nn in enumerate(n) if not i in remove]
|
n = [nn for i, nn in enumerate(n) if not i in remove]
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def __str__(self,nw=30):
|
def __str__(self, nw=30):
|
||||||
"""
|
"""
|
||||||
Return a string describing the parameter names and their ties and constraints
|
Return a string describing the parameter names and their ties and constraints
|
||||||
"""
|
"""
|
||||||
|
|
@ -302,25 +310,21 @@ class parameterised(object):
|
||||||
|
|
||||||
if not N:
|
if not N:
|
||||||
return "This object has no free parameters."
|
return "This object has no free parameters."
|
||||||
header = ['Name','Value','Constraints','Ties']
|
header = ['Name', 'Value', 'Constraints', 'Ties']
|
||||||
values = self._get_params() #map(str,self._get_params())
|
values = self._get_params() # map(str,self._get_params())
|
||||||
#sort out the constraints
|
# sort out the constraints
|
||||||
constraints = ['']*len(names)
|
constraints = [''] * len(names)
|
||||||
for i in self.constrained_positive_indices:
|
for i,t in zip(self.constrained_indices,self.constraints):
|
||||||
constraints[i] = '(+ve)'
|
for ii in i:
|
||||||
for i in self.constrained_negative_indices:
|
constraints[ii] = t.__str__()
|
||||||
constraints[i] = '(-ve)'
|
for i in self.fixed_indices:
|
||||||
for i in self.constrained_fixed_indices:
|
|
||||||
for ii in i:
|
for ii in i:
|
||||||
constraints[ii] = 'Fixed'
|
constraints[ii] = 'Fixed'
|
||||||
for i,u,l in zip(self.constrained_bounded_indices, self.constrained_bounded_uppers, self.constrained_bounded_lowers):
|
# sort out the ties
|
||||||
for ii in i:
|
ties = [''] * len(names)
|
||||||
constraints[ii] = '('+str(l)+', '+str(u)+')'
|
for i, tie in enumerate(self.tied_indices):
|
||||||
#sort out the ties
|
|
||||||
ties = ['']*len(names)
|
|
||||||
for i,tie in enumerate(self.tied_indices):
|
|
||||||
for j in tie:
|
for j in tie:
|
||||||
ties[j] = '('+str(i)+')'
|
ties[j] = '(' + str(i) + ')'
|
||||||
|
|
||||||
values = ['%.4f' % float(v) for v in values]
|
values = ['%.4f' % float(v) for v in values]
|
||||||
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
|
max_names = max([len(names[i]) for i in range(len(names))] + [len(header[0])])
|
||||||
|
|
@ -330,10 +334,10 @@ class parameterised(object):
|
||||||
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
|
cols = np.array([max_names, max_values, max_constraint, max_ties]) + 4
|
||||||
columns = cols.sum()
|
columns = cols.sum()
|
||||||
|
|
||||||
header_string = ["{h:^{col}}".format(h = header[i], col = cols[i]) for i in range(len(cols))]
|
header_string = ["{h:^{col}}".format(h=header[i], col=cols[i]) for i in range(len(cols))]
|
||||||
header_string = map(lambda x: '|'.join(x), [header_string])
|
header_string = map(lambda x: '|'.join(x), [header_string])
|
||||||
separator = '-'*len(header_string[0])
|
separator = '-' * len(header_string[0])
|
||||||
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n = names[i], v = values[i], c = constraints[i], t = ties[i], c0 = cols[0], c1 = cols[1], c2 = cols[2], c3 = cols[3]) for i in range(len(values))]
|
param_string = ["{n:^{c0}}|{v:^{c1}}|{c:^{c2}}|{t:^{c3}}".format(n=names[i], v=values[i], c=constraints[i], t=ties[i], c0=cols[0], c1=cols[1], c2=cols[2], c3=cols[3]) for i in range(len(values))]
|
||||||
|
|
||||||
|
|
||||||
return ('\n'.join([header_string[0], separator]+param_string)) + '\n'
|
return ('\n'.join([header_string[0], separator] + param_string)) + '\n'
|
||||||
|
|
|
||||||
85
GPy/core/transformations.py
Normal file
85
GPy/core/transformations.py
Normal file
|
|
@ -0,0 +1,85 @@
|
||||||
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
class transformation(object):
    """Abstract interface for the transformations attached to constrained parameters.

    A concrete transformation supplies a forward map ``f``, its inverse
    ``finv``, a gradient factor, an initializer and a short string label.
    Every method here raises NotImplementedError and must be overridden.
    """

    def __init__(self):
        # Label of the constrained domain; suggested values are 'positive', 'bounded', etc.
        self.domain = 'undefined'

    def f(self, x):
        """Forward map applied to a value x; must be provided by subclasses."""
        raise NotImplementedError

    def finv(self, x):
        """Inverse of :meth:`f`; must be provided by subclasses."""
        raise NotImplementedError

    def gradfactor(self, f):
        """Return df_dx evaluated at self.f(x) = f."""
        raise NotImplementedError

    def initialize(self, f):
        """Produce sensible initial values for f(x)."""
        raise NotImplementedError

    def __str__(self):
        """Short label describing the constraint; must be provided by subclasses."""
        raise NotImplementedError
|
||||||
|
|
||||||
|
class logexp(transformation):
    """Positive-domain transformation f(x) = log(1 + exp(x)).

    All three maps are written in overflow-free form so that large
    arguments (beyond roughly 709, where ``exp`` overflows in double
    precision) give finite results instead of inf/nan.
    """

    def __init__(self):
        self.domain = 'positive'

    def f(self, x):
        """Return log(1 + exp(x))."""
        # logaddexp(0, x) = log(exp(0) + exp(x)), evaluated without overflow.
        return np.logaddexp(0., x)

    def finv(self, f):
        """Return log(exp(f) - 1), the inverse of :meth:`f`, for f > 0."""
        # log(exp(f) - 1) = f + log(1 - exp(-f)); expm1 keeps precision for
        # small f and exp(-f) cannot overflow for positive f.
        return f + np.log(-np.expm1(np.negative(f)))

    def gradfactor(self, f):
        """Return df_dx evaluated at self.f(x) = f, i.e. (exp(f) - 1) / exp(f)."""
        # (exp(f) - 1) / exp(f) = 1 - exp(-f)
        return -np.expm1(np.negative(f))

    def initialize(self, f):
        """Return |f| so that the initial values are non-negative."""
        return np.abs(f)

    def __str__(self):
        return '(+ve)'
|
||||||
|
|
||||||
|
class exponent(transformation):
    """Positive-domain transformation f(x) = exp(x)."""

    def __init__(self):
        self.domain = 'positive'

    def f(self, x):
        """Return exp(x)."""
        return np.exp(x)

    def finv(self, x):
        """Return log(x), the inverse of :meth:`f`."""
        return np.log(x)

    def gradfactor(self, f):
        """Return df_dx evaluated at self.f(x) = f; for the exponential this is f itself."""
        return f

    def initialize(self, f):
        """Return |f| so that the initial values are non-negative."""
        return np.abs(f)

    def __str__(self):
        return '(+ve)'
|
||||||
|
|
||||||
|
class negative_exponent(transformation):
    """Negative-domain transformation f(x) = -exp(x)."""

    def __init__(self):
        self.domain = 'negative'

    def f(self, x):
        """Return -exp(x)."""
        return -np.exp(x)

    def finv(self, x):
        """Return log(-x), the inverse of :meth:`f`."""
        return np.log(-x)

    def gradfactor(self, f):
        """Return df_dx evaluated at self.f(x) = f; d(-exp(x))/dx = -exp(x) = f."""
        return f

    def initialize(self, f):
        """Return -|f| so that the initial values are non-positive."""
        return -np.abs(f)

    def __str__(self):
        return '(-ve)'
|
||||||
|
|
||||||
|
class logistic(transformation):
    """Bounded transformation onto (lower, upper) via a scaled logistic function.

    :param lower: lower bound of the interval
    :param upper: upper bound of the interval; must be greater than ``lower``
    """

    def __init__(self, lower, upper):
        self.domain = 'bounded'
        assert lower < upper, "lower bound must be smaller than upper bound!"
        self.lower, self.upper = float(lower), float(upper)
        self.difference = self.upper - self.lower

    def f(self, x):
        """Return lower + (upper - lower) / (1 + exp(-x))."""
        return self.lower + self.difference / (1. + np.exp(-x))

    def finv(self, f):
        """Return log((f - lower) / (upper - f)), the inverse of :meth:`f`.

        Numerator and denominator are clipped at 1e-10 so that values on
        or outside the bounds still give a finite result.
        """
        above_lower = np.clip(f - self.lower, 1e-10, np.inf)
        below_upper = np.clip(self.upper - f, 1e-10, np.inf)
        return np.log(above_lower / below_upper)

    def gradfactor(self, f):
        """Return df_dx evaluated at self.f(x) = f."""
        return (f - self.lower) * (self.upper - f) / self.difference

    def initialize(self, f):
        """Return initial values that satisfy the bounds.

        Values already strictly inside (lower, upper) are kept; every
        other value (including NaN) is replaced by the midpoint
        ``self.f(0)``. The result is an ndarray with the shape of ``f``.
        """
        f = np.asarray(f, dtype=float)
        inside = (f > self.lower) & (f < self.upper)
        # f * 0. only provides the right shape for the midpoint values.
        return np.where(inside, f, self.f(np.zeros_like(f)))

    def __str__(self):
        return '({},{})'.format(self.lower, self.upper)
|
||||||
|
|
||||||
|
|
@ -6,6 +6,8 @@ import pylab as pb
|
||||||
from matplotlib import pyplot as plt, pyplot
|
from matplotlib import pyplot as plt, pyplot
|
||||||
|
|
||||||
import GPy
|
import GPy
|
||||||
|
from GPy.models.Bayesian_GPLVM import Bayesian_GPLVM
|
||||||
|
from GPy.util.datasets import simulation_BGPLVM
|
||||||
|
|
||||||
default_seed = np.random.seed(123344)
|
default_seed = np.random.seed(123344)
|
||||||
|
|
||||||
|
|
@ -46,7 +48,7 @@ def GPLVM_oil_100(optimize=True):
|
||||||
data = GPy.util.datasets.oil_100()
|
data = GPy.util.datasets.oil_100()
|
||||||
|
|
||||||
# create simple GP model
|
# create simple GP model
|
||||||
kernel = GPy.kern.rbf(6, ARD = True) + GPy.kern.bias(6)
|
kernel = GPy.kern.rbf(6, ARD=True) + GPy.kern.bias(6)
|
||||||
m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
|
m = GPy.models.GPLVM(data['X'], 6, kernel=kernel)
|
||||||
m.data_labels = data['Y'].argmax(axis=1)
|
m.data_labels = data['Y'].argmax(axis=1)
|
||||||
|
|
||||||
|
|
@ -79,11 +81,19 @@ def BGPLVM_oil(optimize=True, N=100, Q=10, M=15, max_f_eval=300):
|
||||||
else:
|
else:
|
||||||
m.ensure_default_constraints()
|
m.ensure_default_constraints()
|
||||||
|
|
||||||
# plot
|
y = m.likelihood.Y[0, :]
|
||||||
print(m)
|
fig, (latent_axes, hist_axes) = plt.subplots(1, 2)
|
||||||
m.plot_latent(labels=m.data_labels)
|
plt.sca(latent_axes)
|
||||||
pb.figure()
|
m.plot_latent()
|
||||||
pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
|
data_show = GPy.util.visualize.vector_show(y)
|
||||||
|
lvm_visualizer = GPy.util.visualize.lvm_dimselect(m.X[0, :], m, data_show, latent_axes=latent_axes, sense_axes=sense_axes)
|
||||||
|
raw_input('Press enter to finish')
|
||||||
|
plt.close('all')
|
||||||
|
# # plot
|
||||||
|
# print(m)
|
||||||
|
# m.plot_latent(labels=m.data_labels)
|
||||||
|
# pb.figure()
|
||||||
|
# pb.bar(np.arange(m.kern.D), 1. / m.input_sensitivity())
|
||||||
return m
|
return m
|
||||||
|
|
||||||
def oil_100():
|
def oil_100():
|
||||||
|
|
@ -99,6 +109,183 @@ def oil_100():
|
||||||
# m.plot_latent(labels=data['Y'].argmax(axis=1))
|
# m.plot_latent(labels=data['Y'].argmax(axis=1))
|
||||||
return m
|
return m
|
||||||
|
|
||||||
|
def _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim=False):
    """
    Simulate three noisy, standardised data sets driven by sinusoidal signals.

    Four signals are evaluated on ``N`` points of ``[0, 4*pi]``: three private
    ones (``sin(x)``, ``cos(x)``, ``-exp(-cos(2x))``) and one shared signal
    (``sin(2x)``).  Each data set is a random linear map of
    ``[private, shared]`` plus Gaussian noise (scale 0.1), afterwards centred
    and scaled to unit standard deviation per column.

    :param D1, D2, D3: output dimensionality of the three data sets
    :param N: number of samples
    :param M, Q: unused here; kept so callers can pass their full setup
    :param plot_sim: if True, plot the signals and the data with pylab
    :returns: ``(slist, Slist, Ylist)`` with ``slist = [s1, s2, s3, sS]``
        (each ``N x 1``), ``Slist = [S1, S2, S3]`` (each ``N x 2``) and
        ``Ylist = [Y1, Y2, Y3]`` (``N x D1``, ``N x D2``, ``N x D3``)
    """
    x = np.linspace(0, 4 * np.pi, N)[:, None]

    # numpy ufuncs already operate element-wise, np.vectorize is not needed
    s1 = np.sin(x)
    s2 = np.cos(x)
    s3 = -np.exp(-np.cos(2 * x))
    sS = np.sin(2 * x)

    slist = [s1, s2, s3, sS]
    labels = ["s1", "s2", "s3", "sS"]
    Slist = [np.hstack([private, sS]) for private in (s1, s2, s3)]

    # Draw all mixing matrices first and all noise terms afterwards, so the
    # sequence of random numbers consumed is the same as before.
    Ylist = [S.dot(np.random.randn(S.shape[1], D))
             for S, D in zip(Slist, (D1, D2, D3))]
    for Y in Ylist:
        Y += .1 * np.random.randn(*Y.shape)

    # standardise every output dimension
    for Y in Ylist:
        Y -= Y.mean(0)
        Y /= Y.std(0)

    if plot_sim:
        import pylab
        fig = pylab.figure("MRD Simulation", figsize=(8, 6))
        fig.clf()
        ax = fig.add_subplot(2, 1, 1)
        for S, lab in zip(slist, labels):
            ax.plot(S, label=lab)
        ax.legend()
        for i, Y in enumerate(Ylist):
            ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
            ax.imshow(Y)
            ax.set_title("Y{}".format(i + 1))
        pylab.draw()
        pylab.tight_layout()

    return slist, Slist, Ylist
|
||||||
|
|
||||||
|
def bgplvm_simulation_matlab_compare():
    """
    Set up a Bayesian GPLVM on the ``simulation_BGPLVM`` data set.

    The latent dimensionality is taken from the second axis of the stored
    ``mu``; 20 inducing points and a linear (ARD) + bias + white kernel are
    used.  The model is returned un-optimised.

    :returns: the configured Bayesian_GPLVM model
    """
    sim_data = simulation_BGPLVM()
    Y, S, mu = sim_data['Y'], sim_data['S'], sim_data['mu']
    _, Q = mu.shape
    M = 20

    from GPy.models import mrd
    from GPy import kern
    reload(mrd)
    reload(kern)

    # alternative: kern.rbf(Q, ARD=True) in place of the linear part
    linear_part = kern.linear(Q, ARD=True)
    bias_part = kern.bias(Q, np.exp(-2))
    white_part = kern.white(Q, np.exp(-2))
    k = linear_part + bias_part + white_part

    # X=mu and X_variance=S can be passed here to start from the stored values
    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
    m.ensure_default_constraints()
    m.auto_scale_factor = True
    m['noise'] = Y.var() / 100.
    m['linear_variance'] = .01

    return m
|
||||||
|
|
||||||
|
def bgplvm_simulation(burnin='scg', plot_sim=False,
                      max_burnin=100, true_X=False,
                      do_opt=True,
                      max_f_eval=1000):
    """
    Fit a Bayesian GPLVM to the first simulated sin/cos data set.

    :param burnin: name of the optimizer, used for both the burn-in and the
        main optimization; a falsy value skips both
    :param plot_sim: forwarded to ``_simulate_sincos`` to plot the simulation
    :param max_burnin: function evaluation budget of the burn-in phase
    :param true_X: if True, fix the latent means to the generating signals
    :param do_opt: run the main optimization (only when ``burnin`` is set)
    :param max_f_eval: function evaluation budget of the main optimization
    :returns: the model; it is returned even if the main optimization fails
        or is interrupted
    """
    D1, D2, D3, N, M, Q = 10, 8, 8, 250, 10, 6
    slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)

    from GPy.models import mrd
    from GPy import kern
    reload(mrd); reload(kern)

    Y = Ylist[0]

    k = kern.linear(Q, ARD=True) + kern.bias(Q, np.exp(-2)) + kern.white(Q, np.exp(-2))
    m = Bayesian_GPLVM(Y, Q, init="PCA", M=M, kernel=k, _debug=True)
    m.ensure_default_constraints()
    m['noise'] = Y.var() / 100.
    m['linear_variance'] = .001

    if burnin:
        # burn-in: optimize everything else while the noise is held fixed
        print("initializing beta")
        cstr = "noise"
        m.unconstrain(cstr); m.constrain_fixed(cstr, Y.var() / 70.)
        m.optimize(burnin, messages=1, max_f_eval=max_burnin)

        print("releasing beta")
        cstr = "noise"
        m.unconstrain(cstr); m.constrain_positive(cstr)

    if true_X:
        # latent means: first private signal, shared signal, zero padding
        true_X = np.hstack((slist[0], slist[3], 0. * np.ones((N, Q - 2))))
        # raw strings: '\d' is a regular expression class, not a string escape
        m.set(r'X_\d', true_X)
        m.constrain_fixed(r"X_\d")

        cstr = 'X_variance'
        m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-7, .1)

    if do_opt and burnin:
        try:
            m.optimize(burnin, messages=1, max_f_eval=max_f_eval)
        except (Exception, KeyboardInterrupt) as err:
            # Best effort: hand back the model as far as it got (e.g. after
            # Ctrl-C), but say so instead of hiding the reason.
            print("optimization stopped early: {!r}".format(err))
    return m
|
||||||
|
|
||||||
def mrd_simulation(plot_sim=False):
|
def mrd_simulation(plot_sim=False):
|
||||||
# num = 2
|
# num = 2
|
||||||
# ard1 = np.array([1., 1, 0, 0], dtype=float)
|
# ard1 = np.array([1., 1, 0, 0], dtype=float)
|
||||||
|
|
@ -117,32 +304,8 @@ def mrd_simulation(plot_sim=False):
|
||||||
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
|
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(X), D2).T
|
||||||
# Y2 -= Y2.mean(0)
|
# Y2 -= Y2.mean(0)
|
||||||
# make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
|
# make_params = lambda ard: np.hstack([[1], ard, [1, .3]])
|
||||||
|
D1, D2, D3, N, M, Q = 2000, 34, 8, 500, 3, 6
|
||||||
D1, D2, D3, N, M, Q = 50, 100, 8, 200, 2, 5
|
slist, Slist, Ylist = _simulate_sincos(D1, D2, D3, N, M, Q, plot_sim)
|
||||||
x = np.linspace(0, 8 * np.pi, N)[:, None]
|
|
||||||
|
|
||||||
s1 = np.vectorize(lambda x: np.sin(x))
|
|
||||||
s2 = np.vectorize(lambda x: np.cos(x))
|
|
||||||
s3 = np.vectorize(lambda x:-np.exp(-np.cos(2 * x)))
|
|
||||||
sS = np.vectorize(lambda x: x * np.sin(2 * x))
|
|
||||||
|
|
||||||
s1 = s1(x)
|
|
||||||
s2 = s2(x)
|
|
||||||
s3 = s3(x)
|
|
||||||
sS = sS(x)
|
|
||||||
|
|
||||||
s1 -= s1.mean()
|
|
||||||
s2 -= s2.mean()
|
|
||||||
s3 -= s3.mean()
|
|
||||||
sS -= sS.mean()
|
|
||||||
s1 /= np.abs(s1).max()
|
|
||||||
s2 /= np.abs(s2).max()
|
|
||||||
s3 /= np.abs(s3).max()
|
|
||||||
sS /= np.abs(sS).max()
|
|
||||||
|
|
||||||
S1 = np.hstack([s1, sS])
|
|
||||||
S2 = np.hstack([s2, sS])
|
|
||||||
S3 = np.hstack([s3, sS])
|
|
||||||
|
|
||||||
from GPy.models import mrd
|
from GPy.models import mrd
|
||||||
from GPy import kern
|
from GPy import kern
|
||||||
|
|
@ -153,73 +316,40 @@ def mrd_simulation(plot_sim=False):
|
||||||
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
|
# Y2 = np.random.multivariate_normal(np.zeros(N), k.K(S2), D2).T
|
||||||
# Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
|
# Y3 = np.random.multivariate_normal(np.zeros(N), k.K(S3), D3).T
|
||||||
|
|
||||||
Y1 = S1.dot(np.random.randn(S1.shape[1], D1))
|
Ylist = Ylist[0:2]
|
||||||
Y2 = S2.dot(np.random.randn(S2.shape[1], D2))
|
|
||||||
Y3 = S3.dot(np.random.randn(S3.shape[1], D3))
|
|
||||||
|
|
||||||
Y1 += .5 * np.random.randn(*Y1.shape)
|
|
||||||
Y2 += .5 * np.random.randn(*Y2.shape)
|
|
||||||
Y3 += .5 * np.random.randn(*Y3.shape)
|
|
||||||
|
|
||||||
# Y1 -= Y1.mean(0)
|
|
||||||
# Y2 -= Y2.mean(0)
|
|
||||||
# Y3 -= Y3.mean(0)
|
|
||||||
|
|
||||||
# Y1 /= Y1.std(0)
|
|
||||||
# Y2 /= Y2.std(0)
|
|
||||||
# Y3 /= Y3.std(0)
|
|
||||||
|
|
||||||
Slist = [s1, s2, sS]
|
|
||||||
Ylist = [Y1, Y2]
|
|
||||||
|
|
||||||
if plot_sim:
|
|
||||||
import pylab
|
|
||||||
import itertools
|
|
||||||
fig = pylab.figure("MRD Simulation", figsize=(8, 6))
|
|
||||||
fig.clf()
|
|
||||||
ax = fig.add_subplot(2, 1, 1)
|
|
||||||
labls = sorted(filter(lambda x: x.startswith("s"), locals()))
|
|
||||||
for S, lab in itertools.izip(Slist, labls):
|
|
||||||
ax.plot(x, S, label=lab)
|
|
||||||
ax.legend()
|
|
||||||
for i, Y in enumerate(Ylist):
|
|
||||||
ax = fig.add_subplot(2, len(Ylist), len(Ylist) + 1 + i)
|
|
||||||
ax.imshow(Y)
|
|
||||||
ax.set_title("Y{}".format(i + 1))
|
|
||||||
pylab.draw()
|
|
||||||
pylab.tight_layout()
|
|
||||||
|
|
||||||
# k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
|
# k = kern.rbf(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
|
||||||
k = kern.linear(Q, ARD=True) + kern.bias(Q) + kern.white(Q)
|
|
||||||
m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", _debug=False)
|
k = kern.linear(Q, ARD=True) + kern.bias(Q, .01) + kern.white(Q, .001)
|
||||||
m.ensure_default_constraints()
|
m = mrd.MRD(*Ylist, Q=Q, M=M, kernel=k, initx="concat", initz='permute', _debug=False)
|
||||||
|
|
||||||
for i, Y in enumerate(Ylist):
|
for i, Y in enumerate(Ylist):
|
||||||
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
|
m.set('{}_noise'.format(i + 1), Y.var() / 100.)
|
||||||
|
|
||||||
# import ipdb;ipdb.set_trace()
|
m.ensure_default_constraints()
|
||||||
cstr = "variance"
|
m.auto_scale_factor = True
|
||||||
m.unconstrain(cstr); m.constrain_bounded(cstr, 1e-15, 1.)
|
|
||||||
|
# cstr = 'variance'
|
||||||
|
# m.unconstrain(cstr), m.constrain_bounded(cstr, 1e-12, 1.)
|
||||||
|
#
|
||||||
|
# cstr = 'linear_variance'
|
||||||
|
# m.unconstrain(cstr), m.constrain_positive(cstr)
|
||||||
|
|
||||||
# print "initializing beta"
|
# print "initializing beta"
|
||||||
# cstr = "noise"
|
# cstr = "noise"
|
||||||
# m.unconstrain(cstr); m.constrain_fixed(cstr)
|
# m.unconstrain(cstr); m.constrain_fixed(cstr)
|
||||||
# m.optimize('scg', messages=1, max_f_eval=200)
|
# m.optimize('scg', messages=1, max_f_eval=100)
|
||||||
#
|
|
||||||
# print "releasing beta"
|
# print "releasing beta"
|
||||||
# cstr = "noise"
|
# cstr = "noise"
|
||||||
# m.unconstrain(cstr); m.constrain_positive(cstr)
|
# m.unconstrain(cstr); m.constrain_positive(cstr)
|
||||||
|
|
||||||
|
np.seterr(all='call')
|
||||||
|
def ipdbonerr(errtype, flags):
|
||||||
|
import ipdb; ipdb.set_trace()
|
||||||
|
np.seterrcall(ipdbonerr)
|
||||||
|
|
||||||
m.auto_scale_factor = True
|
return m # , mtest
|
||||||
|
|
||||||
# fig = pyplot.figure("expected", figsize=(8, 3))
|
|
||||||
# ax = fig.add_subplot(121)
|
|
||||||
# ax.bar(np.arange(ard1.size) + .1, ard1)
|
|
||||||
# ax = fig.add_subplot(122)
|
|
||||||
# ax.bar(np.arange(ard2.size) + .1, ard2)
|
|
||||||
|
|
||||||
return m
|
|
||||||
|
|
||||||
def mrd_silhouette():
|
def mrd_silhouette():
|
||||||
|
|
||||||
|
|
@ -237,7 +367,7 @@ def brendan_faces():
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
|
data_show = GPy.util.visualize.image_show(y[None, :], dimensions=(20, 28), transpose=True, invert=False, scale=False)
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
plt.close('all')
|
plt.close('all')
|
||||||
|
|
||||||
|
|
@ -254,7 +384,29 @@ def stick():
|
||||||
ax = m.plot_latent()
|
ax = m.plot_latent()
|
||||||
y = m.likelihood.Y[0, :]
|
y = m.likelihood.Y[0, :]
|
||||||
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
data_show = GPy.util.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||||
lvm_visualizer = GPy.util.visualize.lvm(m, data_show, ax)
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
|
||||||
|
raw_input('Press enter to finish')
|
||||||
|
plt.close('all')
|
||||||
|
|
||||||
|
return m
|
||||||
|
|
||||||
|
def cmu_mocap(subject='35', motion=['01'], in_place=True):
|
||||||
|
|
||||||
|
data = GPy.util.datasets.cmu_mocap(subject, motion)
|
||||||
|
Y = data['Y']
|
||||||
|
if in_place:
|
||||||
|
# Make figure move in place.
|
||||||
|
data['Y'][:, 0:3] = 0.0
|
||||||
|
m = GPy.models.GPLVM(data['Y'], 2, normalize_Y=True)
|
||||||
|
|
||||||
|
# optimize
|
||||||
|
m.ensure_default_constraints()
|
||||||
|
m.optimize(messages=1, max_f_eval=10000)
|
||||||
|
|
||||||
|
ax = m.plot_latent()
|
||||||
|
y = m.likelihood.Y[0, :]
|
||||||
|
data_show = GPy.util.visualize.skeleton_show(y[None, :], data['skel'])
|
||||||
|
lvm_visualizer = GPy.util.visualize.lvm(m.X[0, :], m, data_show, ax)
|
||||||
raw_input('Press enter to finish')
|
raw_input('Press enter to finish')
|
||||||
plt.close('all')
|
plt.close('all')
|
||||||
|
|
||||||
|
|
|
||||||
275
GPy/inference/conjugate_gradient_descent.py
Normal file
275
GPy/inference/conjugate_gradient_descent.py
Normal file
|
|
@ -0,0 +1,275 @@
|
||||||
|
'''
|
||||||
|
Created on 24 Apr 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
from GPy.inference.gradient_descent_update_rules import FletcherReeves
|
||||||
|
from Queue import Empty
|
||||||
|
from multiprocessing import Value
|
||||||
|
from multiprocessing.queues import Queue
|
||||||
|
from multiprocessing.synchronize import Event
|
||||||
|
from scipy.optimize.linesearch import line_search_wolfe1, line_search_wolfe2
|
||||||
|
from threading import Thread
|
||||||
|
import numpy
|
||||||
|
import sys
|
||||||
|
|
||||||
|
RUNNING = "running"
|
||||||
|
CONVERGED = "converged"
|
||||||
|
MAXITER = "maximum number of iterations reached"
|
||||||
|
MAX_F_EVAL = "maximum number of function calls reached"
|
||||||
|
LINE_SEARCH = "line search failed"
|
||||||
|
KBINTERRUPT = "interrupted"
|
||||||
|
|
||||||
|
class _Async_Optimization(Thread):
    """
    Thread skeleton for optimizers that run in the background.

    Subclasses implement :meth:`run`; this class wires up call counting for
    the objective and gradient and the reporting of results to a queue.
    """

    def __init__(self, f, df, x0, update_rule, runsignal, SENTINEL,
                 report_every=10, messages=0, maxiter=5e3, max_f_eval=15e3,
                 gtol=1e-6, outqueue=None, *args, **kw):
        """
        Store the optimization setup and initialise the thread.

        f_call and df_call are multiprocessing Values counting how often
        f and df have been called through the wrapped ``self.f``/``self.df``.

        :param f, df: objective and gradient callables
        :param x0: starting point
        :param update_rule: rule used by subclasses to build search directions
        :param runsignal: event object; cleared by :meth:`callback_return`
        :param SENTINEL: marker put on ``outqueue`` after the final report
        :param outqueue: object with a ``put`` method receiving the reports,
            or None to disable reporting
        :param args, kw: forwarded to ``Thread.__init__``
        """
        self.f_call, self.df_call = Value('i', 0), Value('i', 0)
        self.f = self.f_wrapper(f, self.f_call)
        self.df = self.f_wrapper(df, self.df_call)

        self.x0, self.update_rule = x0, update_rule
        self.report_every, self.messages = report_every, messages
        self.maxiter, self.max_f_eval, self.gtol = maxiter, max_f_eval, gtol
        self.SENTINEL, self.runsignal = SENTINEL, runsignal
        self.outq = outqueue

        super(_Async_Optimization, self).__init__(target=self.run,
                                                  name="CG Optimization",
                                                  *args, **kw)

    def f_wrapper(self, f, counter):
        """Return a function calling `f` and incrementing `counter.value` per call."""
        def counted(*call_args, **call_kw):
            counter.value = counter.value + 1
            return f(*call_args, **call_kw)
        return counted

    def callback(self, *a):
        """Put the argument tuple `a` on the output queue, if there is one."""
        if self.outq is None:
            return
        self.outq.put(a)

    def callback_return(self, *a):
        """Report `a`, then the sentinel, then clear the run signal."""
        self.callback(*a)
        self.callback(self.SENTINEL)
        self.runsignal.clear()

    def run(self, *args, **kwargs):
        """Optimization loop; has to be provided by subclasses."""
        raise NotImplementedError("Overwrite this with optimization (for async use)")
|
||||||
|
|
||||||
|
class _CGDAsync(_Async_Optimization):
    """
    Conjugate gradient descent worker thread.

    Search directions are built with ``self.update_rule``; step lengths come
    from scipy's Wolfe line searches.
    """

    def reset(self, xi, *a, **kw):
        """
        Restart the search at `xi`.

        :returns: ``(gi, ur, si)``: the negated gradient at `xi`, a fresh
            update rule instance built from it, and the search direction
            (equal to ``gi``)
        """
        gi = -self.df(xi, *a, **kw)
        si = gi
        ur = self.update_rule(gi)
        return gi, ur, si

    def run(self, *a, **kw):
        """
        Run the optimization until a stopping criterion is met.

        The loop ends when the run signal is cleared, the function evaluation
        budget is exceeded, the squared gradient norm drops below ``gtol``,
        both line searches fail, or ``maxiter`` iterations were made.  The
        final state is reported through ``callback_return`` and stored in
        ``self.result`` as
        ``[x, f, g, iteration, f_calls, df_calls, status]``.
        """
        status = RUNNING

        fi = self.f(self.x0)
        fi_old = fi + 5000

        gi, ur, si = self.reset(self.x0, *a, **kw)
        # NOTE(review): xi aliases x0, so the in-place update below also
        # changes the caller's starting point
        xi = self.x0
        it = 0

        while it < self.maxiter:
            if not self.runsignal.is_set():
                break

            if self.f_call.value > self.max_f_eval:
                status = MAX_F_EVAL
                # without this break the budget was only recorded, not enforced
                break

            gi = -self.df(xi, *a, **kw)
            gnorm2 = numpy.dot(gi.T, gi)
            if gnorm2 < self.gtol:
                status = CONVERGED
                break
            if numpy.isnan(gnorm2):
                # No earlier point is kept to fall back to, so stop here.
                # NOTE(review): this is reported as CONVERGED, as before.
                status = CONVERGED
                break

            gammai = ur(gi)
            if gammai < 1e-6 or it % xi.shape[0] == 0:
                gi, ur, si = self.reset(xi, *a, **kw)
            si = gi + gammai * si
            alphai, _, _, fi2, fi_old2, gfi = line_search_wolfe1(self.f,
                                                                 self.df,
                                                                 xi,
                                                                 si, gi,
                                                                 fi, fi_old)
            if alphai is not None and fi2 < fi:
                fi, fi_old = fi2, fi_old2
            else:
                # first line search did not improve: try the second one
                alphai, _, _, fi, fi_old, gfi = \
                    line_search_wolfe2(self.f, self.df,
                                       xi, si, gi,
                                       fi, fi_old)
                if alphai is None:
                    # This line search also failed to find a better solution.
                    status = LINE_SEARCH
                    break
            if gfi is not None:
                gi = gfi

            if numpy.isnan(fi) or fi_old < fi:
                gi, ur, si = self.reset(xi, *a, **kw)
            else:
                xi += numpy.dot(alphai, si)
                if self.messages:
                    sys.stdout.write("\r")
                    sys.stdout.flush()
                    sys.stdout.write("iteration: {0:> 6g} f:{1:> 12e} |g|:{2:> 12e}".format(it, fi, numpy.dot(gi.T, gi)))

            if it % self.report_every == 0:
                self.callback(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
            it += 1
        else:
            # loop ran out of iterations without hitting a break
            status = MAXITER
        self.callback_return(xi, fi, gi, it, self.f_call.value, self.df_call.value, status)
        self.result = [xi, fi, gi, it, self.f_call.value, self.df_call.value, status]
|
||||||
|
|
||||||
|
class Async_Optimize(object):
    """
    Front end that runs an optimization in a worker thread.

    Optionally a second thread collects the worker's progress reports from a
    queue and forwards them to a user callback.
    """
    callback = lambda *x: None
    # NOTE(review): class attribute, i.e. one event shared by all instances
    runsignal = Event()
    SENTINEL = "SENTINEL"

    def _is_sentinel(self, item):
        """
        Tell whether a queue item is the end-of-stream marker.

        The worker reports its positional arguments as a tuple, so the marker
        arrives as ``(SENTINEL,)``; the bare marker is accepted as well.
        Types are checked first so that reports holding arrays are never
        compared against the marker.
        """
        if isinstance(item, tuple) and len(item) == 1:
            item = item[0]
        return isinstance(item, type(self.SENTINEL)) and item == self.SENTINEL

    def async_callback_collect(self, q):
        """
        Forward reports from queue `q` to ``self.callback``.

        Returns when the end-of-stream marker is read, or when the run signal
        has been cleared and the queue stays empty for one second.
        """
        while self.runsignal.is_set():
            try:
                while True:
                    ret = q.get(timeout=1)
                    if self._is_sentinel(ret):
                        # do not hand the marker to the user callback
                        return
                    self.callback(*ret)
            except Empty:
                pass

    def opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
                   messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                   report_every=10, *args, **kwargs):
        """
        Start the optimization in the background.

        :returns: ``(p, c)``: the started worker thread and the started
            callback collector thread (``None`` if `callback` is falsy)
        """
        self.runsignal.set()
        c = None
        outqueue = None
        if callback:
            outqueue = Queue()
            self.callback = callback
            c = Thread(target=self.async_callback_collect, args=(outqueue,))
            c.start()
        p = _CGDAsync(f, df, x0, update_rule, self.runsignal, self.SENTINEL,
                      report_every=report_every, messages=messages, maxiter=maxiter,
                      max_f_eval=max_f_eval, gtol=gtol, outqueue=outqueue, *args, **kwargs)
        p.start()
        return p, c

    def opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
                   messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                   report_every=10, *args, **kwargs):
        """
        Run the optimization and wait for it to finish.

        A KeyboardInterrupt clears the run signal, which makes the worker
        stop at its next iteration.

        :returns: the worker's ``result``
        """
        p, c = self.opt_async(f, df, x0, callback, update_rule, messages,
                              maxiter, max_f_eval, gtol,
                              report_every, *args, **kwargs)
        while self.runsignal.is_set():
            try:
                p.join(1)
            except KeyboardInterrupt:
                self.runsignal.clear()
                p.join()
                # there is no collector thread without a callback
                if c is not None:
                    c.join()
        # The worker clears the run signal just before it stores its result;
        # wait for it to finish so that p.result exists.
        p.join()
        if c and c.is_alive():
            print("WARNING: callback still running, optimisation done!")
        return p.result
|
||||||
|
|
||||||
|
class CGD(Async_Optimize):
    '''
    Conjugate gradient descent: minimize a function f with gradient df,
    starting from x0, using the update rule `update_rule` to build the
    search directions.
    '''
    opt_name = "Conjugate Gradient Descent"

    def opt_async(self, *a, **kw):
        """
        opt_async(self, f, df, x0, callback, update_rule=FletcherReeves,
                  messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
                  report_every=10, *args, **kwargs)

        Start the minimization of f in a background thread.

        Every `report_every` iterations, and once more when the optimization
        ends, `callback` is called as

            callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

        **returns**
        -----------

            (p, c): the started optimization thread and the started callback
            collector thread (None if no callback was given)
        """
        parent = super(CGD, self)
        return parent.opt_async(*a, **kw)

    def opt(self, *a, **kw):
        """
        opt(self, f, df, x0, callback=None, update_rule=FletcherReeves,
            messages=0, maxiter=5e3, max_f_eval=15e3, gtol=1e-6,
            report_every=10, *args, **kwargs)

        Minimize f and block until the optimization has finished.

        If a callback is given it is called every `report_every` iterations as

            callback(xi, fi, gi, iteration, function_calls, gradient_calls, status_message)

        **returns**
        -----------

            x_opt, f_opt, g_opt, iteration, function_calls, gradient_calls, status_message
        """
        parent = super(CGD, self)
        return parent.opt(*a, **kw)
|
||||||
|
|
||||||
43
GPy/inference/gradient_descent_update_rules.py
Normal file
43
GPy/inference/gradient_descent_update_rules.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
'''
|
||||||
|
Created on 24 Apr 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
class GDUpdateRule(object):
    """
    Base class for the gamma update rules of gradient descent methods.

    Keeps the current and the previous gradient (``grad``, ``gradold``) and
    natural gradient (``gradnat``, ``gradnatold``).  Subclasses implement
    :meth:`_gamma`.
    """
    _gradnat = None
    _gradnatold = None

    def __init__(self, initgrad, initgradnat=None):
        """
        :param initgrad: initial gradient
        :param initgradnat: initial natural gradient; `initgrad` is used if
            it is None
        """
        self.grad = initgrad
        # compare against None: the truth value of an array is ambiguous
        if initgradnat is not None:
            self.gradnat = initgradnat
        else:
            self.gradnat = initgrad

    def _gamma(self, *a, **kw):
        """Compute gamma from the stored gradients (provided by subclasses)."""
        raise NotImplementedError("""Implement gamma update rule here,
        you can use self.grad and self.gradold for parameters, as well as
        self.gradnat and self.gradnatold for natural gradients.""")

    def __call__(self, grad, gradnat=None, si=None, *args, **kw):
        """
        Return gamma for given gradients and optional natural gradients

        The previously stored gradients become ``gradold``/``gradnatold``
        before the new ones are stored; `si` is stored as ``self.si``.
        Additional arguments are passed on to :meth:`_gamma`.
        """
        if gradnat is None:
            gradnat = grad
        self.gradold = self.grad
        self.gradnatold = self.gradnat
        self.grad = grad
        self.gradnat = gradnat
        self.si = si
        return self._gamma(*args, **kw)
|
||||||
|
|
||||||
|
class FletcherReeves(GDUpdateRule):
    '''
    Fletcher Reeves rule: gamma is the product of the current gradient and
    natural gradient, divided by the same product of the previous ones.
    '''
    def _gamma(self, *a, **kw):
        numerator = numpy.dot(self.grad.T, self.gradnat)
        if not numerator:
            # a zero numerator is returned as is, without dividing
            return numerator
        denominator = numpy.dot(self.gradold.T, self.gradnatold)
        return numerator / denominator
|
||||||
146
GPy/inference/natural_gradient_scg.py
Normal file
146
GPy/inference/natural_gradient_scg.py
Normal file
|
|
@ -0,0 +1,146 @@
|
||||||
|
#Copyright I. Nabney, N.Lawrence and James Hensman (1996 - 2012)
|
||||||
|
|
||||||
|
#Scaled Conjugate Gradients, originally in Matlab as part of the Netlab toolbox by I. Nabney, converted to python by N. Lawrence and given a pythonic interface by James Hensman
|
||||||
|
|
||||||
|
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT
|
||||||
|
# HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||||
|
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
|
||||||
|
# NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||||
|
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
||||||
|
# REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
|
||||||
|
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
# HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
# LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
||||||
|
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
# POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def SCG(f, gradf, x, optargs=(), maxiters=500, max_f_eval=500, display=True, xtol=1e-6, ftol=1e-6):
    """
    Optimisation through Scaled Conjugate Gradients (SCG)

    f: the objective function
    gradf : the gradient function (should return a 1D np.ndarray)
    x : the initial condition
    optargs : extra positional arguments passed to f and gradf
    maxiters : maximum number of iterations
    max_f_eval : maximum number of evaluations of f
    display : print a progress line for every iteration
    xtol, ftol : a successful step terminates the optimisation if it moves
        no coordinate by xtol or more, or changes f by less than ftol

    Returns
    x : the optimal value for x
    flog : a list of all the objective values
    function_eval : the number of evaluations of f
    status : a string describing why the optimisation stopped
    """

    sigma0 = 1.0e-4
    fold = f(x, *optargs) # Initial function value.
    function_eval = 1
    fnow = fold
    gradnew = gradf(x, *optargs) # Initial gradient.
    gradold = gradnew.copy()
    d = -gradnew # Initial search direction.
    success = True # Force calculation of directional derivs.
    nsuccess = 0 # nsuccess counts number of successes.
    beta = 1.0 # Initial scale parameter.
    betamin = 1.0e-15 # Lower bound on scale.
    betamax = 1.0e100 # Upper bound on scale.
    status = "Not converged"

    flog = [fold]

    # A zero gradient at the start leaves no search direction: kappa below
    # would be zero and sigma0/sqrt(kappa) a division by zero.
    if np.dot(gradnew, gradnew) == 0:
        status = 'converged'
        return x, flog, function_eval, status

    iteration = 0

    # Main optimization loop.
    while iteration < maxiters:

        # Calculate first and second directional derivatives.
        if success:
            mu = np.dot(d, gradnew)
            if mu >= 0:
                # d is not a descent direction: fall back to steepest descent
                d = -gradnew
                mu = np.dot(d, gradnew)
            kappa = np.dot(d, d)
            sigma = sigma0/np.sqrt(kappa)
            xplus = x + sigma*d
            gplus = gradf(xplus, *optargs)
            # finite difference estimate of the curvature along d
            theta = np.dot(d, (gplus - gradnew))/sigma

        # Increase effective curvature and evaluate step size alpha.
        delta = theta + beta*kappa
        if delta <= 0:
            delta = beta*kappa
            beta = beta - theta/kappa

        alpha = - mu/delta

        # Calculate the comparison ratio.
        xnew = x + alpha*d
        fnew = f(xnew, *optargs)
        function_eval += 1

        if function_eval >= max_f_eval:
            status = "Maximum number of function evaluations exceeded"
            return x, flog, function_eval, status

        Delta = 2.*(fnew - fold)/(alpha*mu)
        if Delta >= 0.:
            success = True
            nsuccess += 1
            x = xnew
            fnow = fnew
        else:
            success = False
            fnow = fold

        # Store relevant variables
        flog.append(fnow) # Current function value

        iteration += 1
        if display:
            # written through sys.stdout so it works on Python 2 and 3
            sys.stdout.write('\r' + 'Iteration: {0:>5g} Objective:{1:> 12e} Scale:{2:> 12e}'.format(iteration, fnow, beta))
            sys.stdout.flush()

        if success:
            # Test for termination
            if (np.max(np.abs(alpha*d)) < xtol) or (np.abs(fnew-fold) < ftol):
                status = 'converged'
                return x, flog, function_eval, status

            else:
                # Update variables for new position
                fold = fnew
                gradold = gradnew
                gradnew = gradf(x, *optargs)
                # If the gradient is zero then we are done.
                if np.dot(gradnew, gradnew) == 0:
                    status = 'converged'
                    return x, flog, function_eval, status

        # Adjust beta according to comparison ratio.
        if Delta < 0.25:
            beta = min(4.0*beta, betamax)
        if Delta > 0.75:
            beta = max(0.5*beta, betamin)

        # Update search direction using Polak-Ribiere formula, or re-start
        # in direction of negative gradient after nparams steps.
        if nsuccess == x.size:
            d = -gradnew
            nsuccess = 0
        elif success:
            gamma = np.dot(gradold - gradnew, gradnew)/(mu)
            d = gamma*d - gradnew

    # If we get here, then we haven't terminated in the given number of
    # iterations.
    status = "maxiter exceeded"

    return x, flog, function_eval, status
|
||||||
|
|
@ -36,12 +36,16 @@ class Brownian(kernpart):
|
||||||
return ['variance']
|
return ['variance']
|
||||||
|
|
||||||
def K(self,X,X2,target):
    """
    Add the covariance between X and X2 to `target` in place.

    The covariance is ``variance * min(x, x2)``, taken element-wise between
    X and the transpose of X2.  X is used twice if X2 is None.
    """
    other = X if X2 is None else X2
    target += self.variance*np.fmin(X, other.T)
|
||||||
|
|
||||||
def Kdiag(self,X,target):
    """Add the diagonal of the covariance, ``variance * X`` flattened, to `target` in place."""
    flat_inputs = X.flatten()
    target += self.variance*flat_inputs
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
    """
    Add the gradient with respect to the variance to `target` in place.

    The gradient is the sum over ``min(x, x2) * dL_dK``.  X is used twice
    if X2 is None.
    """
    other = X if X2 is None else X2
    weighted = np.fmin(X, other.T)*dL_dK
    target += np.sum(weighted)
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||||
|
|
|
||||||
|
|
@ -2,5 +2,9 @@
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
|
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
|
||||||
|
try:
|
||||||
|
from constructors import rbf_sympy, sympykern # these depend on sympy
|
||||||
|
except:
|
||||||
|
pass
|
||||||
from kern import kern
|
from kern import kern
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
|
||||||
from coregionalise import coregionalise as coregionalise_part
|
from coregionalise import coregionalise as coregionalise_part
|
||||||
from rational_quadratic import rational_quadratic as rational_quadraticpart
|
from rational_quadratic import rational_quadratic as rational_quadraticpart
|
||||||
from rbfcos import rbfcos as rbfcospart
|
from rbfcos import rbfcos as rbfcospart
|
||||||
|
from independent_outputs import independent_outputs as independent_output_part
|
||||||
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
||||||
#using meta-classes to make the objects construct properly wthout them.
|
#using meta-classes to make the objects construct properly wthout them.
|
||||||
|
|
||||||
|
|
@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
|
||||||
part = Brownianpart(D,variance)
|
part = Brownianpart(D,variance)
|
||||||
return kern(D, [part])
|
return kern(D, [part])
|
||||||
|
|
||||||
import sympy as sp
|
try:
|
||||||
from sympykern import spkern
|
import sympy as sp
|
||||||
from sympy.parsing.sympy_parser import parse_expr
|
from sympykern import spkern
|
||||||
|
from sympy.parsing.sympy_parser import parse_expr
|
||||||
|
sympy_available = True
|
||||||
|
except ImportError:
|
||||||
|
sympy_available = False
|
||||||
|
|
||||||
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
|
if sympy_available:
|
||||||
"""
|
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
|
||||||
Radial Basis Function covariance.
|
"""
|
||||||
"""
|
Radial Basis Function covariance.
|
||||||
X = [sp.var('x%i'%i) for i in range(D)]
|
"""
|
||||||
Z = [sp.var('z%i'%i) for i in range(D)]
|
X = [sp.var('x%i'%i) for i in range(D)]
|
||||||
rbf_variance = sp.var('rbf_variance',positive=True)
|
Z = [sp.var('z%i'%i) for i in range(D)]
|
||||||
if ARD:
|
rbf_variance = sp.var('rbf_variance',positive=True)
|
||||||
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
|
if ARD:
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
|
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
|
||||||
dist = parse_expr(dist_string)
|
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
|
||||||
f = rbf_variance*sp.exp(-dist/2.)
|
dist = parse_expr(dist_string)
|
||||||
else:
|
f = rbf_variance*sp.exp(-dist/2.)
|
||||||
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
|
else:
|
||||||
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
|
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
|
||||||
dist = parse_expr(dist_string)
|
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
|
||||||
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
|
dist = parse_expr(dist_string)
|
||||||
return kern(D,[spkern(D,f)])
|
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
|
||||||
|
return kern(D,[spkern(D,f)])
|
||||||
|
|
||||||
def sympykern(D,k):
|
def sympykern(D,k):
|
||||||
"""
|
"""
|
||||||
A kernel from a symbolic sympy representation
|
A kernel from a symbolic sympy representation
|
||||||
"""
|
"""
|
||||||
return kern(D,[spkern(D,k)])
|
return kern(D,[spkern(D,k)])
|
||||||
|
del sympy_available
|
||||||
|
|
||||||
def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
|
def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
|
||||||
"""
|
"""
|
||||||
|
|
@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
|
||||||
"""
|
"""
|
||||||
part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
|
part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
|
||||||
return kern(D,[part])
|
return kern(D,[part])
|
||||||
|
|
||||||
|
def independent_outputs(k):
    """
    Construct a kernel with independent outputs from an existing kernel.

    Every part of `k` is wrapped in an independent-outputs part and the
    wrapped parts are collected in a new kernel whose input dimension is
    `k.D + 1`.

    :param k: the existing kernel; none of its input slices may have a start or stop set
    :returns: a new kern built from the wrapped parts
    """
    # The wrapped parts see one extra input column, so restricted input
    # slices of the original kernel cannot be carried over (yet).
    unsliced = all((sl.start is None) and (sl.stop is None) for sl in k.input_slices)
    assert unsliced, "cannot adjust input slices! (TODO)"

    wrapped_parts = [independent_output_part(part) for part in k.parts]
    return kern(k.D + 1, wrapped_parts)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,10 +5,11 @@ from kernpart import kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from GPy.util.linalg import mdot, pdinv
|
from GPy.util.linalg import mdot, pdinv
|
||||||
import pdb
|
import pdb
|
||||||
|
from scipy import weave
|
||||||
|
|
||||||
class coregionalise(kernpart):
|
class coregionalise(kernpart):
|
||||||
"""
|
"""
|
||||||
Kernel for Intrisec Corregionalization Models
|
Kernel for Intrinsic Corregionalization Models
|
||||||
"""
|
"""
|
||||||
def __init__(self,Nout,R=1, W=None, kappa=None):
|
def __init__(self,Nout,R=1, W=None, kappa=None):
|
||||||
self.D = 1
|
self.D = 1
|
||||||
|
|
@ -42,19 +43,70 @@ class coregionalise(kernpart):
|
||||||
|
|
||||||
def K(self,index,index2,target):
|
def K(self,index,index2,target):
|
||||||
index = np.asarray(index,dtype=np.int)
|
index = np.asarray(index,dtype=np.int)
|
||||||
|
|
||||||
|
#here's the old code (numpy)
|
||||||
|
#if index2 is None:
|
||||||
|
#index2 = index
|
||||||
|
#else:
|
||||||
|
#index2 = np.asarray(index2,dtype=np.int)
|
||||||
|
#false_target = target.copy()
|
||||||
|
#ii,jj = np.meshgrid(index,index2)
|
||||||
|
#ii,jj = ii.T, jj.T
|
||||||
|
#false_target += self.B[ii,jj]
|
||||||
|
|
||||||
if index2 is None:
|
if index2 is None:
|
||||||
index2 = index
|
code="""
|
||||||
|
for(int i=0;i<N; i++){
|
||||||
|
target[i+i*N] += B[index[i]+Nout*index[i]];
|
||||||
|
for(int j=0; j<i; j++){
|
||||||
|
target[j+i*N] += B[index[i]+Nout*index[j]];
|
||||||
|
target[i+j*N] += target[j+i*N];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
N,B,Nout = index.size, self.B, self.Nout
|
||||||
|
weave.inline(code,['target','index','N','B','Nout'])
|
||||||
else:
|
else:
|
||||||
index2 = np.asarray(index2,dtype=np.int)
|
index2 = np.asarray(index2,dtype=np.int)
|
||||||
ii,jj = np.meshgrid(index,index2)
|
code="""
|
||||||
ii,jj = ii.T, jj.T
|
for(int i=0;i<M; i++){
|
||||||
target += self.B[ii,jj]
|
for(int j=0; j<N; j++){
|
||||||
|
target[i+j*M] += B[Nout*index[j]+index2[i]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
N,M,B,Nout = index.size,index2.size, self.B, self.Nout
|
||||||
|
weave.inline(code,['target','index','index2','N','M','B','Nout'])
|
||||||
|
|
||||||
|
|
||||||
def Kdiag(self,index,target):
|
def Kdiag(self,index,target):
|
||||||
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,index,index2,target):
|
def dK_dtheta(self,dL_dK,index,index2,target):
|
||||||
index = np.asarray(index,dtype=np.int)
|
index = np.asarray(index,dtype=np.int)
|
||||||
|
dL_dK_small = np.zeros_like(self.B)
|
||||||
|
if index2 is None:
|
||||||
|
index2 = index
|
||||||
|
else:
|
||||||
|
index2 = np.asarray(index2,dtype=np.int)
|
||||||
|
|
||||||
|
code="""
|
||||||
|
for(int i=0; i<M; i++){
|
||||||
|
for(int j=0; j<N; j++){
|
||||||
|
dL_dK_small[index[j] + Nout*index2[i]] += dL_dK[i+j*M];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
N, M, Nout = index.size, index2.size, self.Nout
|
||||||
|
weave.inline(code, ['N','M','Nout','dL_dK','dL_dK_small','index','index2'])
|
||||||
|
|
||||||
|
dkappa = np.diag(dL_dK_small)
|
||||||
|
dL_dK_small += dL_dK_small.T
|
||||||
|
dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
|
||||||
|
|
||||||
|
target += np.hstack([dW.flatten(),dkappa])
|
||||||
|
|
||||||
|
def dK_dtheta_old(self,dL_dK,index,index2,target):
|
||||||
if index2 is None:
|
if index2 is None:
|
||||||
index2 = index
|
index2 = index
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
97
GPy/kern/independent_outputs.py
Normal file
97
GPy/kern/independent_outputs.py
Normal file
|
|
@ -0,0 +1,97 @@
|
||||||
|
# Copyright (c) 2012, James Hesnsman
|
||||||
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
from kernpart import kernpart
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
def index_to_slices(index):
    """
    Take an array of integers (index) and return a nested list of slices,
    such that the slices describe the start and stop points of every
    contiguous run of each integer in the index.

    e.g.
    >>> index = np.asarray([0,0,0,1,1,1,2,2,2])
    returns
    >>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]

    or, a more complicated example
    >>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
    returns
    >>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]

    :param index: sequence of non-negative integer labels (anything np.asarray can cast to int64)
    :returns: a list with max(index)+1 entries; entry i is the list of slices
              covering the runs where index == i (empty if i never occurs).
              An empty index gives an empty list.
    """
    ind = np.asarray(index, dtype=np.int64).ravel()
    if ind.size == 0:
        # nothing to slice; avoids calling max() on an empty array
        return []

    # construct the return structure: one list of slices per label value
    ret = [[] for _ in range(ind.max() + 1)]

    # find the switchpoints: positions where the label differs from its predecessor
    switchpoints = np.nonzero(np.diff(ind))[0] + 1
    starts = np.hstack(([0], switchpoints))
    stops = np.hstack((switchpoints, [ind.size]))

    for start, stop in zip(starts, stops):
        # every run is filed under the label found at its first element
        ret[ind[start]].append(slice(int(start), int(stop)))
    return ret
|
||||||
|
|
||||||
|
class independent_outputs(kernpart):
    """
    A kernel part which can represent several independent functions.
    This kernel 'switches off' parts of the matrix where the output indexes are different.

    The index of the functions is given by the last column in the input X;
    the rest of the columns of X are passed to the wrapped kernel part for computation (in blocks).

    All parameters are those of the wrapped part `k`; this class adds none of its own.
    """
    def __init__(self, k):
        """
        :param k: the kernel part to wrap (must provide D, Nparam, name and the kernpart methods used below)
        """
        self.D = k.D + 1  # one extra input column carries the output index
        self.Nparam = k.Nparam
        self.name = 'iops(' + k.name + ')'
        self.k = k

    def _get_params(self):
        """Return the parameters of the wrapped part."""
        return self.k._get_params()

    def _set_params(self, x):
        """Forward the parameters to the wrapped part and keep a reference in self.params."""
        self.k._set_params(x)
        self.params = x

    def _get_param_names(self):
        """Return the parameter names of the wrapped part."""
        return self.k._get_param_names()

    def K(self, X, X2, target):
        """
        Add the covariance between X and X2 to target, block by block.

        Only blocks whose rows and columns belong to the same output index
        are passed to the wrapped part; the rest of target is left untouched.
        """
        # Sort out the slices from the input data
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])

        # zip pairs up the slice lists of the same output index in X and X2
        for slices_i, slices_j in zip(slices, slices2):
            for s2 in slices_j:
                for s in slices_i:
                    # target[s, s2] is a view, so the wrapped part accumulates in place
                    self.k.K(X[s], X2[s2], target[s, s2])

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance of X to target, one run of equal output index at a time."""
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        for slices_i in slices:
            for s in slices_i:
                self.k.Kdiag(X[s], target[s])

    def dK_dtheta(self, dL_dK, X, X2, target):
        """Accumulate into target the gradient w.r.t. the parameters, summed over the same-output blocks."""
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])

        for slices_i, slices_j in zip(slices, slices2):
            for s2 in slices_j:
                for s in slices_i:
                    self.k.dK_dtheta(dL_dK[s, s2], X[s], X2[s2], target)

    def dK_dX(self, dL_dK, X, X2, target):
        """
        Accumulate into target the gradient w.r.t. the inputs X.

        The last column of target (the output-index column) is never written.
        """
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        if X2 is None:
            X2, slices2 = X, slices
        else:
            X2, slices2 = X2[:, :-1], index_to_slices(X2[:, -1])

        for slices_i, slices_j in zip(slices, slices2):
            for s2 in slices_j:
                for s in slices_i:
                    self.k.dK_dX(dL_dK[s, s2], X[s], X2[s2], target[s, :-1])

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Accumulate into target the gradient of the diagonal w.r.t. X (index column excluded)."""
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        for slices_i in slices:
            for s in slices_i:
                self.k.dKdiag_dX(dL_dKdiag[s], X[s], target[s, :-1])

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Accumulate into target the gradient of the diagonal w.r.t. the parameters."""
        X, slices = X[:, :-1], index_to_slices(X[:, -1])
        for slices_i in slices:
            for s in slices_i:
                # must be the parameter gradient of the wrapped part: target is
                # the parameter-gradient accumulator, not an input-gradient array
                self.k.dKdiag_dtheta(dL_dKdiag[s], X[s], target)
|
||||||
571
GPy/kern/kern.py
571
GPy/kern/kern.py
|
|
@ -9,19 +9,14 @@ from kernpart import kernpart
|
||||||
import itertools
|
import itertools
|
||||||
from prod_orthogonal import prod_orthogonal
|
from prod_orthogonal import prod_orthogonal
|
||||||
from prod import prod
|
from prod import prod
|
||||||
|
from ..util.linalg import symmetrify
|
||||||
|
|
||||||
class kern(parameterised):
|
class kern(parameterised):
|
||||||
def __init__(self,D,parts=[], input_slices=None):
|
def __init__(self, D, parts=[], input_slices=None):
|
||||||
"""
|
"""
|
||||||
This kernel does 'compound' structures.
|
This is the main kernel class for GPy. It handles multiple (additive) kernel functions, and keeps track of variaous things like which parameters live where.
|
||||||
|
|
||||||
The compund structure enables many features of GPy, including
|
The technical code for kernels is divided into _parts_ (see e.g. rbf.py). This obnject contains a list of parts, which are computed additively. For multiplication, special _prod_ parts are used.
|
||||||
- Hierarchical models
|
|
||||||
- Correleated output models
|
|
||||||
- multi-view learning
|
|
||||||
|
|
||||||
Hadamard product and outer-product kernels will require a new class.
|
|
||||||
This feature is currently WONTFIX. for small number sof inputs, you can use the sympy kernel for this.
|
|
||||||
|
|
||||||
:param D: The dimensioality of the kernel's input space
|
:param D: The dimensioality of the kernel's input space
|
||||||
:type D: int
|
:type D: int
|
||||||
|
|
@ -37,15 +32,15 @@ class kern(parameterised):
|
||||||
|
|
||||||
self.D = D
|
self.D = D
|
||||||
|
|
||||||
#deal with input_slices
|
# deal with input_slices
|
||||||
if input_slices is None:
|
if input_slices is None:
|
||||||
self.input_slices = [slice(None) for p in self.parts]
|
self.input_slices = [slice(None) for p in self.parts]
|
||||||
else:
|
else:
|
||||||
assert len(input_slices)==len(self.parts)
|
assert len(input_slices) == len(self.parts)
|
||||||
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
|
self.input_slices = [sl if type(sl) is slice else slice(None) for sl in input_slices]
|
||||||
|
|
||||||
for p in self.parts:
|
for p in self.parts:
|
||||||
assert isinstance(p,kernpart), "bad kernel part"
|
assert isinstance(p, kernpart), "bad kernel part"
|
||||||
|
|
||||||
self.compute_param_slices()
|
self.compute_param_slices()
|
||||||
|
|
||||||
|
|
@ -53,11 +48,7 @@ class kern(parameterised):
|
||||||
|
|
||||||
|
|
||||||
def plot_ARD(self, ax=None):
|
def plot_ARD(self, ax=None):
|
||||||
"""
|
"""If an ARD kernel is present, it bar-plots the ARD parameters"""
|
||||||
If an ARD kernel is present, it bar-plots the ARD parameters
|
|
||||||
|
|
||||||
|
|
||||||
"""
|
|
||||||
if ax is None:
|
if ax is None:
|
||||||
ax = pb.gca()
|
ax = pb.gca()
|
||||||
for p in self.parts:
|
for p in self.parts:
|
||||||
|
|
@ -67,22 +58,20 @@ class kern(parameterised):
|
||||||
if p.name == 'linear':
|
if p.name == 'linear':
|
||||||
ard_params = p.variances
|
ard_params = p.variances
|
||||||
else:
|
else:
|
||||||
ard_params = 1./p.lengthscale
|
ard_params = 1. / p.lengthscale
|
||||||
|
|
||||||
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
|
ax.bar(np.arange(len(ard_params)) - 0.4, ard_params)
|
||||||
ax.set_xticks(np.arange(len(ard_params)),
|
ax.set_xticks(np.arange(len(ard_params)))
|
||||||
["${}$".format(i + 1) for i in range(len(ard_params))])
|
ax.set_xticklabels([r"${}$".format(i + 1) for i in range(len(ard_params))])
|
||||||
return ax
|
return ax
|
||||||
|
|
||||||
def _transform_gradients(self,g):
|
def _transform_gradients(self, g):
|
||||||
x = self._get_params()
|
x = self._get_params()
|
||||||
g[self.constrained_positive_indices] = g[self.constrained_positive_indices]*x[self.constrained_positive_indices]
|
[np.put(x,i,x*t.gradfactor(x[i])) for i,t in zip(self.constrained_indices, self.constraints)]
|
||||||
g[self.constrained_negative_indices] = g[self.constrained_negative_indices]*x[self.constrained_negative_indices]
|
[np.put(g, i, v) for i, v in [(t[0], np.sum(g[t])) for t in self.tied_indices]]
|
||||||
[np.put(g,i,g[i]*(x[i]-l)*(h-x[i])/(h-l)) for i,l,h in zip(self.constrained_bounded_indices, self.constrained_bounded_lowers, self.constrained_bounded_uppers)]
|
if len(self.tied_indices) or len(self.fixed_indices):
|
||||||
[np.put(g,i,v) for i,v in [(t[0],np.sum(g[t])) for t in self.tied_indices]]
|
to_remove = np.hstack((self.fixed_indices + [t[1:] for t in self.tied_indices]))
|
||||||
if len(self.tied_indices) or len(self.constrained_fixed_indices):
|
return np.delete(g, to_remove)
|
||||||
to_remove = np.hstack((self.constrained_fixed_indices+[t[1:] for t in self.tied_indices]))
|
|
||||||
return np.delete(g,to_remove)
|
|
||||||
else:
|
else:
|
||||||
return g
|
return g
|
||||||
|
|
||||||
|
|
@ -91,52 +80,21 @@ class kern(parameterised):
|
||||||
self.param_slices = []
|
self.param_slices = []
|
||||||
count = 0
|
count = 0
|
||||||
for p in self.parts:
|
for p in self.parts:
|
||||||
self.param_slices.append(slice(count,count+p.Nparam))
|
self.param_slices.append(slice(count, count + p.Nparam))
|
||||||
count += p.Nparam
|
count += p.Nparam
|
||||||
|
|
||||||
def _process_slices(self,slices1=None,slices2=None):
|
def __add__(self, other):
|
||||||
"""
|
|
||||||
Format the slices so that they can easily be used.
|
|
||||||
Both slices can be any of three things:
|
|
||||||
- If None, the new points covary through every kernel part (default)
|
|
||||||
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
|
|
||||||
- If a list of booleans, specifying which kernel parts are active
|
|
||||||
|
|
||||||
if the second arg is False, return only slices1
|
|
||||||
|
|
||||||
returns actual lists of slice objects
|
|
||||||
"""
|
|
||||||
if slices1 is None:
|
|
||||||
slices1 = [slice(None)]*self.Nparts
|
|
||||||
elif all([type(s_i) is bool for s_i in slices1]):
|
|
||||||
slices1 = [slice(None) if s_i else slice(0) for s_i in slices1]
|
|
||||||
else:
|
|
||||||
assert all([type(s_i) is slice for s_i in slices1]), "invalid slice objects"
|
|
||||||
if slices2 is None:
|
|
||||||
slices2 = [slice(None)]*self.Nparts
|
|
||||||
elif slices2 is False:
|
|
||||||
return slices1
|
|
||||||
elif all([type(s_i) is bool for s_i in slices2]):
|
|
||||||
slices2 = [slice(None) if s_i else slice(0) for s_i in slices2]
|
|
||||||
else:
|
|
||||||
assert all([type(s_i) is slice for s_i in slices2]), "invalid slice objects"
|
|
||||||
return slices1, slices2
|
|
||||||
|
|
||||||
def __add__(self,other):
|
|
||||||
assert self.D == other.D
|
assert self.D == other.D
|
||||||
newkern = kern(self.D,self.parts+other.parts, self.input_slices + other.input_slices)
|
newkern = kern(self.D, self.parts + other.parts, self.input_slices + other.input_slices)
|
||||||
#transfer constraints:
|
# transfer constraints:
|
||||||
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
|
newkern.constrained_indices = self.constrained_indices + [i+self.Nparam for i in other.constrained_indices]
|
||||||
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
|
newkern.constraints = self.constraints + other.constraints
|
||||||
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
|
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
|
||||||
newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
|
newkern.fixed_values = self.fixed_values + other.fixed_values
|
||||||
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
|
|
||||||
newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
|
|
||||||
newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
|
|
||||||
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
|
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
|
||||||
return newkern
|
return newkern
|
||||||
|
|
||||||
def add(self,other):
|
def add(self, other):
|
||||||
"""
|
"""
|
||||||
Add another kernel to this one. Both kernels are defined on the same _space_
|
Add another kernel to this one. Both kernels are defined on the same _space_
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
|
|
@ -144,38 +102,37 @@ class kern(parameterised):
|
||||||
"""
|
"""
|
||||||
return self + other
|
return self + other
|
||||||
|
|
||||||
def add_orthogonal(self,other):
|
def add_orthogonal(self, other):
|
||||||
"""
|
"""
|
||||||
Add another kernel to this one. Both kernels are defined on separate spaces
|
Add another kernel to this one. Both kernels are defined on separate spaces
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
:type other: GPy.kern
|
:type other: GPy.kern
|
||||||
"""
|
"""
|
||||||
#deal with input slices
|
# deal with input slices
|
||||||
D = self.D + other.D
|
D = self.D + other.D
|
||||||
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
|
self_input_slices = [slice(*sl.indices(self.D)) for sl in self.input_slices]
|
||||||
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
|
other_input_indices = [sl.indices(other.D) for sl in other.input_slices]
|
||||||
other_input_slices = [slice(i[0]+self.D,i[1]+self.D,i[2]) for i in other_input_indices]
|
other_input_slices = [slice(i[0] + self.D, i[1] + self.D, i[2]) for i in other_input_indices]
|
||||||
|
|
||||||
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
|
newkern = kern(D, self.parts + other.parts, self_input_slices + other_input_slices)
|
||||||
|
|
||||||
#transfer constraints:
|
# transfer constraints:
|
||||||
newkern.constrained_positive_indices = np.hstack((self.constrained_positive_indices, self.Nparam + other.constrained_positive_indices))
|
newkern.constrained_indices = self.constrained_indices + [x+self.Nparam for x in other.constrained_indices]
|
||||||
newkern.constrained_negative_indices = np.hstack((self.constrained_negative_indices, self.Nparam + other.constrained_negative_indices))
|
newkern.constraints = self.constraints + other.constraints
|
||||||
newkern.constrained_bounded_indices = self.constrained_bounded_indices + [self.Nparam + x for x in other.constrained_bounded_indices]
|
newkern.fixed_indices = self.fixed_indices + [self.Nparam + x for x in other.fixed_indices]
|
||||||
newkern.constrained_bounded_lowers = self.constrained_bounded_lowers + other.constrained_bounded_lowers
|
newkern.fixed_values = self.fixed_values + other.fixed_values
|
||||||
|
newkern.constraints = self.constraints + other.constraints
|
||||||
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
|
newkern.constrained_bounded_uppers = self.constrained_bounded_uppers + other.constrained_bounded_uppers
|
||||||
newkern.constrained_fixed_indices = self.constrained_fixed_indices + [self.Nparam + x for x in other.constrained_fixed_indices]
|
|
||||||
newkern.constrained_fixed_values = self.constrained_fixed_values + other.constrained_fixed_values
|
|
||||||
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
|
newkern.tied_indices = self.tied_indices + [self.Nparam + x for x in other.tied_indices]
|
||||||
return newkern
|
return newkern
|
||||||
|
|
||||||
def __mul__(self,other):
|
def __mul__(self, other):
|
||||||
"""
|
"""
|
||||||
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
|
Shortcut for `prod_orthogonal`. Note that `+` assumes that we sum 2 kernels defines on the same space whereas `*` assumes that the kernels are defined on different subspaces.
|
||||||
"""
|
"""
|
||||||
return self.prod(other)
|
return self.prod(other)
|
||||||
|
|
||||||
def prod(self,other):
|
def prod(self, other):
|
||||||
"""
|
"""
|
||||||
multiply two kernels defined on the same spaces.
|
multiply two kernels defined on the same spaces.
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
|
|
@ -184,20 +141,20 @@ class kern(parameterised):
|
||||||
K1 = self.copy()
|
K1 = self.copy()
|
||||||
K2 = other.copy()
|
K2 = other.copy()
|
||||||
|
|
||||||
newkernparts = [prod(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
|
newkernparts = [prod(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
|
||||||
|
|
||||||
slices = []
|
slices = []
|
||||||
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
|
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
|
||||||
s1, s2 = [False]*K1.D, [False]*K2.D
|
s1, s2 = [False] * K1.D, [False] * K2.D
|
||||||
s1[sl1], s2[sl2] = [True], [True]
|
s1[sl1], s2[sl2] = [True], [True]
|
||||||
slices += [s1+s2]
|
slices += [s1 + s2]
|
||||||
|
|
||||||
newkern = kern(K1.D, newkernparts, slices)
|
newkern = kern(K1.D, newkernparts, slices)
|
||||||
newkern._follow_constrains(K1,K2)
|
newkern._follow_constrains(K1, K2)
|
||||||
|
|
||||||
return newkern
|
return newkern
|
||||||
|
|
||||||
def prod_orthogonal(self,other):
|
def prod_orthogonal(self, other):
|
||||||
"""
|
"""
|
||||||
multiply two kernels. Both kernels are defined on separate spaces.
|
multiply two kernels. Both kernels are defined on separate spaces.
|
||||||
:param other: the other kernel to be added
|
:param other: the other kernel to be added
|
||||||
|
|
@ -206,31 +163,31 @@ class kern(parameterised):
|
||||||
K1 = self.copy()
|
K1 = self.copy()
|
||||||
K2 = other.copy()
|
K2 = other.copy()
|
||||||
|
|
||||||
newkernparts = [prod_orthogonal(k1,k2) for k1, k2 in itertools.product(K1.parts,K2.parts)]
|
newkernparts = [prod_orthogonal(k1, k2) for k1, k2 in itertools.product(K1.parts, K2.parts)]
|
||||||
|
|
||||||
slices = []
|
slices = []
|
||||||
for sl1, sl2 in itertools.product(K1.input_slices,K2.input_slices):
|
for sl1, sl2 in itertools.product(K1.input_slices, K2.input_slices):
|
||||||
s1, s2 = [False]*K1.D, [False]*K2.D
|
s1, s2 = [False] * K1.D, [False] * K2.D
|
||||||
s1[sl1], s2[sl2] = [True], [True]
|
s1[sl1], s2[sl2] = [True], [True]
|
||||||
slices += [s1+s2]
|
slices += [s1 + s2]
|
||||||
|
|
||||||
newkern = kern(K1.D + K2.D, newkernparts, slices)
|
newkern = kern(K1.D + K2.D, newkernparts, slices)
|
||||||
newkern._follow_constrains(K1,K2)
|
newkern._follow_constrains(K1, K2)
|
||||||
|
|
||||||
return newkern
|
return newkern
|
||||||
|
|
||||||
def _follow_constrains(self,K1,K2):
|
def _follow_constrains(self, K1, K2):
|
||||||
|
|
||||||
# Build the array that allows to go from the initial indices of the param to the new ones
|
# Build the array that allows to go from the initial indices of the param to the new ones
|
||||||
K1_param = []
|
K1_param = []
|
||||||
n = 0
|
n = 0
|
||||||
for k1 in K1.parts:
|
for k1 in K1.parts:
|
||||||
K1_param += [range(n,n+k1.Nparam)]
|
K1_param += [range(n, n + k1.Nparam)]
|
||||||
n += k1.Nparam
|
n += k1.Nparam
|
||||||
n = 0
|
n = 0
|
||||||
K2_param = []
|
K2_param = []
|
||||||
for k2 in K2.parts:
|
for k2 in K2.parts:
|
||||||
K2_param += [range(K1.Nparam+n,K1.Nparam+n+k2.Nparam)]
|
K2_param += [range(K1.Nparam + n, K1.Nparam + n + k2.Nparam)]
|
||||||
n += k2.Nparam
|
n += k2.Nparam
|
||||||
index_param = []
|
index_param = []
|
||||||
for p1 in K1_param:
|
for p1 in K1_param:
|
||||||
|
|
@ -241,60 +198,53 @@ class kern(parameterised):
|
||||||
# Get the ties and constrains of the kernels before the multiplication
|
# Get the ties and constrains of the kernels before the multiplication
|
||||||
prev_ties = K1.tied_indices + [arr + K1.Nparam for arr in K2.tied_indices]
|
prev_ties = K1.tied_indices + [arr + K1.Nparam for arr in K2.tied_indices]
|
||||||
|
|
||||||
prev_constr_pos = np.append(K1.constrained_positive_indices, K1.Nparam + K2.constrained_positive_indices)
|
prev_constr_ind = [K1.constrained_indices] + [K1.Nparam + i for i in K2.constrained_indices]
|
||||||
prev_constr_neg = np.append(K1.constrained_negative_indices, K1.Nparam + K2.constrained_negative_indices)
|
prev_constr = K1.constraints + K2.constraints
|
||||||
|
|
||||||
prev_constr_fix = K1.constrained_fixed_indices + [arr + K1.Nparam for arr in K2.constrained_fixed_indices]
|
prev_constr_fix = K1.fixed_indices + [arr + K1.Nparam for arr in K2.fixed_indices]
|
||||||
prev_constr_fix_values = K1.constrained_fixed_values + K2.constrained_fixed_values
|
prev_constr_fix_values = K1.fixed_values + K2.fixed_values
|
||||||
|
|
||||||
prev_constr_bou = K1.constrained_bounded_indices + [arr + K1.Nparam for arr in K2.constrained_bounded_indices]
|
|
||||||
prev_constr_bou_low = K1.constrained_bounded_lowers + K2.constrained_bounded_lowers
|
|
||||||
prev_constr_bou_upp = K1.constrained_bounded_uppers + K2.constrained_bounded_uppers
|
|
||||||
|
|
||||||
# follow the previous ties
|
# follow the previous ties
|
||||||
for arr in prev_ties:
|
for arr in prev_ties:
|
||||||
for j in arr:
|
for j in arr:
|
||||||
index_param[np.where(index_param==j)[0]] = arr[0]
|
index_param[np.where(index_param == j)[0]] = arr[0]
|
||||||
|
|
||||||
# ties and constrains
|
# ties and constrains
|
||||||
for i in range(K1.Nparam + K2.Nparam):
|
for i in range(K1.Nparam + K2.Nparam):
|
||||||
index = np.where(index_param==i)[0]
|
index = np.where(index_param == i)[0]
|
||||||
if index.size > 1:
|
if index.size > 1:
|
||||||
self.tie_params(index)
|
self.tie_params(index)
|
||||||
for i in prev_constr_pos:
|
for i,t in zip(prev_constr_ind,prev_constr):
|
||||||
self.constrain_positive(np.where(index_param==i)[0])
|
self.constrain(np.where(index_param == i)[0],t)
|
||||||
for i in prev_constr_neg:
|
|
||||||
self.constrain_neg(np.where(index_param==i)[0])
|
|
||||||
for j, i in enumerate(prev_constr_fix):
|
|
||||||
self.constrain_fixed(np.where(index_param==i)[0],prev_constr_fix_values[j])
|
|
||||||
for j, i in enumerate(prev_constr_bou):
|
|
||||||
self.constrain_bounded(np.where(index_param==i)[0],prev_constr_bou_low[j],prev_constr_bou_upp[j])
|
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack([p._get_params() for p in self.parts])
|
return np.hstack([p._get_params() for p in self.parts])
|
||||||
|
|
||||||
def _set_params(self,x):
|
def _set_params(self, x):
|
||||||
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
|
[p._set_params(x[s]) for p, s in zip(self.parts, self.param_slices)]
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
#this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
|
# this is a bit nasty: we wat to distinguish between parts with the same name by appending a count
|
||||||
part_names = np.array([k.name for k in self.parts],dtype=np.str)
|
part_names = np.array([k.name for k in self.parts], dtype=np.str)
|
||||||
counts = [np.sum(part_names==ni) for i, ni in enumerate(part_names)]
|
counts = [np.sum(part_names == ni) for i, ni in enumerate(part_names)]
|
||||||
cum_counts = [np.sum(part_names[i:]==ni) for i, ni in enumerate(part_names)]
|
cum_counts = [np.sum(part_names[i:] == ni) for i, ni in enumerate(part_names)]
|
||||||
names = [name+'_'+str(cum_count) if count>1 else name for name,count,cum_count in zip(part_names,counts,cum_counts)]
|
names = [name + '_' + str(cum_count) if count > 1 else name for name, count, cum_count in zip(part_names, counts, cum_counts)]
|
||||||
|
|
||||||
return sum([[name+'_'+n for n in k._get_param_names()] for name,k in zip(names,self.parts)],[])
|
return sum([[name + '_' + n for n in k._get_param_names()] for name, k in zip(names, self.parts)], [])
|
||||||
|
|
||||||
def K(self, X, X2=None, which_parts='all'):
    """Compute the covariance matrix as the sum of the selected parts.

    :param X: inputs; ``X.shape[1]`` must equal ``self.D``
    :type X: np.ndarray (N x D)
    :param X2: optional second set of inputs. When None, the N x N matrix
        of X with itself is computed and None is forwarded to every part.
    :type X2: np.ndarray (M x D)
    :param which_parts: ``'all'`` (default) or one flag per part; only the
        parts whose flag is true contribute to the result
    :returns: np.ndarray (N x N) when X2 is None, otherwise (N x M)
    """
    # Compare first and test the *type* of the result: with an ndarray mask
    # the comparison may itself be an array, which must not be used in `if`.
    selects_all = which_parts == 'all'
    if isinstance(selects_all, bool) and selects_all:
        which_parts = [True] * self.Nparts
    assert X.shape[1] == self.D

    symmetric = X2 is None
    target = np.zeros((X.shape[0], X.shape[0] if symmetric else X2.shape[0]))
    for part, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts):
        if part_i_used:
            # each part accumulates its contribution into target in place
            part.K(X[:, i_s], None if symmetric else X2[:, i_s], target=target)
    return target
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
|
def dK_dtheta(self, dL_dK, X, X2=None):
|
||||||
"""
|
"""
|
||||||
:param dL_dK: An array of dL_dK derivaties, dL_dK
|
:param dL_dK: An array of dL_dK derivaties, dL_dK
|
||||||
:type dL_dK: Np.ndarray (N x M)
|
:type dL_dK: Np.ndarray (N x M)
|
||||||
|
|
@ -302,286 +252,275 @@ class kern(parameterised):
|
||||||
:type X: np.ndarray (N x D)
|
:type X: np.ndarray (N x D)
|
||||||
:param X2: Observed dara inputs (optional, defaults to X)
|
:param X2: Observed dara inputs (optional, defaults to X)
|
||||||
:type X2: np.ndarray (M x D)
|
:type X2: np.ndarray (M x D)
|
||||||
:param slices1: a slice object for each kernel part, describing which data are affected by each kernel part
|
|
||||||
:type slices1: list of slice objects, or list of booleans
|
|
||||||
:param slices2: slices for X2
|
|
||||||
"""
|
"""
|
||||||
assert X.shape[1]==self.D
|
assert X.shape[1] == self.D
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
if X2 is None:
|
|
||||||
X2 = X
|
|
||||||
target = np.zeros(self.Nparam)
|
target = np.zeros(self.Nparam)
|
||||||
[p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
|
if X2 is None:
|
||||||
|
[p.dK_dtheta(dL_dK, X[:, i_s], None, target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
||||||
|
else:
|
||||||
|
[p.dK_dtheta(dL_dK, X[:, i_s], X2[:, i_s], target[ps]) for p, i_s, ps, in zip(self.parts, self.input_slices, self.param_slices)]
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dK_dX(self, dL_dK, X, X2=None):
    """Accumulate the gradient with respect to the inputs X over all parts.

    :param dL_dK: derivative of the objective with respect to K
    :param X: inputs the gradient is taken with respect to
    :param X2: optional second set of inputs; defaults to X
    :returns: np.ndarray with the shape of X
    """
    if X2 is None:
        # The parts always receive an explicit X2. The former second
        # `if X2 is None` branch could never run after this assignment
        # and has been removed.
        X2 = X
    target = np.zeros_like(X)
    for part, i_s in zip(self.parts, self.input_slices):
        # NOTE(review): in-place accumulation into target[:, i_s] assumes
        # the input slices are slice objects (views) - confirm.
        part.dK_dX(dL_dK, X[:, i_s], X2[:, i_s], target[:, i_s])
    return target
|
||||||
|
|
||||||
def Kdiag(self, X, which_parts='all'):
    """Compute the diagonal of the covariance matrix for the inputs X.

    :param X: inputs; ``X.shape[1]`` must equal ``self.D``
    :type X: np.ndarray (N x D)
    :param which_parts: ``'all'`` (default) or one flag per part; only the
        parts whose flag is true contribute to the result
    :returns: np.ndarray of length N
    """
    # Compare first and test the *type* of the result: with an ndarray mask
    # the comparison may itself be an array, which must not be used in `if`.
    selects_all = which_parts == 'all'
    if isinstance(selects_all, bool) and selects_all:
        which_parts = [True] * self.Nparts
    assert X.shape[1] == self.D

    target = np.zeros(X.shape[0])
    for part, i_s, part_i_used in zip(self.parts, self.input_slices, which_parts):
        # honour the selection, exactly as K does
        if part_i_used:
            part.Kdiag(X[:, i_s], target=target)
    return target
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
|
def dKdiag_dtheta(self, dL_dKdiag, X):
|
||||||
assert X.shape[1]==self.D
|
assert X.shape[1] == self.D
|
||||||
assert len(dL_dKdiag.shape)==1
|
assert dL_dKdiag.size == X.shape[0]
|
||||||
assert dL_dKdiag.size==X.shape[0]
|
|
||||||
slices = self._process_slices(slices,False)
|
|
||||||
target = np.zeros(self.Nparam)
|
target = np.zeros(self.Nparam)
|
||||||
[p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
|
[p.dKdiag_dtheta(dL_dKdiag, X[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, slices=None):
|
def dKdiag_dX(self, dL_dKdiag, X):
|
||||||
assert X.shape[1]==self.D
|
assert X.shape[1] == self.D
|
||||||
slices = self._process_slices(slices,False)
|
|
||||||
target = np.zeros_like(X)
|
target = np.zeros_like(X)
|
||||||
[p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
|
[p.dKdiag_dX(dL_dKdiag, X[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def psi0(self,Z,mu,S,slices=None):
|
def psi0(self, Z, mu, S):
|
||||||
slices = self._process_slices(slices,False)
|
|
||||||
target = np.zeros(mu.shape[0])
|
target = np.zeros(mu.shape[0])
|
||||||
[p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
|
[p.psi0(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
|
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S):
|
||||||
slices = self._process_slices(slices,False)
|
|
||||||
target = np.zeros(self.Nparam)
|
target = np.zeros(self.Nparam)
|
||||||
[p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
|
[p.dpsi0_dtheta(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
|
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S):
|
||||||
slices = self._process_slices(slices,False)
|
target_mu, target_S = np.zeros_like(mu), np.zeros_like(S)
|
||||||
target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
|
[p.dpsi0_dmuS(dL_dpsi0, Z[:,i_s], mu[:,i_s], S[:,i_s], target_mu[:,i_s], target_S[:,i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
[p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
|
|
||||||
return target_mu,target_S
|
|
||||||
|
|
||||||
def psi1(self,Z,mu,S,slices1=None,slices2=None):
|
|
||||||
"""Think N,M,Q """
|
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
target = np.zeros((mu.shape[0],Z.shape[0]))
|
|
||||||
[p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
|
|
||||||
"""N,M,(Ntheta)"""
|
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
target = np.zeros((self.Nparam))
|
|
||||||
[p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
|
|
||||||
return self._transform_gradients(target)
|
|
||||||
|
|
||||||
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
|
|
||||||
"""N,M,Q"""
|
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
target = np.zeros_like(Z)
|
|
||||||
[p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
|
||||||
return target
|
|
||||||
|
|
||||||
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
|
|
||||||
"""return shapes are N,M,Q"""
|
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
|
|
||||||
[p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def psi2(self,Z,mu,S,slices1=None,slices2=None):
|
def psi1(self, Z, mu, S):
|
||||||
|
target = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
|
[p.psi1(Z[:,i_s], mu[:,i_s], S[:,i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
|
return target
|
||||||
|
|
||||||
|
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S):
|
||||||
|
target = np.zeros((self.Nparam))
|
||||||
|
[p.dpsi1_dtheta(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, ps, i_s in zip(self.parts, self.param_slices, self.input_slices)]
|
||||||
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
|
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S):
|
||||||
|
target = np.zeros_like(Z)
|
||||||
|
[p.dpsi1_dZ(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
|
return target
|
||||||
|
|
||||||
|
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S):
|
||||||
|
"""return shapes are N,M,Q"""
|
||||||
|
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||||
|
[p.dpsi1_dmuS(dL_dpsi1, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
|
return target_mu, target_S
|
||||||
|
|
||||||
|
def psi2(self, Z, mu, S):
|
||||||
"""
|
"""
|
||||||
:param Z: np.ndarray of inducing inputs (M x Q)
|
:param Z: np.ndarray of inducing inputs (M x Q)
|
||||||
:param mu, S: np.ndarrays of means and variances (each N x Q)
|
:param mu, S: np.ndarrays of means and variances (each N x Q)
|
||||||
:returns psi2: np.ndarray (N,M,M)
|
:returns psi2: np.ndarray (N,M,M)
|
||||||
"""
|
"""
|
||||||
target = np.zeros((mu.shape[0],Z.shape[0],Z.shape[0]))
|
target = np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
[p.psi2(Z[:, i_s], mu[:, i_s], S[:, i_s], target) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
[p.psi2(Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s1,s2,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
|
||||||
|
|
||||||
#compute the "cross" terms
|
# compute the "cross" terms
|
||||||
for p1, p2 in itertools.combinations(self.parts,2):
|
#TODO: input_slices needed
|
||||||
#white doesn;t combine with anything
|
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||||
if p1.name=='white' or p2.name=='white':
|
# white doesn;t combine with anything
|
||||||
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
pass
|
pass
|
||||||
#rbf X bias
|
# rbf X bias
|
||||||
elif p1.name=='bias' and p2.name=='rbf':
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
target += p1.variance*(p2._psi1[:,:,None]+p2._psi1[:,None,:])
|
target += p1.variance * (p2._psi1[:, :, None] + p2._psi1[:, None, :])
|
||||||
elif p2.name=='bias' and p1.name=='rbf':
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
target += p2.variance*(p1._psi1[:,:,None]+p1._psi1[:,None,:])
|
target += p2.variance * (p1._psi1[:, :, None] + p1._psi1[:, None, :])
|
||||||
#linear X bias
|
# linear X bias
|
||||||
elif p1.name=='bias' and p2.name=='linear':
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
tmp = np.zeros((mu.shape[0],Z.shape[0]))
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
p2.psi1(Z,mu,S,tmp)
|
p2.psi1(Z, mu, S, tmp)
|
||||||
target += p1.variance*(tmp[:,:,None] + tmp[:,None,:])
|
target += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
elif p2.name=='bias' and p1.name=='linear':
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
tmp = np.zeros((mu.shape[0],Z.shape[0]))
|
tmp = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
p1.psi1(Z,mu,S,tmp)
|
p1.psi1(Z, mu, S, tmp)
|
||||||
target += p2.variance*(tmp[:,:,None] + tmp[:,None,:])
|
target += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||||
#rbf X linear
|
# rbf X linear
|
||||||
elif p1.name=='linear' and p2.name=='rbf':
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
raise NotImplementedError #TODO
|
raise NotImplementedError # TODO
|
||||||
elif p2.name=='linear' and p1.name=='rbf':
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
raise NotImplementedError #TODO
|
raise NotImplementedError # TODO
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
return target
|
return target
|
||||||
|
|
||||||
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
|
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S):
|
||||||
"""Returns shape (N,M,M,Ntheta)"""
|
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
|
||||||
target = np.zeros(self.Nparam)
|
target = np.zeros(self.Nparam)
|
||||||
[p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
|
[p.dpsi2_dtheta(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[ps]) for p, i_s, ps in zip(self.parts, self.input_slices, self.param_slices)]
|
||||||
|
|
||||||
#compute the "cross" terms
|
# compute the "cross" terms
|
||||||
#TODO: better looping
|
# TODO: better looping, input_slices
|
||||||
for i1, i2 in itertools.combinations(range(len(self.parts)),2):
|
for i1, i2 in itertools.combinations(range(len(self.parts)), 2):
|
||||||
p1,p2 = self.parts[i1], self.parts[i2]
|
p1, p2 = self.parts[i1], self.parts[i2]
|
||||||
ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
# ipsl1, ipsl2 = self.input_slices[i1], self.input_slices[i2]
|
||||||
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
|
ps1, ps2 = self.param_slices[i1], self.param_slices[i2]
|
||||||
|
|
||||||
#white doesn;t combine with anything
|
# white doesn;t combine with anything
|
||||||
if p1.name=='white' or p2.name=='white':
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
pass
|
pass
|
||||||
#rbf X bias
|
# rbf X bias
|
||||||
elif p1.name=='bias' and p2.name=='rbf':
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2.,Z,mu,S,target[ps2])
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2])
|
||||||
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1*2.,Z,mu,S,target[ps1])
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2._psi1 * 2., Z, mu, S, target[ps1])
|
||||||
elif p2.name=='bias' and p1.name=='rbf':
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2.,Z,mu,S,target[ps1])
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
|
||||||
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1*2.,Z,mu,S,target[ps2])
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1._psi1 * 2., Z, mu, S, target[ps2])
|
||||||
#linear X bias
|
# linear X bias
|
||||||
elif p1.name=='bias' and p2.name=='linear':
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance*2., Z, mu, S, target[ps1])
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * p1.variance * 2., Z, mu, S, target[ps2]) # [ps1])
|
||||||
elif p2.name=='bias' and p1.name=='linear':
|
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance*2., Z, mu, S, target[ps1])
|
p2.psi1(Z, mu, S, psi1)
|
||||||
#rbf X linear
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps1])
|
||||||
elif p1.name=='linear' and p2.name=='rbf':
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
raise NotImplementedError #TODO
|
p1.dpsi1_dtheta(dL_dpsi2.sum(1) * p2.variance * 2., Z, mu, S, target[ps1])
|
||||||
elif p2.name=='linear' and p1.name=='rbf':
|
psi1 = np.zeros((mu.shape[0], Z.shape[0]))
|
||||||
raise NotImplementedError #TODO
|
p1.psi1(Z, mu, S, psi1)
|
||||||
|
p2.dpsi1_dtheta(dL_dpsi2.sum(1) * psi1 * 2., Z, mu, S, target[ps2])
|
||||||
|
# rbf X linear
|
||||||
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
|
raise NotImplementedError # TODO
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
|
||||||
return self._transform_gradients(target)
|
return self._transform_gradients(target)
|
||||||
|
|
||||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S):
    """Gradient of the objective through psi2 with respect to Z.

    Sums the gradient of every part and of the supported pairwise "cross"
    terms, then returns twice the accumulated value.

    :param dL_dpsi2: derivative of the objective with respect to psi2
    :param Z: inducing inputs
    :param mu, S: means and variances of the inputs
    :returns: np.ndarray with the shape of Z
    :raises NotImplementedError: for rbf x linear, and for any pair of
        parts other than white / bias / rbf / linear combinations
    """
    target = np.zeros_like(Z)
    for part, i_s in zip(self.parts, self.input_slices):
        part.dpsi2_dZ(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target[:, i_s])

    # compute the "cross" terms
    # TODO: we need input_slices here.
    for p1, p2 in itertools.combinations(self.parts, 2):
        # white doesn't combine with anything
        if p1.name == 'white' or p2.name == 'white':
            pass
        # rbf X bias
        elif p1.name == 'bias' and p2.name == 'rbf':
            # dpsi1_dZ (not dpsi1_dX): this is the Z-gradient, and it is
            # the method every other branch here calls.
            p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
        elif p2.name == 'bias' and p1.name == 'rbf':
            p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
        # linear X bias
        elif p1.name == 'bias' and p2.name == 'linear':
            p2.dpsi1_dZ(dL_dpsi2.sum(1).T * p1.variance, Z, mu, S, target)
        elif p2.name == 'bias' and p1.name == 'linear':
            p1.dpsi1_dZ(dL_dpsi2.sum(1).T * p2.variance, Z, mu, S, target)
        # rbf X linear
        elif p1.name == 'linear' and p2.name == 'rbf':
            raise NotImplementedError  # TODO
        elif p2.name == 'linear' and p1.name == 'rbf':
            raise NotImplementedError  # TODO
        else:
            raise NotImplementedError("psi2 cannot be computed for this kernel")

    return target * 2.
|
||||||
|
|
||||||
return target
|
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S):
|
||||||
|
target_mu, target_S = np.zeros((2, mu.shape[0], mu.shape[1]))
|
||||||
|
[p.dpsi2_dmuS(dL_dpsi2, Z[:, i_s], mu[:, i_s], S[:, i_s], target_mu[:, i_s], target_S[:, i_s]) for p, i_s in zip(self.parts, self.input_slices)]
|
||||||
|
|
||||||
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
|
# compute the "cross" terms
|
||||||
"""return shapes are N,M,M,Q"""
|
#TODO: we need input_slices here.
|
||||||
slices1, slices2 = self._process_slices(slices1,slices2)
|
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||||
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
|
# white doesn;t combine with anything
|
||||||
[p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
|
if p1.name == 'white' or p2.name == 'white':
|
||||||
|
|
||||||
#compute the "cross" terms
|
|
||||||
for p1, p2 in itertools.combinations(self.parts,2):
|
|
||||||
#white doesn;t combine with anything
|
|
||||||
if p1.name=='white' or p2.name=='white':
|
|
||||||
pass
|
pass
|
||||||
#rbf X bias
|
# rbf X bias
|
||||||
elif p1.name=='bias' and p2.name=='rbf':
|
elif p1.name == 'bias' and p2.name == 'rbf':
|
||||||
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2.,Z,mu,S,target_mu,target_S)
|
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
elif p2.name=='bias' and p1.name=='rbf':
|
elif p2.name == 'bias' and p1.name == 'rbf':
|
||||||
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2.,Z,mu,S,target_mu,target_S)
|
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
#linear X bias
|
# linear X bias
|
||||||
elif p1.name=='bias' and p2.name=='linear':
|
elif p1.name == 'bias' and p2.name == 'linear':
|
||||||
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T*p1.variance*2., Z, mu, S, target_mu, target_S)
|
p2.dpsi1_dmuS(dL_dpsi2.sum(1).T * p1.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
elif p2.name=='bias' and p1.name=='linear':
|
elif p2.name == 'bias' and p1.name == 'linear':
|
||||||
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T*p2.variance*2., Z, mu, S, target_mu, target_S)
|
p1.dpsi1_dmuS(dL_dpsi2.sum(1).T * p2.variance * 2., Z, mu, S, target_mu, target_S)
|
||||||
#rbf X linear
|
# rbf X linear
|
||||||
elif p1.name=='linear' and p2.name=='rbf':
|
elif p1.name == 'linear' and p2.name == 'rbf':
|
||||||
raise NotImplementedError #TODO
|
raise NotImplementedError # TODO
|
||||||
elif p2.name=='linear' and p1.name=='rbf':
|
elif p2.name == 'linear' and p1.name == 'rbf':
|
||||||
raise NotImplementedError #TODO
|
raise NotImplementedError # TODO
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||||
|
|
||||||
return target_mu, target_S
|
return target_mu, target_S
|
||||||
|
|
||||||
def plot(self, x=None, plot_limits=None, which_functions='all', resolution=None, *args, **kwargs):
    """Plot the kernel k(., x) around a fixed input x (1-D or 2-D kernels only).

    :param x: the fixed input; defaults to the origin. Must have ``self.D`` elements.
    :param plot_limits: pair ``(xmin, xmax)``; defaults to x - 5 and x + 5.
        For a 2-D kernel each limit is indexed per dimension.
    :param which_functions: ``'all'`` or one flag per kernel part, selecting
        the parts included in the plotted covariance
    :param resolution: number of grid points per axis (default 201 in 1-D, 51 in 2-D)
    :param args, kwargs: forwarded to the plotting call
    :raises ValueError: if plot_limits is given but is not of length 2
    :raises NotImplementedError: if the kernel has more than two input dimensions
    """
    if which_functions == 'all':
        which_functions = [True] * self.Nparts
    if self.D == 1:
        if x is None:
            x = np.zeros((1, 1))
        else:
            x = np.asarray(x)
            assert x.size == 1, "The size of the fixed variable x is not 1"
            x = x.reshape((1, 1))

        if plot_limits is None:
            xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
        elif len(plot_limits) == 2:
            xmin, xmax = plot_limits
        else:
            raise ValueError("Bad limits for plotting")

        # reshape(-1, 1) yields an (n, 1) column whether linspace returns a
        # flat vector or, for array-valued limits, already an (n, 1) array.
        Xnew = np.linspace(xmin, xmax, resolution or 201).reshape(-1, 1)
        # K selects parts through `which_parts`; it has no `slices2` argument
        Kx = self.K(Xnew, x, which_parts=which_functions)
        pb.plot(Xnew, Kx, *args, **kwargs)
        pb.xlim(xmin, xmax)
        pb.xlabel("x")
        pb.ylabel("k(x,%0.1f)" % x)

    elif self.D == 2:
        if x is None:
            x = np.zeros((1, 2))
        else:
            x = np.asarray(x)
            assert x.size == 2, "The size of the fixed variable x is not 2"
            x = x.reshape((1, 2))

        if plot_limits is None:
            xmin, xmax = (x - 5).flatten(), (x + 5).flatten()
        elif len(plot_limits) == 2:
            xmin, xmax = plot_limits
        else:
            raise ValueError("Bad limits for plotting")

        resolution = resolution or 51
        xx, yy = np.mgrid[xmin[0]:xmax[0]:1j * resolution, xmin[1]:xmax[1]:1j * resolution]
        xg = np.linspace(xmin[0], xmax[0], resolution)
        yg = np.linspace(xmin[1], xmax[1], resolution)
        Xnew = np.vstack((xx.flatten(), yy.flatten())).T
        # K selects parts through `which_parts`; it has no `slices2` argument
        Kx = self.K(Xnew, x, which_parts=which_functions)
        Kx = Kx.reshape(resolution, resolution).T
        pb.contour(xg, yg, Kx, vmin=Kx.min(), vmax=Kx.max(), cmap=pb.cm.jet, *args, **kwargs)
        pb.xlim(xmin[0], xmax[0])
        pb.ylim(xmin[1], xmax[1])
        pb.xlabel("x1")
        pb.ylabel("x2")
        pb.title("k(x1,x2 ; %0.1f,%0.1f)" % (x[0, 0], x[0, 1]))
    else:
        raise NotImplementedError("Cannot plot a kernel with more than two input dimensions")
|
||||||
|
|
|
||||||
|
|
@ -4,6 +4,8 @@
|
||||||
|
|
||||||
from kernpart import kernpart
|
from kernpart import kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from ..util.linalg import tdot
|
||||||
|
from scipy import weave
|
||||||
|
|
||||||
class linear(kernpart):
|
class linear(kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -22,7 +24,7 @@ class linear(kernpart):
|
||||||
:rtype: kernel object
|
:rtype: kernel object
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,D,variances=None,ARD=False):
|
def __init__(self, D, variances=None, ARD=False):
|
||||||
self.D = D
|
self.D = D
|
||||||
self.ARD = ARD
|
self.ARD = ARD
|
||||||
if ARD == False:
|
if ARD == False:
|
||||||
|
|
@ -44,15 +46,15 @@ class linear(kernpart):
|
||||||
variances = np.ones(self.D)
|
variances = np.ones(self.D)
|
||||||
self._set_params(variances.flatten())
|
self._set_params(variances.flatten())
|
||||||
|
|
||||||
#initialize cache
|
# initialize cache
|
||||||
self._Z, self._mu, self._S = np.empty(shape=(3,1))
|
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
|
||||||
self._X, self._X2, self._params = np.empty(shape=(3,1))
|
self._X, self._X2, self._params = np.empty(shape=(3, 1))
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return self.variances
|
return self.variances
|
||||||
|
|
||||||
def _set_params(self,x):
|
def _set_params(self, x):
|
||||||
assert x.size==(self.Nparam)
|
assert x.size == (self.Nparam)
|
||||||
self.variances = x
|
self.variances = x
|
||||||
self.variances2 = np.square(self.variances)
|
self.variances2 = np.square(self.variances)
|
||||||
|
|
||||||
|
|
@ -60,124 +62,234 @@ class linear(kernpart):
|
||||||
if self.Nparam == 1:
|
if self.Nparam == 1:
|
||||||
return ['variance']
|
return ['variance']
|
||||||
else:
|
else:
|
||||||
return ['variance_%i'%i for i in range(self.variances.size)]
|
return ['variance_%i' % i for i in range(self.variances.size)]
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
def K(self, X, X2, target):
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
XX = X*np.sqrt(self.variances)
|
XX = X * np.sqrt(self.variances)
|
||||||
XX2 = X2*np.sqrt(self.variances)
|
if X2 is None:
|
||||||
target += np.dot(XX, XX2.T)
|
target += tdot(XX)
|
||||||
|
else:
|
||||||
|
XX2 = X2 * np.sqrt(self.variances)
|
||||||
|
target += np.dot(XX, XX2.T)
|
||||||
else:
|
else:
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
target += self.variances * self._dot_product
|
target += self.variances * self._dot_product
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self, X, target):
|
||||||
np.add(target,np.sum(self.variances*np.square(X),-1),target)
|
np.add(target, np.sum(self.variances * np.square(X), -1), target)
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
product = X[:,None,:]*X2[None,:,:]
|
if X2 is None:
|
||||||
target += (dL_dK[:,:,None]*product).sum(0).sum(0)
|
[np.add(target[i:i + 1], np.sum(dL_dK * tdot(X[:, i:i + 1])), target[i:i + 1]) for i in range(self.D)]
|
||||||
|
else:
|
||||||
|
product = X[:, None, :] * X2[None, :, :]
|
||||||
|
target += (dL_dK[:, :, None] * product).sum(0).sum(0)
|
||||||
else:
|
else:
|
||||||
self._K_computations(X, X2)
|
self._K_computations(X, X2)
|
||||||
target += np.sum(self._dot_product*dL_dK)
|
target += np.sum(self._dot_product * dL_dK)
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag, X, target):
|
def dKdiag_dtheta(self, dL_dKdiag, X, target):
|
||||||
tmp = dL_dKdiag[:,None]*X**2
|
tmp = dL_dKdiag[:, None] * X ** 2
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
target += tmp.sum(0)
|
target += tmp.sum(0)
|
||||||
else:
|
else:
|
||||||
target += tmp.sum()
|
target += tmp.sum()
|
||||||
|
|
||||||
def dK_dX(self,dL_dK,X,X2,target):
|
def dK_dX(self, dL_dK, X, X2, target):
|
||||||
target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
|
target += (((X2[:, None, :] * self.variances)) * dL_dK[:, :, None]).sum(0)
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# PSI statistics #
|
# PSI statistics #
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def psi0(self,Z,mu,S,target):
|
def psi0(self, Z, mu, S, target):
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
target += np.sum(self.variances*self.mu2_S,1)
|
target += np.sum(self.variances * self.mu2_S, 1)
|
||||||
|
|
||||||
def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
|
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
tmp = dL_dpsi0[:, None] * self.mu2_S
|
tmp = dL_dpsi0[:, None] * self.mu2_S
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
target += tmp.sum(0)
|
target += tmp.sum(0)
|
||||||
else:
|
else:
|
||||||
target += tmp.sum()
|
target += tmp.sum()
|
||||||
|
|
||||||
def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
|
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
||||||
target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
|
target_mu += dL_dpsi0[:, None] * (2.0 * mu * self.variances)
|
||||||
target_S += dL_dpsi0[:, None] * self.variances
|
target_S += dL_dpsi0[:, None] * self.variances
|
||||||
|
|
||||||
def psi1(self,Z,mu,S,target):
|
def psi1(self, Z, mu, S, target):
|
||||||
"""the variance, it does nothing"""
|
"""psi1 is K(mu, Z); the variance S is not used"""
|
||||||
self.K(mu,Z,target)
|
self._psi1 = self.K(mu, Z, target)
|
||||||
|
|
||||||
def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
|
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
||||||
"""the variance, it does nothing"""
|
"""the variance S is not used; this delegates to dK_dtheta(dL_dpsi1, mu, Z)"""
|
||||||
self.dK_dtheta(dL_dpsi1,mu,Z,target)
|
self.dK_dtheta(dL_dpsi1, mu, Z, target)
|
||||||
|
|
||||||
def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
|
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
||||||
"""Do nothing for S, it does not affect psi1"""
|
"""Do nothing for S, it does not affect psi1"""
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
|
target_mu += (dL_dpsi1.T[:, :, None] * (Z * self.variances)).sum(1)
|
||||||
|
|
||||||
def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
|
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
||||||
self.dK_dX(dL_dpsi1.T,Z,mu,target)
|
self.dK_dX(dL_dpsi1.T, Z, mu, target)
|
||||||
|
|
||||||
def psi2(self,Z,mu,S,target):
|
def psi2(self, Z, mu, S, target):
|
||||||
"""
|
"""
|
||||||
returns N,M,M matrix
|
returns N,M,M matrix
|
||||||
"""
|
"""
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
|
# psi2_old = self.ZZ * np.square(self.variances) * self.mu2_S[:, None, None, :]
|
||||||
target += psi2.sum(-1)
|
# target += psi2.sum(-1)
|
||||||
#TODO: this could be faster using np.tensordot
|
# slow way of doing it, but right
|
||||||
|
# psi2_real = rm np.zeros((mu.shape[0], Z.shape[0], Z.shape[0]))
|
||||||
|
# for n in range(mu.shape[0]):
|
||||||
|
# for m_prime in range(Z.shape[0]):
|
||||||
|
# for m in range(Z.shape[0]):
|
||||||
|
# tmp = self._Z[m:m + 1] * self.variances
|
||||||
|
# tmp = np.dot(tmp, (tdot(self._mu[n:n + 1].T) + np.diag(S[n])))
|
||||||
|
# psi2_real[n, m, m_prime] = np.dot(tmp, (
|
||||||
|
# self._Z[m_prime:m_prime + 1] * self.variances).T)
|
||||||
|
# mu2_S = (self._mu[:, None, :] * self._mu[:, :, None])
|
||||||
|
# mu2_S[:, np.arange(self.D), np.arange(self.D)] += self._S
|
||||||
|
# psi2 = (self.ZA[None, :, None, :] * mu2_S[:, None]).sum(-1)
|
||||||
|
# psi2 = (psi2[:, :, None] * self.ZA[None, None]).sum(-1)
|
||||||
|
# psi2_tensor = np.tensordot(self.ZZ[None, :, :, :] * np.square(self.variances), self.mu2_S[:, None, None, :], ((3), (3))).squeeze().T
|
||||||
|
target += self._psi2
|
||||||
|
|
||||||
def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
|
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
|
tmp = dL_dpsi2[:, :, :, None] * (self.ZAinner[:, :, None, :] * (2 * Z)[None, None, :, :])
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
target += tmp.sum(0).sum(0).sum(0)
|
target += tmp.sum(0).sum(0).sum(0)
|
||||||
else:
|
else:
|
||||||
target += tmp.sum()
|
target += tmp.sum()
|
||||||
|
|
||||||
def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
|
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
||||||
"""Think N,M,M,Q """
|
"""Think N,M,M,Q """
|
||||||
self._psi_computations(Z,mu,S)
|
self._psi_computations(Z, mu, S)
|
||||||
tmp = self.ZZ*np.square(self.variances) # M,M,Q
|
AZZA = self.ZA.T[:, None, :, None] * self.ZA[None, :, None, :]
|
||||||
target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
|
AZZA = AZZA + AZZA.swapaxes(1, 2)
|
||||||
target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
|
AZZA_2 = AZZA/2.
|
||||||
|
#muAZZA = np.tensordot(mu,AZZA,(-1,0))
|
||||||
|
#target_mu_dummy, target_S_dummy = np.zeros_like(target_mu), np.zeros_like(target_S)
|
||||||
|
#target_mu_dummy += (dL_dpsi2[:, :, :, None] * muAZZA).sum(1).sum(1)
|
||||||
|
#target_S_dummy += (dL_dpsi2[:, :, :, None] * self.ZA[None, :, None, :] * self.ZA[None, None, :, :]).sum(1).sum(1)
|
||||||
|
|
||||||
|
#Using weave, we can exploit the symmetry of this problem:
|
||||||
|
code = """
|
||||||
|
int n, m, mm,q,qq;
|
||||||
|
double factor,tmp;
|
||||||
|
#pragma omp parallel for private(m,mm,q,qq,factor,tmp)
|
||||||
|
for(n=0;n<N;n++){
|
||||||
|
for(m=0;m<M;m++){
|
||||||
|
for(mm=0;mm<=m;mm++){
|
||||||
|
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
|
||||||
|
if(m==mm)
|
||||||
|
factor = dL_dpsi2(n,m,mm);
|
||||||
|
else
|
||||||
|
factor = 2.0*dL_dpsi2(n,m,mm);
|
||||||
|
|
||||||
|
for(q=0;q<Q;q++){
|
||||||
|
|
||||||
|
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
|
||||||
|
tmp = 0.0;
|
||||||
|
for(qq=0;qq<Q;qq++){
|
||||||
|
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
|
||||||
|
}
|
||||||
|
|
||||||
|
target_mu(n,q) += factor*tmp;
|
||||||
|
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
support_code = """
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
weave_options = {'headers' : ['<omp.h>'],
|
||||||
|
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
||||||
|
'extra_link_args' : ['-lgomp']}
|
||||||
|
|
||||||
|
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||||
|
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||||
|
arg_names=['N','M','Q','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
|
||||||
|
type_converters=weave.converters.blitz,**weave_options)
|
||||||
|
|
||||||
|
|
||||||
|
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
||||||
|
self._psi_computations(Z, mu, S)
|
||||||
|
#psi2_dZ = dL_dpsi2[:, :, :, None] * self.variances * self.ZAinner[:, :, None, :]
|
||||||
|
#dummy_target = np.zeros_like(target)
|
||||||
|
#dummy_target += psi2_dZ.sum(0).sum(0)
|
||||||
|
|
||||||
|
AZA = self.variances*self.ZAinner
|
||||||
|
code="""
|
||||||
|
int n,m,mm,q;
|
||||||
|
#pragma omp parallel for private(n,mm,q)
|
||||||
|
for(m=0;m<M;m++){
|
||||||
|
for(q=0;q<Q;q++){
|
||||||
|
for(mm=0;mm<M;mm++){
|
||||||
|
for(n=0;n<N;n++){
|
||||||
|
target(m,q) += dL_dpsi2(n,m,mm)*AZA(n,mm,q);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
"""
|
||||||
|
support_code = """
|
||||||
|
#include <omp.h>
|
||||||
|
#include <math.h>
|
||||||
|
"""
|
||||||
|
weave_options = {'headers' : ['<omp.h>'],
|
||||||
|
'extra_compile_args': ['-fopenmp -O3'], #-march=native'],
|
||||||
|
'extra_link_args' : ['-lgomp']}
|
||||||
|
|
||||||
|
N,M,Q = mu.shape[0],Z.shape[0],mu.shape[1]
|
||||||
|
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||||
|
arg_names=['N','M','Q','AZA','target','dL_dpsi2'],
|
||||||
|
type_converters=weave.converters.blitz,**weave_options)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
|
|
||||||
self._psi_computations(Z,mu,S)
|
|
||||||
mu2_S = np.sum(self.mu2_S,0)# Q,
|
|
||||||
target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
|
|
||||||
|
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
# Precomputations #
|
# Precomputations #
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def _K_computations(self,X,X2):
|
def _K_computations(self, X, X2):
|
||||||
if X2 is None:
|
if not (np.array_equal(X, self._Xcache) and np.array_equal(X2, self._X2cache)):
|
||||||
X2 = X
|
self._Xcache = X.copy()
|
||||||
if not (np.all(X==self._Xcache) and np.all(X2==self._X2cache)):
|
if X2 is None:
|
||||||
self._Xcache = X
|
self._dot_product = tdot(X)
|
||||||
self._X2cache = X2
|
self._X2cache = None
|
||||||
self._dot_product = np.dot(X,X2.T)
|
else:
|
||||||
else:
|
self._X2cache = X2.copy()
|
||||||
# print "Cache hit!"
|
self._dot_product = np.dot(X, X2.T)
|
||||||
pass # TODO: insert debug message here (logging framework)
|
|
||||||
|
|
||||||
def _psi_computations(self,Z,mu,S):
|
def _psi_computations(self, Z, mu, S):
|
||||||
#here are the "statistics" for psi1 and psi2
|
# here are the "statistics" for psi1 and psi2
|
||||||
if not np.all(Z==self._Z):
|
Zv_changed = not (np.array_equal(Z, self._Z) and np.array_equal(self.variances, self._variances))
|
||||||
#Z has changed, compute Z specific stuff
|
muS_changed = not (np.array_equal(mu, self._mu) and np.array_equal(S, self._S))
|
||||||
self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
|
if Zv_changed:
|
||||||
self._Z = Z
|
# Z has changed, compute Z specific stuff
|
||||||
if not (np.all(mu==self._mu) and np.all(S==self._S)):
|
# self.ZZ = Z[:,None,:]*Z[None,:,:] # M,M,Q
|
||||||
self.mu2_S = np.square(mu)+S
|
# self.ZZ = np.empty((Z.shape[0], Z.shape[0], Z.shape[1]), order='F')
|
||||||
self._mu, self._S = mu, S
|
# [tdot(Z[:, i:i + 1], self.ZZ[:, :, i].T) for i in xrange(Z.shape[1])]
|
||||||
|
self.ZA = Z * self.variances
|
||||||
|
self._Z = Z.copy()
|
||||||
|
self._variances = self.variances.copy()
|
||||||
|
if muS_changed:
|
||||||
|
self.mu2_S = np.square(mu) + S
|
||||||
|
self.inner = (mu[:, None, :] * mu[:, :, None])
|
||||||
|
diag_indices = np.diag_indices(mu.shape[1], 2)
|
||||||
|
self.inner[:, diag_indices[0], diag_indices[1]] += S
|
||||||
|
self._mu, self._S = mu.copy(), S.copy()
|
||||||
|
if Zv_changed or muS_changed:
|
||||||
|
self.ZAinner = np.dot(self.ZA, self.inner).swapaxes(0, 1) # NOTE: after the swapaxes, self.ZAinner \in [N x M x Q]!
|
||||||
|
self._psi2 = np.dot(self.ZAinner, self.ZA.T)
|
||||||
|
|
|
||||||
|
|
@ -40,9 +40,12 @@ class prod(kernpart):
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
def K(self,X,X2,target):
|
||||||
"""Compute the covariance matrix between X and X2."""
|
"""Compute the covariance matrix between X and X2."""
|
||||||
if X2 is None: X2 = X
|
if X2 is None:
|
||||||
target1 = np.zeros((X.shape[0],X2.shape[0]))
|
target1 = np.zeros((X.shape[0],X2.shape[0]))
|
||||||
target2 = np.zeros((X.shape[0],X2.shape[0]))
|
target2 = np.zeros((X.shape[0],X2.shape[0]))
|
||||||
|
else:
|
||||||
|
target1 = np.zeros((X.shape[0],X.shape[0]))
|
||||||
|
target2 = np.zeros((X.shape[0],X.shape[0]))
|
||||||
self.k1.K(X,X2,target1)
|
self.k1.K(X,X2,target1)
|
||||||
self.k2.K(X,X2,target2)
|
self.k2.K(X,X2,target2)
|
||||||
target += target1 * target2
|
target += target1 * target2
|
||||||
|
|
|
||||||
|
|
@ -21,41 +21,35 @@ class prod_orthogonal(kernpart):
|
||||||
self.name = k1.name + '<times>' + k2.name
|
self.name = k1.name + '<times>' + k2.name
|
||||||
self.k1 = k1
|
self.k1 = k1
|
||||||
self.k2 = k2
|
self.k2 = k2
|
||||||
|
self._X, self._X2, self._params = np.empty(shape=(3,1))
|
||||||
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
|
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
"""return the value of the parameters."""
|
"""return the value of the parameters."""
|
||||||
return self.params
|
return np.hstack((self.k1._get_params(), self.k2._get_params()))
|
||||||
|
|
||||||
def _set_params(self,x):
|
def _set_params(self,x):
|
||||||
"""set the value of the parameters."""
|
"""set the value of the parameters."""
|
||||||
self.k1._set_params(x[:self.k1.Nparam])
|
self.k1._set_params(x[:self.k1.Nparam])
|
||||||
self.k2._set_params(x[self.k1.Nparam:])
|
self.k2._set_params(x[self.k1.Nparam:])
|
||||||
self.params = x
|
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
"""return parameter names."""
|
"""return parameter names."""
|
||||||
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
|
return [self.k1.name + '_' + param_name for param_name in self.k1._get_param_names()] + [self.k2.name + '_' + param_name for param_name in self.k2._get_param_names()]
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
def K(self,X,X2,target):
|
||||||
"""Compute the covariance matrix between X and X2."""
|
self._K_computations(X,X2)
|
||||||
if X2 is None: X2 = X
|
target += self._K1 * self._K2
|
||||||
target1 = np.zeros_like(target)
|
|
||||||
target2 = np.zeros_like(target)
|
|
||||||
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
|
|
||||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
|
|
||||||
target += target1 * target2
|
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to the parameters."""
|
"""derivative of the covariance matrix with respect to the parameters."""
|
||||||
if X2 is None: X2 = X
|
self._K_computations(X,X2)
|
||||||
K1 = np.zeros((X.shape[0],X2.shape[0]))
|
if X2 is None:
|
||||||
K2 = np.zeros((X.shape[0],X2.shape[0]))
|
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], None, target[:self.k1.Nparam])
|
||||||
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
|
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], None, target[self.k1.Nparam:])
|
||||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
|
else:
|
||||||
|
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
|
||||||
self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
|
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
|
||||||
self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||||
|
|
@ -75,14 +69,9 @@ class prod_orthogonal(kernpart):
|
||||||
|
|
||||||
def dK_dX(self,dL_dK,X,X2,target):
|
def dK_dX(self,dL_dK,X,X2,target):
|
||||||
"""derivative of the covariance matrix with respect to X."""
|
"""derivative of the covariance matrix with respect to X."""
|
||||||
if X2 is None: X2 = X
|
self._K_computations(X,X2)
|
||||||
K1 = np.zeros((X.shape[0],X2.shape[0]))
|
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
|
||||||
K2 = np.zeros((X.shape[0],X2.shape[0]))
|
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
|
||||||
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
|
|
||||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
|
|
||||||
|
|
||||||
self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
|
|
||||||
self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
|
|
||||||
|
|
||||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||||
K1 = np.zeros(X.shape[0])
|
K1 = np.zeros(X.shape[0])
|
||||||
|
|
@ -93,3 +82,20 @@ class prod_orthogonal(kernpart):
|
||||||
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
|
self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
|
||||||
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
|
self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)
|
||||||
|
|
||||||
|
def _K_computations(self,X,X2):
|
||||||
|
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
||||||
|
self._X = X.copy()
|
||||||
|
self._params == self._get_params().copy()
|
||||||
|
if X2 is None:
|
||||||
|
self._X2 = None
|
||||||
|
self._K1 = np.zeros((X.shape[0],X.shape[0]))
|
||||||
|
self._K2 = np.zeros((X.shape[0],X.shape[0]))
|
||||||
|
self.k1.K(X[:,:self.k1.D],None,self._K1)
|
||||||
|
self.k2.K(X[:,self.k1.D:],None,self._K2)
|
||||||
|
else:
|
||||||
|
self._X2 = X2.copy()
|
||||||
|
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
|
||||||
|
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
|
||||||
|
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
|
||||||
|
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from kernpart import kernpart
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import hashlib
|
import hashlib
|
||||||
from scipy import weave
|
from scipy import weave
|
||||||
|
from ..util.linalg import tdot
|
||||||
|
|
||||||
class rbf(kernpart):
|
class rbf(kernpart):
|
||||||
"""
|
"""
|
||||||
|
|
@ -74,11 +75,8 @@ class rbf(kernpart):
|
||||||
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
|
return ['variance']+['lengthscale_%i'%i for i in range(self.lengthscale.size)]
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
def K(self,X,X2,target):
|
||||||
if X2 is None:
|
|
||||||
X2 = X
|
|
||||||
|
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
np.add(self.variance*self._K_dvar, target,target)
|
target += self.variance*self._K_dvar
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
np.add(target,self.variance,target)
|
np.add(target,self.variance,target)
|
||||||
|
|
@ -87,6 +85,7 @@ class rbf(kernpart):
|
||||||
self._K_computations(X,X2)
|
self._K_computations(X,X2)
|
||||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||||
if self.ARD:
|
if self.ARD:
|
||||||
|
if X2 is None: X2 = X
|
||||||
[np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
|
[np.add(target[1+q:2+q],(self.variance/self.lengthscale[q]**3)*np.sum(self._K_dvar*dL_dK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.D)]
|
||||||
else:
|
else:
|
||||||
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
|
target[1] += (self.variance/self.lengthscale)*np.sum(self._K_dvar*self._K_dist2*dL_dK)
|
||||||
|
|
@ -182,29 +181,31 @@ class rbf(kernpart):
|
||||||
#---------------------------------------#
|
#---------------------------------------#
|
||||||
|
|
||||||
def _K_computations(self,X,X2):
|
def _K_computations(self,X,X2):
|
||||||
if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
|
if not (np.array_equal(X,self._X) and np.array_equal(X2,self._X2) and np.array_equal(self._params , self._get_params())):
|
||||||
self._X = X.copy()
|
self._X = X.copy()
|
||||||
self._X2 = X2.copy()
|
|
||||||
self._params == self._get_params().copy()
|
self._params == self._get_params().copy()
|
||||||
if X2 is None: X2 = X
|
if X2 is None:
|
||||||
#never do this: self._K_dist = X[:,None,:]-X2[None,:,:] # this can be computationally heavy
|
self._X2 = None
|
||||||
#_K_dist = X[:,None,:]-X2[None,:,:]
|
X = X/self.lengthscale
|
||||||
#_K_dist2 = np.square(_K_dist/self.lengthscale)
|
Xsquare = np.sum(np.square(X),1)
|
||||||
X = X/self.lengthscale
|
self._K_dist2 = -2.*tdot(X) + (Xsquare[:,None] + Xsquare[None,:])
|
||||||
X2 = X2/self.lengthscale
|
else:
|
||||||
self._K_dist2 = (-2.*np.dot(X, X2.T) + np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
|
self._X2 = X2.copy()
|
||||||
|
X = X/self.lengthscale
|
||||||
|
X2 = X2/self.lengthscale
|
||||||
|
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X),1)[:,None] + np.sum(np.square(X2),1)[None,:])
|
||||||
self._K_dvar = np.exp(-0.5*self._K_dist2)
|
self._K_dvar = np.exp(-0.5*self._K_dist2)
|
||||||
|
|
||||||
def _psi_computations(self,Z,mu,S):
|
def _psi_computations(self,Z,mu,S):
|
||||||
#here are the "statistics" for psi1 and psi2
|
#here are the "statistics" for psi1 and psi2
|
||||||
if not np.all(Z==self._Z):
|
if not np.array_equal(Z, self._Z):
|
||||||
#Z has changed, compute Z specific stuff
|
#Z has changed, compute Z specific stuff
|
||||||
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
|
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
|
||||||
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
|
self._psi2_Zdist = 0.5*(Z[:,None,:]-Z[None,:,:]) # M,M,Q
|
||||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
|
self._psi2_Zdist_sq = np.square(self._psi2_Zdist/self.lengthscale) # M,M,Q
|
||||||
self._Z = Z
|
self._Z = Z
|
||||||
|
|
||||||
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
|
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
|
||||||
#something's changed. recompute EVERYTHING
|
#something's changed. recompute EVERYTHING
|
||||||
|
|
||||||
#psi1
|
#psi1
|
||||||
|
|
|
||||||
|
|
@ -30,17 +30,15 @@ class white(kernpart):
|
||||||
return ['variance']
|
return ['variance']
|
||||||
|
|
||||||
def K(self,X,X2,target):
|
def K(self,X,X2,target):
|
||||||
if X.shape==X2.shape:
|
if X2 is None:
|
||||||
if np.all(X==X2):
|
target += np.eye(X.shape[0])*self.variance
|
||||||
np.add(target,np.eye(X.shape[0])*self.variance,target)
|
|
||||||
|
|
||||||
def Kdiag(self,X,target):
|
def Kdiag(self,X,target):
|
||||||
target += self.variance
|
target += self.variance
|
||||||
|
|
||||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||||
if X.shape==X2.shape:
|
if X2 is None:
|
||||||
if np.all(X==X2):
|
target += np.trace(dL_dK)
|
||||||
target += np.trace(dL_dK)
|
|
||||||
|
|
||||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||||
target += np.sum(dL_dKdiag)
|
target += np.sum(dL_dKdiag)
|
||||||
|
|
|
||||||
|
|
@ -198,6 +198,7 @@ class EP(likelihood):
|
||||||
#Posterior distribution parameters update
|
#Posterior distribution parameters update
|
||||||
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
|
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
|
||||||
L = jitchol(LLT)
|
L = jitchol(LLT)
|
||||||
|
#cholUpdate(L,Kmn[:,i]*np.sqrt(Delta_tau))
|
||||||
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
|
V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
|
||||||
Sigma_diag = np.sum(V*V,-2)
|
Sigma_diag = np.sum(V*V,-2)
|
||||||
si = np.sum(V.T*V[:,i],-1)
|
si = np.sum(V.T*V[:,i],-1)
|
||||||
|
|
@ -250,6 +251,7 @@ class EP(likelihood):
|
||||||
R = R0.copy()
|
R = R0.copy()
|
||||||
Diag = Diag0.copy()
|
Diag = Diag0.copy()
|
||||||
Sigma_diag = Knn_diag
|
Sigma_diag = Knn_diag
|
||||||
|
RPT0 = np.dot(R0,P0.T)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Initial values - Cavity distribution parameters:
|
Initial values - Cavity distribution parameters:
|
||||||
|
|
@ -305,13 +307,7 @@ class EP(likelihood):
|
||||||
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
|
Iplus_Dprod_i = 1./(1.+ Diag0 * self.tau_tilde)
|
||||||
Diag = Diag0 * Iplus_Dprod_i
|
Diag = Diag0 * Iplus_Dprod_i
|
||||||
P = Iplus_Dprod_i[:,None] * P0
|
P = Iplus_Dprod_i[:,None] * P0
|
||||||
|
|
||||||
#Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
|
|
||||||
#P = (Diag / Diag0)[:,None] * P0
|
|
||||||
RPT0 = np.dot(R0,P0.T)
|
|
||||||
L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T))
|
L = jitchol(np.eye(M) + np.dot(RPT0,((1. - Iplus_Dprod_i)/Diag0)[:,None]*RPT0.T))
|
||||||
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Iplus_Dprod_i/Diag0)[:,None]*RPT0.T))
|
|
||||||
#L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
|
|
||||||
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
|
R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
|
||||||
RPT = np.dot(R,P.T)
|
RPT = np.dot(R,P.T)
|
||||||
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
|
||||||
|
|
|
||||||
|
|
@ -2,19 +2,30 @@ import numpy as np
|
||||||
from likelihood import likelihood
|
from likelihood import likelihood
|
||||||
|
|
||||||
class Gaussian(likelihood):
|
class Gaussian(likelihood):
|
||||||
|
"""
|
||||||
|
Gaussian likelihood class (homoscedastic: a single noise variance parameter)
|
||||||
|
|
||||||
|
:param data: observed output (NxD numpy.ndarray)
|
||||||
|
.. Note:: Y values allowed depend on the likelihood_function used
|
||||||
|
:param variance: the noise variance of the likelihood
|
||||||
|
:param normalize: whether to normalize the data before computing (predictions will be in original scales)
|
||||||
|
:type normalize: False|True
|
||||||
|
"""
|
||||||
def __init__(self,data,variance=1.,normalize=False):
|
def __init__(self,data,variance=1.,normalize=False):
|
||||||
self.is_heteroscedastic = False
|
self.is_heteroscedastic = False
|
||||||
self.Nparams = 1
|
self.Nparams = 1
|
||||||
self.Z = 0. # a correction factor which accounts for the approximation made
|
self.Z = 0. # a correction factor which accounts for the approximation made
|
||||||
N, self.D = data.shape
|
N, self.D = data.shape
|
||||||
|
|
||||||
#normaliztion
|
#normalization
|
||||||
if normalize:
|
if normalize:
|
||||||
self._mean = data.mean(0)[None,:]
|
self._bias = data.mean(0)[None,:]
|
||||||
self._std = data.std(0)[None,:]
|
self._scale = data.std(0)[None,:]
|
||||||
|
# Don't scale outputs which have zero variance to zero.
|
||||||
|
self._scale[np.nonzero(self._scale==0.)] = 1.0e-3
|
||||||
else:
|
else:
|
||||||
self._mean = np.zeros((1,self.D))
|
self._bias = np.zeros((1,self.D))
|
||||||
self._std = np.ones((1,self.D))
|
self._scale = np.ones((1,self.D))
|
||||||
|
|
||||||
self.set_data(data)
|
self.set_data(data)
|
||||||
|
|
||||||
|
|
@ -24,13 +35,13 @@ class Gaussian(likelihood):
|
||||||
self.data = data
|
self.data = data
|
||||||
self.N,D = data.shape
|
self.N,D = data.shape
|
||||||
assert D == self.D
|
assert D == self.D
|
||||||
self.Y = (self.data - self._mean)/self._std
|
self.Y = (self.data - self._bias)/self._scale
|
||||||
if D > self.N:
|
if D > self.N:
|
||||||
self.YYT = np.dot(self.Y,self.Y.T)
|
self.YYT = np.dot(self.Y,self.Y.T)
|
||||||
self.trYYT = np.trace(self.YYT)
|
self.trYYT = np.trace(self.YYT)
|
||||||
else:
|
else:
|
||||||
self.YYT = None
|
self.YYT = None
|
||||||
self.trYYT = None
|
self.trYYT = np.sum(np.square(self.Y))
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.asarray(self._variance)
|
return np.asarray(self._variance)
|
||||||
|
|
@ -47,19 +58,19 @@ class Gaussian(likelihood):
|
||||||
"""
|
"""
|
||||||
Un-normalize the prediction and add the likelihood variance, then return the 5%, 95% interval
|
Un-normalize the prediction and add the likelihood variance, then return the mean -/+ 2 standard deviations as the lower/upper bounds (approximately the 2.5% and 97.5% quantiles)
|
||||||
"""
|
"""
|
||||||
mean = mu*self._std + self._mean
|
mean = mu*self._scale + self._bias
|
||||||
if full_cov:
|
if full_cov:
|
||||||
if self.D >1:
|
if self.D >1:
|
||||||
raise NotImplementedError, "TODO"
|
raise NotImplementedError, "TODO"
|
||||||
#Note. for D>1, we need to re-normalise all the outputs independently.
|
#Note. for D>1, we need to re-normalise all the outputs independently.
|
||||||
# This will mess up computations of diag(true_var), below.
|
# This will mess up computations of diag(true_var), below.
|
||||||
#note that the upper, lower quantiles should be the same shape as mean
|
#note that the upper, lower quantiles should be the same shape as mean
|
||||||
true_var = (var + np.eye(var.shape[0])*self._variance)*self._std**2
|
true_var = (var + np.eye(var.shape[0])*self._variance)*self._scale**2
|
||||||
_5pc = mean + - 2.*np.sqrt(np.diag(true_var))
|
_5pc = mean - 2.*np.sqrt(np.diag(true_var))
|
||||||
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
|
_95pc = mean + 2.*np.sqrt(np.diag(true_var))
|
||||||
else:
|
else:
|
||||||
true_var = (var + self._variance)*self._std**2
|
true_var = (var + self._variance)*self._scale**2
|
||||||
_5pc = mean + - 2.*np.sqrt(true_var)
|
_5pc = mean - 2.*np.sqrt(true_var)
|
||||||
_95pc = mean + 2.*np.sqrt(true_var)
|
_95pc = mean + 2.*np.sqrt(true_var)
|
||||||
return mean, true_var, _5pc, _95pc
|
return mean, true_var, _5pc, _95pc
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -53,9 +53,11 @@ class probit(likelihood_function):
|
||||||
mu = mu.flatten()
|
mu = mu.flatten()
|
||||||
var = var.flatten()
|
var = var.flatten()
|
||||||
mean = stats.norm.cdf(mu/np.sqrt(1+var))
|
mean = stats.norm.cdf(mu/np.sqrt(1+var))
|
||||||
p_025 = np.zeros(mu.shape)
|
norm_025 = [stats.norm.ppf(.025,m,v) for m,v in zip(mu,var)]
|
||||||
p_975 = np.ones(mu.shape)
|
norm_975 = [stats.norm.ppf(.975,m,v) for m,v in zip(mu,var)]
|
||||||
return mean, np.nan*var, p_025, p_975 # TODO: better values here (mean is okay)
|
p_025 = stats.norm.cdf(norm_025/np.sqrt(1+var))
|
||||||
|
p_975 = stats.norm.cdf(norm_975/np.sqrt(1+var))
|
||||||
|
return mean, np.nan*var, p_025, p_975 # TODO: var
|
||||||
|
|
||||||
class Poisson(likelihood_function):
|
class Poisson(likelihood_function):
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,10 @@ from sparse_GP import sparse_GP
|
||||||
from GPy.util.linalg import pdinv
|
from GPy.util.linalg import pdinv
|
||||||
from ..likelihoods import Gaussian
|
from ..likelihoods import Gaussian
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
from numpy.linalg.linalg import LinAlgError
|
||||||
|
import itertools
|
||||||
|
from matplotlib.colors import colorConverter
|
||||||
|
from matplotlib.figure import SubplotParams
|
||||||
|
|
||||||
class Bayesian_GPLVM(sparse_GP, GPLVM):
|
class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
"""
|
"""
|
||||||
|
|
@ -22,12 +26,14 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
:type init: 'PCA'|'random'
|
:type init: 'PCA'|'random'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10, Z=None, kernel=None, **kwargs):
|
def __init__(self, Y, Q, X=None, X_variance=None, init='PCA', M=10,
|
||||||
|
Z=None, kernel=None, oldpsave=5, _debug=False,
|
||||||
|
**kwargs):
|
||||||
if X == None:
|
if X == None:
|
||||||
X = self.initialise_latent(init, Q, Y)
|
X = self.initialise_latent(init, Q, Y)
|
||||||
|
|
||||||
if X_variance is None:
|
if X_variance is None:
|
||||||
X_variance = np.ones_like(X) * 0.5
|
X_variance = np.clip((np.ones_like(X) * 0.5) + .01 * np.random.randn(*X.shape), 0.001, 1)
|
||||||
|
|
||||||
if Z is None:
|
if Z is None:
|
||||||
Z = np.random.permutation(X.copy())[:M]
|
Z = np.random.permutation(X.copy())[:M]
|
||||||
|
|
@ -36,9 +42,31 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(Q) + kern.white(Q)
|
kernel = kern.rbf(Q) + kern.white(Q)
|
||||||
|
|
||||||
|
self.oldpsave = oldpsave
|
||||||
|
self._oldps = []
|
||||||
|
self._debug = _debug
|
||||||
|
|
||||||
|
if self._debug:
|
||||||
|
self.f_call = 0
|
||||||
|
self._count = itertools.count()
|
||||||
|
self._savedklll = []
|
||||||
|
self._savedparams = []
|
||||||
|
self._savedgradients = []
|
||||||
|
self._savederrors = []
|
||||||
|
self._savedpsiKmm = []
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
|
sparse_GP.__init__(self, X, Gaussian(Y), kernel, Z=Z, X_variance=X_variance, **kwargs)
|
||||||
|
|
||||||
|
@property
|
||||||
|
def oldps(self):
|
||||||
|
return self._oldps
|
||||||
|
@oldps.setter
|
||||||
|
def oldps(self, p):
|
||||||
|
if len(self._oldps) == (self.oldpsave + 1):
|
||||||
|
self._oldps.pop()
|
||||||
|
# if len(self._oldps) == 0 or not np.any([np.any(np.abs(p - op) > 1e-5) for op in self._oldps]):
|
||||||
|
self._oldps.insert(0, p.copy())
|
||||||
|
|
||||||
def _get_param_names(self):
|
def _get_param_names(self):
|
||||||
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
|
X_names = sum([['X_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
|
||||||
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
|
S_names = sum([['X_variance_%i_%i' % (n, q) for q in range(self.Q)] for n in range(self.N)], [])
|
||||||
|
|
@ -54,17 +82,26 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
===============================================================
|
===============================================================
|
||||||
|
|
||||||
"""
|
"""
|
||||||
return np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
|
x = np.hstack((self.X.flatten(), self.X_variance.flatten(), sparse_GP._get_params(self)))
|
||||||
|
return x
|
||||||
def _set_params(self, x):
|
|
||||||
N, Q = self.N, self.Q
|
|
||||||
self.X = x[:self.X.size].reshape(N, Q).copy()
|
|
||||||
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
|
|
||||||
sparse_GP._set_params(self, x[(2 * N * Q):])
|
|
||||||
|
|
||||||
|
def _set_params(self, x, save_old=True, save_count=0):
|
||||||
|
try:
|
||||||
|
N, Q = self.N, self.Q
|
||||||
|
self.X = x[:self.X.size].reshape(N, Q).copy()
|
||||||
|
self.X_variance = x[(N * Q):(2 * N * Q)].reshape(N, Q).copy()
|
||||||
|
sparse_GP._set_params(self, x[(2 * N * Q):])
|
||||||
|
self.oldps = x
|
||||||
|
except (LinAlgError, FloatingPointError, ZeroDivisionError):
|
||||||
|
print "\rWARNING: Caught LinAlgError, continueing without setting "
|
||||||
|
if self._debug:
|
||||||
|
self._savederrors.append(self.f_call)
|
||||||
|
if save_count > 10:
|
||||||
|
raise
|
||||||
|
self._set_params(self.oldps[-1], save_old=False, save_count=save_count + 1)
|
||||||
|
|
||||||
def dKL_dmuS(self):
|
def dKL_dmuS(self):
|
||||||
dKL_dS = (1. - (1. / self.X_variance)) * 0.5
|
dKL_dS = (1. - (1. / (self.X_variance))) * 0.5
|
||||||
dKL_dmu = self.X
|
dKL_dmu = self.X
|
||||||
return dKL_dmu, dKL_dS
|
return dKL_dmu, dKL_dS
|
||||||
|
|
||||||
|
|
@ -83,13 +120,40 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
|
return 0.5 * (var_mean + var_S) - 0.5 * self.Q * self.N
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
return sparse_GP.log_likelihood(self) - self.KL_divergence()
|
ll = sparse_GP.log_likelihood(self)
|
||||||
|
kl = self.KL_divergence()
|
||||||
|
|
||||||
|
# if ll < -2E4:
|
||||||
|
# ll = -2E4 + np.random.randn()
|
||||||
|
# if kl > 5E4:
|
||||||
|
# kl = 5E4 + np.random.randn()
|
||||||
|
|
||||||
|
if self._debug:
|
||||||
|
self.f_call = self._count.next()
|
||||||
|
if self.f_call % 1 == 0:
|
||||||
|
self._savedklll.append([self.f_call, ll, kl])
|
||||||
|
self._savedparams.append([self.f_call, self._get_params()])
|
||||||
|
self._savedgradients.append([self.f_call, self._log_likelihood_gradients()])
|
||||||
|
self._savedpsiKmm.append([self.f_call, [self.Kmm, self.dL_dKmm]])
|
||||||
|
# print "\nkl:", kl, "ll:", ll
|
||||||
|
return ll - kl
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
dKL_dmu, dKL_dS = self.dKL_dmuS()
|
dKL_dmu, dKL_dS = self.dKL_dmuS()
|
||||||
dL_dmu, dL_dS = self.dL_dmuS()
|
dL_dmu, dL_dS = self.dL_dmuS()
|
||||||
# TODO: find way to make faster
|
# TODO: find way to make faster
|
||||||
dbound_dmuS = np.hstack(((dL_dmu - dKL_dmu).flatten(), (dL_dS - dKL_dS).flatten()))
|
|
||||||
|
d_dmu = (dL_dmu - dKL_dmu).flatten()
|
||||||
|
d_dS = (dL_dS - dKL_dS).flatten()
|
||||||
|
# TEST KL: ====================
|
||||||
|
# d_dmu = (dKL_dmu).flatten()
|
||||||
|
# d_dS = (dKL_dS).flatten()
|
||||||
|
# ========================
|
||||||
|
# TEST L: ====================
|
||||||
|
# d_dmu = (dL_dmu).flatten()
|
||||||
|
# d_dS = (dL_dS).flatten()
|
||||||
|
# ========================
|
||||||
|
dbound_dmuS = np.hstack((d_dmu, d_dS))
|
||||||
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
|
return np.hstack((dbound_dmuS.flatten(), sparse_GP._log_likelihood_gradients(self)))
|
||||||
|
|
||||||
def plot_latent(self, which_indices=None, *args, **kwargs):
|
def plot_latent(self, which_indices=None, *args, **kwargs):
|
||||||
|
|
@ -104,3 +168,288 @@ class Bayesian_GPLVM(sparse_GP, GPLVM):
|
||||||
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
|
ax = GPLVM.plot_latent(self, which_indices=[input_1, input_2], *args, **kwargs)
|
||||||
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
|
ax.plot(self.Z[:, input_1], self.Z[:, input_2], '^w')
|
||||||
return ax
|
return ax
|
||||||
|
|
||||||
|
def plot_X_1d(self, fig=None, axes=None, fig_num="MRD X 1d", colors=None):
|
||||||
|
"""
|
||||||
|
Plot latent space X in 1D:
|
||||||
|
|
||||||
|
-if fig is given, create Q subplots in fig and plot in these
|
||||||
|
-if axes is given plot Q 1D latent space plots of X into each `axis`
|
||||||
|
-if neither fig nor axes is given create a figure with fig_num and plot in there
|
||||||
|
|
||||||
|
colors:
|
||||||
|
|
||||||
|
colors of different latent space dimensions Q
|
||||||
|
"""
|
||||||
|
import pylab
|
||||||
|
if fig is None and axes is None:
|
||||||
|
fig = pylab.figure(num=fig_num, figsize=(8, min(12, (2 * self.X.shape[1]))))
|
||||||
|
if colors is None:
|
||||||
|
colors = pylab.gca()._get_lines.color_cycle
|
||||||
|
pylab.clf()
|
||||||
|
else:
|
||||||
|
colors = iter(colors)
|
||||||
|
plots = []
|
||||||
|
for i in range(self.X.shape[1]):
|
||||||
|
if axes is None:
|
||||||
|
ax = fig.add_subplot(self.X.shape[1], 1, i + 1)
|
||||||
|
else:
|
||||||
|
ax = axes[i]
|
||||||
|
ax.plot(self.X, c='k', alpha=.3)
|
||||||
|
plots.extend(ax.plot(self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{}}}$".format(i)))
|
||||||
|
ax.fill_between(np.arange(self.X.shape[0]),
|
||||||
|
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
|
||||||
|
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
|
||||||
|
facecolor=plots[-1].get_color(),
|
||||||
|
alpha=.3)
|
||||||
|
ax.legend(borderaxespad=0.)
|
||||||
|
if i < self.X.shape[1] - 1:
|
||||||
|
ax.set_xticklabels('')
|
||||||
|
pylab.draw()
|
||||||
|
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
||||||
|
return fig
|
||||||
|
|
||||||
|
def _debug_filter_params(self, x):
|
||||||
|
start, end = 0, self.X.size,
|
||||||
|
X = x[start:end].reshape(self.N, self.Q)
|
||||||
|
start, end = end, end + self.X_variance.size
|
||||||
|
X_v = x[start:end].reshape(self.N, self.Q)
|
||||||
|
start, end = end, end + (self.M * self.Q)
|
||||||
|
Z = x[start:end].reshape(self.M, self.Q)
|
||||||
|
start, end = end, end + self.Q
|
||||||
|
theta = x[start:]
|
||||||
|
return X, X_v, Z, theta
|
||||||
|
|
||||||
|
|
||||||
|
def _debug_get_axis(self, figs):
|
||||||
|
if figs[-1].axes:
|
||||||
|
ax1 = figs[-1].axes[0]
|
||||||
|
ax1.cla()
|
||||||
|
else:
|
||||||
|
ax1 = figs[-1].add_subplot(111)
|
||||||
|
return ax1
|
||||||
|
|
||||||
|
def _debug_plot(self):
|
||||||
|
assert self._debug, "must enable _debug, to debug-plot"
|
||||||
|
import pylab
|
||||||
|
# from mpl_toolkits.mplot3d import Axes3D
|
||||||
|
figs = [pylab.figure('BGPLVM DEBUG', figsize=(12, 4))]
|
||||||
|
# fig.clf()
|
||||||
|
|
||||||
|
# log like
|
||||||
|
# splotshape = (6, 4)
|
||||||
|
# ax1 = pylab.subplot2grid(splotshape, (0, 0), 1, 4)
|
||||||
|
ax1 = self._debug_get_axis(figs)
|
||||||
|
ax1.text(.5, .5, "Optimization", alpha=.3, transform=ax1.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
kllls = np.array(self._savedklll)
|
||||||
|
LL, = ax1.plot(kllls[:, 0], kllls[:, 1] - kllls[:, 2], '-', label=r'$\log p(\mathbf{Y})$', mew=1.5)
|
||||||
|
KL, = ax1.plot(kllls[:, 0], kllls[:, 2], '-', label=r'$\mathcal{KL}(p||q)$', mew=1.5)
|
||||||
|
L, = ax1.plot(kllls[:, 0], kllls[:, 1], '-', label=r'$L$', mew=1.5) # \mathds{E}_{q(\mathbf{X})}[p(\mathbf{Y|X})\frac{p(\mathbf{X})}{q(\mathbf{X})}]
|
||||||
|
|
||||||
|
param_dict = dict(self._savedparams)
|
||||||
|
gradient_dict = dict(self._savedgradients)
|
||||||
|
kmm_dict = dict(self._savedpsiKmm)
|
||||||
|
iters = np.array(param_dict.keys())
|
||||||
|
self.showing = 0
|
||||||
|
|
||||||
|
# ax2 = pylab.subplot2grid(splotshape, (1, 0), 2, 4)
|
||||||
|
figs.append(pylab.figure("BGPLVM DEBUG X", figsize=(12, 4)))
|
||||||
|
ax2 = self._debug_get_axis(figs)
|
||||||
|
ax2.text(.5, .5, r"$\mathbf{X}$", alpha=.5, transform=ax2.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
figs[-1].canvas.draw()
|
||||||
|
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||||
|
# ax3 = pylab.subplot2grid(splotshape, (3, 0), 2, 4, sharex=ax2)
|
||||||
|
figs.append(pylab.figure("BGPLVM DEBUG S", figsize=(12, 4)))
|
||||||
|
ax3 = self._debug_get_axis(figs)
|
||||||
|
ax3.text(.5, .5, r"$\mathbf{S}$", alpha=.5, transform=ax3.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
figs[-1].canvas.draw()
|
||||||
|
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||||
|
# ax4 = pylab.subplot2grid(splotshape, (5, 0), 2, 2)
|
||||||
|
figs.append(pylab.figure("BGPLVM DEBUG Z", figsize=(6, 4)))
|
||||||
|
ax4 = self._debug_get_axis(figs)
|
||||||
|
ax4.text(.5, .5, r"$\mathbf{Z}$", alpha=.5, transform=ax4.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
figs[-1].canvas.draw()
|
||||||
|
figs[-1].tight_layout(rect=(0, 0, 1, .86))
|
||||||
|
# ax5 = pylab.subplot2grid(splotshape, (5, 2), 2, 2)
|
||||||
|
figs.append(pylab.figure("BGPLVM DEBUG theta", figsize=(6, 4)))
|
||||||
|
ax5 = self._debug_get_axis(figs)
|
||||||
|
ax5.text(.5, .5, r"${\theta}$", alpha=.5, transform=ax5.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
figs[-1].canvas.draw()
|
||||||
|
figs[-1].tight_layout(rect=(.15, 0, 1, .86))
|
||||||
|
figs.append(pylab.figure("BGPLVM DEBUG Kmm", figsize=(12, 6)))
|
||||||
|
fig = figs[-1]
|
||||||
|
ax6 = fig.add_subplot(121)
|
||||||
|
ax6.text(.5, .5, r"${\mathbf{K}_{mm}}$", color='magenta', alpha=.5, transform=ax6.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
ax7 = fig.add_subplot(122)
|
||||||
|
ax7.text(.5, .5, r"${\frac{dL}{dK_{mm}}}$", color='magenta', alpha=.5, transform=ax7.transAxes,
|
||||||
|
ha='center', va='center')
|
||||||
|
|
||||||
|
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
|
||||||
|
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
|
||||||
|
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
|
||||||
|
|
||||||
|
quiver_units = 'xy'
|
||||||
|
quiver_scale = 1
|
||||||
|
quiver_scale_units = 'xy'
|
||||||
|
Xlatentplts = ax2.plot(X, ls="-", marker="x")
|
||||||
|
colors = colorConverter.to_rgba_array([p.get_color() for p in Xlatentplts], .4)
|
||||||
|
Ulatent = np.zeros_like(X)
|
||||||
|
xlatent = np.tile(np.arange(0, X.shape[0])[:, None], X.shape[1])
|
||||||
|
Xlatentgrads = ax2.quiver(xlatent, X, Ulatent, Xg, color=colors,
|
||||||
|
units=quiver_units, scale_units=quiver_scale_units,
|
||||||
|
scale=quiver_scale)
|
||||||
|
|
||||||
|
Slatentplts = ax3.plot(S, ls="-", marker="x")
|
||||||
|
Slatentgrads = ax3.quiver(xlatent, S, Ulatent, Sg, color=colors,
|
||||||
|
units=quiver_units, scale_units=quiver_scale_units,
|
||||||
|
scale=quiver_scale)
|
||||||
|
ax3.set_ylim(0, 1.)
|
||||||
|
|
||||||
|
xZ = np.tile(np.arange(0, Z.shape[0])[:, None], Z.shape[1])
|
||||||
|
UZ = np.zeros_like(Z)
|
||||||
|
Zplts = ax4.plot(Z, ls="-", marker="x")
|
||||||
|
Zgrads = ax4.quiver(xZ, Z, UZ, Zg, color=colors,
|
||||||
|
units=quiver_units, scale_units=quiver_scale_units,
|
||||||
|
scale=quiver_scale)
|
||||||
|
|
||||||
|
xtheta = np.arange(len(theta))
|
||||||
|
Utheta = np.zeros_like(theta)
|
||||||
|
thetaplts = ax5.bar(xtheta - .4, theta, color=colors)
|
||||||
|
thetagrads = ax5.quiver(xtheta, theta, Utheta, thetag, color=colors,
|
||||||
|
units=quiver_units, scale_units=quiver_scale_units,
|
||||||
|
scale=quiver_scale,
|
||||||
|
edgecolors=('k',), linewidths=[1])
|
||||||
|
pylab.setp(thetaplts, zorder=0)
|
||||||
|
pylab.setp(thetagrads, zorder=10)
|
||||||
|
ax5.set_xticks(np.arange(len(theta)))
|
||||||
|
ax5.set_xticklabels(self._get_param_names()[-len(theta):], rotation=17)
|
||||||
|
|
||||||
|
imkmm = ax6.imshow(kmm_dict[self.showing][0])
|
||||||
|
from mpl_toolkits.axes_grid1 import make_axes_locatable
|
||||||
|
divider = make_axes_locatable(ax6)
|
||||||
|
caxkmm = divider.append_axes("right", "5%", pad="1%")
|
||||||
|
cbarkmm = pylab.colorbar(imkmm, cax=caxkmm)
|
||||||
|
|
||||||
|
imkmmdl = ax7.imshow(kmm_dict[self.showing][1])
|
||||||
|
divider = make_axes_locatable(ax7)
|
||||||
|
caxkmmdl = divider.append_axes("right", "5%", pad="1%")
|
||||||
|
cbarkmmdl = pylab.colorbar(imkmmdl, cax=caxkmmdl)
|
||||||
|
|
||||||
|
# Qleg = ax1.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||||
|
# loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.15, 1, 1.15),
|
||||||
|
# borderaxespad=0, mode="expand")
|
||||||
|
ax2.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||||
|
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||||
|
borderaxespad=0, mode="expand")
|
||||||
|
ax3.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||||
|
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||||
|
borderaxespad=0, mode="expand")
|
||||||
|
ax4.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||||
|
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||||
|
borderaxespad=0, mode="expand")
|
||||||
|
ax5.legend(Xlatentplts, [r"$Q_{}$".format(i + 1) for i in range(self.Q)],
|
||||||
|
loc=3, ncol=self.Q, bbox_to_anchor=(0, 1.1, 1, 1.1),
|
||||||
|
borderaxespad=0, mode="expand")
|
||||||
|
Lleg = ax1.legend()
|
||||||
|
Lleg.draggable()
|
||||||
|
# ax1.add_artist(Qleg)
|
||||||
|
|
||||||
|
indicatorKL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 2], 'o', c=KL.get_color())
|
||||||
|
indicatorLL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1] - kllls[self.showing, 2], 'o', c=LL.get_color())
|
||||||
|
indicatorL, = ax1.plot(kllls[self.showing, 0], kllls[self.showing, 1], 'o', c=L.get_color())
|
||||||
|
# for err in self._savederrors:
|
||||||
|
# if err < kllls.shape[0]:
|
||||||
|
# ax1.scatter(kllls[err, 0], kllls[err, 2], s=50, marker=(5, 2), c=KL.get_color())
|
||||||
|
# ax1.scatter(kllls[err, 0], kllls[err, 1] - kllls[err, 2], s=50, marker=(5, 2), c=LL.get_color())
|
||||||
|
# ax1.scatter(kllls[err, 0], kllls[err, 1], s=50, marker=(5, 2), c=L.get_color())
|
||||||
|
|
||||||
|
# try:
|
||||||
|
# for f in figs:
|
||||||
|
# f.canvas.draw()
|
||||||
|
# f.tight_layout(box=(0, .15, 1, .9))
|
||||||
|
# # pylab.draw()
|
||||||
|
# # pylab.tight_layout(box=(0, .1, 1, .9))
|
||||||
|
# except:
|
||||||
|
# pass
|
||||||
|
|
||||||
|
# parameter changes
|
||||||
|
# ax2 = pylab.subplot2grid((4, 1), (1, 0), 3, 1, projection='3d')
|
||||||
|
button_options = [0, 0] # [0]: clicked -- [1]: dragged
|
||||||
|
|
||||||
|
def update_plots(event):
|
||||||
|
if button_options[0] and not button_options[1]:
|
||||||
|
# event.button, event.x, event.y, event.xdata, event.ydata)
|
||||||
|
tmp = np.abs(iters - event.xdata)
|
||||||
|
closest_hit = iters[tmp == tmp.min()][0]
|
||||||
|
|
||||||
|
if closest_hit != self.showing:
|
||||||
|
self.showing = closest_hit
|
||||||
|
# print closest_hit, iters, event.xdata
|
||||||
|
|
||||||
|
indicatorLL.set_data(self.showing, kllls[self.showing, 1] - kllls[self.showing, 2])
|
||||||
|
indicatorKL.set_data(self.showing, kllls[self.showing, 2])
|
||||||
|
indicatorL.set_data(self.showing, kllls[self.showing, 1])
|
||||||
|
|
||||||
|
X, S, Z, theta = self._debug_filter_params(param_dict[self.showing])
|
||||||
|
Xg, Sg, Zg, thetag = self._debug_filter_params(gradient_dict[self.showing])
|
||||||
|
# Xg, Sg, Zg, thetag = -Xg, -Sg, -Zg, -thetag
|
||||||
|
|
||||||
|
for i, Xlatent in enumerate(Xlatentplts):
|
||||||
|
Xlatent.set_ydata(X[:, i])
|
||||||
|
Xlatentgrads.set_offsets(np.array([xlatent.ravel(), X.ravel()]).T)
|
||||||
|
Xlatentgrads.set_UVC(Ulatent, Xg)
|
||||||
|
|
||||||
|
for i, Slatent in enumerate(Slatentplts):
|
||||||
|
Slatent.set_ydata(S[:, i])
|
||||||
|
Slatentgrads.set_offsets(np.array([xlatent.ravel(), S.ravel()]).T)
|
||||||
|
Slatentgrads.set_UVC(Ulatent, Sg)
|
||||||
|
|
||||||
|
for i, Zlatent in enumerate(Zplts):
|
||||||
|
Zlatent.set_ydata(Z[:, i])
|
||||||
|
Zgrads.set_offsets(np.array([xZ.ravel(), Z.ravel()]).T)
|
||||||
|
Zgrads.set_UVC(UZ, Zg)
|
||||||
|
|
||||||
|
for p, t in zip(thetaplts, theta):
|
||||||
|
p.set_height(t)
|
||||||
|
thetagrads.set_offsets(np.array([xtheta.ravel(), theta.ravel()]).T)
|
||||||
|
thetagrads.set_UVC(Utheta, thetag)
|
||||||
|
|
||||||
|
imkmm.set_data(kmm_dict[self.showing][0])
|
||||||
|
imkmm.autoscale()
|
||||||
|
cbarkmm.update_normal(imkmm)
|
||||||
|
|
||||||
|
imkmmdl.set_data(kmm_dict[self.showing][1])
|
||||||
|
imkmmdl.autoscale()
|
||||||
|
cbarkmmdl.update_normal(imkmmdl)
|
||||||
|
|
||||||
|
ax2.relim()
|
||||||
|
# ax3.relim()
|
||||||
|
ax4.relim()
|
||||||
|
ax5.relim()
|
||||||
|
ax2.autoscale()
|
||||||
|
# ax3.autoscale()
|
||||||
|
ax4.autoscale()
|
||||||
|
ax5.autoscale()
|
||||||
|
|
||||||
|
[fig.canvas.draw() for fig in figs]
|
||||||
|
button_options[0] = 0
|
||||||
|
button_options[1] = 0
|
||||||
|
|
||||||
|
def onclick(event):
|
||||||
|
if event.inaxes is ax1 and event.button == 1:
|
||||||
|
button_options[0] = 1
|
||||||
|
def motion(event):
|
||||||
|
if button_options[0]:
|
||||||
|
button_options[1] = 1
|
||||||
|
|
||||||
|
cidr = figs[0].canvas.mpl_connect('button_release_event', update_plots)
|
||||||
|
cidp = figs[0].canvas.mpl_connect('button_press_event', onclick)
|
||||||
|
cidd = figs[0].canvas.mpl_connect('motion_notify_event', motion)
|
||||||
|
|
||||||
|
return ax1, ax2, ax3, ax4, ax5, ax6, ax7
|
||||||
|
|
|
||||||
222
GPy/models/GP.py
222
GPy/models/GP.py
|
|
@ -6,8 +6,8 @@ import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from ..core import model
|
from ..core import model
|
||||||
from ..util.linalg import pdinv,mdot
|
from ..util.linalg import pdinv, mdot
|
||||||
from ..util.plot import gpplot,x_frame1D,x_frame2D, Tango
|
from ..util.plot import gpplot, x_frame1D, x_frame2D, Tango
|
||||||
from ..likelihoods import EP
|
from ..likelihoods import EP
|
||||||
|
|
||||||
class GP(model):
|
class GP(model):
|
||||||
|
|
@ -19,9 +19,6 @@ class GP(model):
|
||||||
:param likelihood: a GPy likelihood
|
:param likelihood: a GPy likelihood
|
||||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
|
|
||||||
:type normalize_Y: False|True
|
|
||||||
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
|
|
||||||
:rtype: model object
|
:rtype: model object
|
||||||
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
|
:param epsilon_ep: convergence criterion for the Expectation Propagation algorithm, defaults to 0.1
|
||||||
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
|
:param powerep: power-EP parameters [$\eta$,$\delta$], defaults to [1.,1.]
|
||||||
|
|
@ -30,33 +27,31 @@ class GP(model):
|
||||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, X, likelihood, kernel, normalize_X=False, Xslices=None):
|
def __init__(self, X, likelihood, kernel, normalize_X=False):
|
||||||
|
|
||||||
# parse arguments
|
# parse arguments
|
||||||
self.Xslices = Xslices
|
|
||||||
self.X = X
|
self.X = X
|
||||||
assert len(self.X.shape)==2
|
assert len(self.X.shape) == 2
|
||||||
self.N, self.Q = self.X.shape
|
self.N, self.Q = self.X.shape
|
||||||
assert isinstance(kernel, kern.kern)
|
assert isinstance(kernel, kern.kern)
|
||||||
self.kern = kernel
|
self.kern = kernel
|
||||||
|
|
||||||
#here's some simple normalization for the inputs
|
|
||||||
if normalize_X:
|
|
||||||
self._Xmean = X.mean(0)[None,:]
|
|
||||||
self._Xstd = X.std(0)[None,:]
|
|
||||||
self.X = (X.copy() - self._Xmean) / self._Xstd
|
|
||||||
if hasattr(self,'Z'):
|
|
||||||
self.Z = (self.Z - self._Xmean) / self._Xstd
|
|
||||||
else:
|
|
||||||
self._Xmean = np.zeros((1,self.X.shape[1]))
|
|
||||||
self._Xstd = np.ones((1,self.X.shape[1]))
|
|
||||||
|
|
||||||
self.likelihood = likelihood
|
self.likelihood = likelihood
|
||||||
#assert self.X.shape[0] == self.likelihood.Y.shape[0]
|
|
||||||
#self.N, self.D = self.likelihood.Y.shape
|
|
||||||
assert self.X.shape[0] == self.likelihood.data.shape[0]
|
assert self.X.shape[0] == self.likelihood.data.shape[0]
|
||||||
self.N, self.D = self.likelihood.data.shape
|
self.N, self.D = self.likelihood.data.shape
|
||||||
|
|
||||||
|
# here's some simple normalization for the inputs
|
||||||
|
if normalize_X:
|
||||||
|
self._Xmean = X.mean(0)[None, :]
|
||||||
|
self._Xstd = X.std(0)[None, :]
|
||||||
|
self.X = (X.copy() - self._Xmean) / self._Xstd
|
||||||
|
if hasattr(self, 'Z'):
|
||||||
|
self.Z = (self.Z - self._Xmean) / self._Xstd
|
||||||
|
else:
|
||||||
|
self._Xmean = np.zeros((1, self.X.shape[1]))
|
||||||
|
self._Xstd = np.ones((1, self.X.shape[1]))
|
||||||
|
|
||||||
|
if not hasattr(self,'has_uncertain_inputs'):
|
||||||
|
self.has_uncertain_inputs = False
|
||||||
model.__init__(self)
|
model.__init__(self)
|
||||||
|
|
||||||
def dL_dZ(self):
|
def dL_dZ(self):
|
||||||
|
|
@ -65,24 +60,24 @@ class GP(model):
|
||||||
"""
|
"""
|
||||||
return np.zeros_like(self.Z)
|
return np.zeros_like(self.Z)
|
||||||
|
|
||||||
def _set_params(self,p):
|
def _set_params(self, p):
|
||||||
self.kern._set_params_transformed(p[:self.kern.Nparam])
|
self.kern._set_params_transformed(p[:self.kern.Nparam_transformed()])
|
||||||
#self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
|
# self.likelihood._set_params(p[self.kern.Nparam:]) # test by Nicolas
|
||||||
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
|
self.likelihood._set_params(p[self.kern.Nparam_transformed():]) # test by Nicolas
|
||||||
|
|
||||||
|
|
||||||
self.K = self.kern.K(self.X,slices1=self.Xslices,slices2=self.Xslices)
|
self.K = self.kern.K(self.X)
|
||||||
self.K += self.likelihood.covariance_matrix
|
self.K += self.likelihood.covariance_matrix
|
||||||
|
|
||||||
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
|
self.Ki, self.L, self.Li, self.K_logdet = pdinv(self.K)
|
||||||
|
|
||||||
#the gradient of the likelihood wrt the covariance matrix
|
# the gradient of the likelihood wrt the covariance matrix
|
||||||
if self.likelihood.YYT is None:
|
if self.likelihood.YYT is None:
|
||||||
alpha = np.dot(self.Ki,self.likelihood.Y)
|
alpha = np.dot(self.Ki, self.likelihood.Y)
|
||||||
self.dL_dK = 0.5*(np.dot(alpha,alpha.T)-self.D*self.Ki)
|
self.dL_dK = 0.5 * (np.dot(alpha, alpha.T) - self.D * self.Ki)
|
||||||
else:
|
else:
|
||||||
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
|
tmp = mdot(self.Ki, self.likelihood.YYT, self.Ki)
|
||||||
self.dL_dK = 0.5*(tmp - self.D*self.Ki)
|
self.dL_dK = 0.5 * (tmp - self.D * self.Ki)
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
|
return np.hstack((self.kern._get_params_transformed(), self.likelihood._get_params()))
|
||||||
|
|
@ -94,20 +89,20 @@ class GP(model):
|
||||||
"""
|
"""
|
||||||
Approximates a non-gaussian likelihood using Expectation Propagation
|
Approximates a non-gaussian likelihood using Expectation Propagation
|
||||||
|
|
||||||
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
|
For a Gaussian likelihood, no iteration is required:
|
||||||
this function does nothing
|
this function does nothing
|
||||||
"""
|
"""
|
||||||
self.likelihood.fit_full(self.kern.K(self.X))
|
self.likelihood.fit_full(self.kern.K(self.X))
|
||||||
self._set_params(self._get_params()) # update the GP
|
self._set_params(self._get_params()) # update the GP
|
||||||
|
|
||||||
def _model_fit_term(self):
|
def _model_fit_term(self):
|
||||||
"""
|
"""
|
||||||
Computes the model fit using YYT if it's available
|
Computes the model fit using YYT if it's available
|
||||||
"""
|
"""
|
||||||
if self.likelihood.YYT is None:
|
if self.likelihood.YYT is None:
|
||||||
return -0.5*np.sum(np.square(np.dot(self.Li,self.likelihood.Y)))
|
return -0.5 * np.sum(np.square(np.dot(self.Li, self.likelihood.Y)))
|
||||||
else:
|
else:
|
||||||
return -0.5*np.sum(np.multiply(self.Ki, self.likelihood.YYT))
|
return -0.5 * np.sum(np.multiply(self.Ki, self.likelihood.YYT))
|
||||||
|
|
||||||
def log_likelihood(self):
|
def log_likelihood(self):
|
||||||
"""
|
"""
|
||||||
|
|
@ -117,38 +112,40 @@ class GP(model):
|
||||||
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
|
model for a new variable Y* = v_tilde/tau_tilde, with a covariance
|
||||||
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
|
matrix K* = K + diag(1./tau_tilde) plus a normalization term.
|
||||||
"""
|
"""
|
||||||
return -0.5*self.D*self.K_logdet + self._model_fit_term() + self.likelihood.Z
|
return -0.5 * self.D * self.K_logdet + self._model_fit_term() + self.likelihood.Z
|
||||||
|
|
||||||
|
|
||||||
def _log_likelihood_gradients(self):
|
def _log_likelihood_gradients(self):
|
||||||
"""
|
"""
|
||||||
The gradient of all parameters.
|
The gradient of all parameters.
|
||||||
|
|
||||||
For the kernel parameters, use the chain rule via dL_dK
|
Note, we use the chain rule: dL_dtheta = dL_dK * d_K_dtheta
|
||||||
|
|
||||||
For the likelihood parameters, pass in alpha = K^-1 y
|
|
||||||
"""
|
"""
|
||||||
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK, X=self.X), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
|
||||||
|
|
||||||
def _raw_predict(self,_Xnew,slices=None, full_cov=False):
|
def _raw_predict(self, _Xnew, which_parts='all', full_cov=False):
|
||||||
"""
|
"""
|
||||||
Internal helper function for making predictions, does not account
|
Internal helper function for making predictions, does not account
|
||||||
for normalization or likelihood
|
for normalization or likelihood
|
||||||
|
|
||||||
|
#TODO: which_parts does nothing
|
||||||
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
Kx = self.kern.K(self.X,_Xnew, slices1=self.Xslices,slices2=slices)
|
Kx = self.kern.K(self.X, _Xnew,which_parts=which_parts)
|
||||||
mu = np.dot(np.dot(Kx.T,self.Ki),self.likelihood.Y)
|
mu = np.dot(np.dot(Kx.T, self.Ki), self.likelihood.Y)
|
||||||
KiKx = np.dot(self.Ki,Kx)
|
KiKx = np.dot(self.Ki, Kx)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
Kxx = self.kern.K(_Xnew, slices1=slices,slices2=slices)
|
Kxx = self.kern.K(_Xnew, which_parts=which_parts)
|
||||||
var = Kxx - np.dot(KiKx.T,Kx)
|
var = Kxx - np.dot(KiKx.T, Kx)
|
||||||
else:
|
else:
|
||||||
Kxx = self.kern.Kdiag(_Xnew, slices=slices)
|
Kxx = self.kern.Kdiag(_Xnew, which_parts=which_parts)
|
||||||
var = Kxx - np.sum(np.multiply(KiKx,Kx),0)
|
var = Kxx - np.sum(np.multiply(KiKx, Kx), 0)
|
||||||
var = var[:,None]
|
var = var[:, None]
|
||||||
return mu, var
|
return mu, var
|
||||||
|
|
||||||
|
|
||||||
def predict(self,Xnew, slices=None, full_cov=False):
|
def predict(self, Xnew, which_parts='all', full_cov=False):
|
||||||
"""
|
"""
|
||||||
Predict the function(s) at the new point(s) Xnew.
|
Predict the function(s) at the new point(s) Xnew.
|
||||||
|
|
||||||
|
|
@ -156,35 +153,30 @@ class GP(model):
|
||||||
---------
|
---------
|
||||||
:param Xnew: The points at which to make a prediction
|
:param Xnew: The points at which to make a prediction
|
||||||
:type Xnew: np.ndarray, Nnew x self.Q
|
:type Xnew: np.ndarray, Nnew x self.Q
|
||||||
:param slices: specifies which outputs kernel(s) the Xnew correspond to (see below)
|
:param which_parts: specifies which outputs kernel(s) to use in prediction
|
||||||
:type slices: (None, list of slice objects, list of ints)
|
:type which_parts: ('all', list of bools)
|
||||||
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
:param full_cov: whether to return the full covariance matrix, or just the diagonal
|
||||||
:type full_cov: bool
|
:type full_cov: bool
|
||||||
:rtype: posterior mean, a Numpy array, Nnew x self.D
|
:rtype: posterior mean, a Numpy array, Nnew x self.D
|
||||||
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
:rtype: posterior variance, a Numpy array, Nnew x 1 if full_cov=False, Nnew x Nnew otherwise
|
||||||
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
|
:rtype: lower and upper boundaries of the 95% confidence intervals, Numpy arrays, Nnew x self.D
|
||||||
|
|
||||||
.. Note:: "slices" specifies how the the points X_new co-vary wich the training points.
|
|
||||||
|
|
||||||
- If None, the new points covary throigh every kernel part (default)
|
|
||||||
- If a list of slices, the i^th slice specifies which data are affected by the i^th kernel part
|
|
||||||
- If a list of booleans, specifying which kernel parts are active
|
|
||||||
|
|
||||||
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
|
If full_cov and self.D > 1, the return shape of var is Nnew x Nnew x self.D. If self.D == 1, the return shape is Nnew x Nnew.
|
||||||
This is to allow for different normalizations of the output dimensions.
|
This is to allow for different normalizations of the output dimensions.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
#normalize X values
|
# normalize X values
|
||||||
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
|
Xnew = (Xnew.copy() - self._Xmean) / self._Xstd
|
||||||
mu, var = self._raw_predict(Xnew, slices, full_cov)
|
mu, var = self._raw_predict(Xnew, which_parts, full_cov)
|
||||||
|
|
||||||
#now push through likelihood TODO
|
# now push through likelihood
|
||||||
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
mean, var, _025pm, _975pm = self.likelihood.predictive_values(mu, var, full_cov)
|
||||||
|
|
||||||
return mean, var, _025pm, _975pm
|
return mean, var, _025pm, _975pm
|
||||||
|
|
||||||
|
|
||||||
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_functions='all', resolution=None, full_cov=False):
|
def plot_f(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, full_cov=False):
|
||||||
"""
|
"""
|
||||||
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
|
Plot the GP's view of the world, where the data is normalized and the likelihood is Gaussian
|
||||||
|
|
||||||
|
|
@ -192,8 +184,8 @@ class GP(model):
|
||||||
:param which_data: which of the training data to plot (default all)
|
:param which_data: which of the training data to plot (default all)
|
||||||
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
:type which_data: 'all' or a slice object to slice self.X, self.Y
|
||||||
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
:param plot_limits: The limits of the plot. If 1D [xmin,xmax], if 2D [[xmin,ymin],[xmax,ymax]]. Defaults to data limits
|
||||||
:param which_functions: which of the kernel functions to plot (additively)
|
:param which_parts: which of the kernel functions to plot (additively)
|
||||||
:type which_functions: list of bools
|
:type which_parts: 'all', or list of bools
|
||||||
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
:param resolution: the number of intervals to sample the GP on. Defaults to 200 in 1D and 50 (a 50x50 grid) in 2D
|
||||||
|
|
||||||
Plot the posterior of the GP.
|
Plot the posterior of the GP.
|
||||||
|
|
@ -204,86 +196,86 @@ class GP(model):
|
||||||
Can plot only part of the data and part of the posterior functions using which_data and which_functions
|
Can plot only part of the data and part of the posterior functions using which_data and which_parts
|
||||||
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
|
Plot the data's view of the world, with non-normalized values and GP predictions passed through the likelihood
|
||||||
"""
|
"""
|
||||||
if which_functions=='all':
|
if which_data == 'all':
|
||||||
which_functions = [True]*self.kern.Nparts
|
|
||||||
if which_data=='all':
|
|
||||||
which_data = slice(None)
|
which_data = slice(None)
|
||||||
|
|
||||||
if self.X.shape[1] == 1:
|
if self.X.shape[1] == 1:
|
||||||
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
|
Xnew, xmin, xmax = x_frame1D(self.X, plot_limits=plot_limits)
|
||||||
if samples == 0:
|
if samples == 0:
|
||||||
m,v = self._raw_predict(Xnew, slices=which_functions)
|
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
||||||
gpplot(Xnew,m,m-2*np.sqrt(v),m+2*np.sqrt(v))
|
gpplot(Xnew, m, m - 2 * np.sqrt(v), m + 2 * np.sqrt(v))
|
||||||
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
|
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
|
||||||
else:
|
else:
|
||||||
m,v = self._raw_predict(Xnew, slices=which_functions,full_cov=True)
|
m, v = self._raw_predict(Xnew, which_parts=which_parts, full_cov=True)
|
||||||
Ysim = np.random.multivariate_normal(m.flatten(),v,samples)
|
Ysim = np.random.multivariate_normal(m.flatten(), v, samples)
|
||||||
gpplot(Xnew,m,m-2*np.sqrt(np.diag(v)[:,None]),m+2*np.sqrt(np.diag(v))[:,None])
|
gpplot(Xnew, m, m - 2 * np.sqrt(np.diag(v)[:, None]), m + 2 * np.sqrt(np.diag(v))[:, None])
|
||||||
for i in range(samples):
|
for i in range(samples):
|
||||||
pb.plot(Xnew,Ysim[i,:],Tango.colorsHex['darkBlue'],linewidth=0.25)
|
pb.plot(Xnew, Ysim[i, :], Tango.colorsHex['darkBlue'], linewidth=0.25)
|
||||||
pb.plot(self.X[which_data],self.likelihood.Y[which_data],'kx',mew=1.5)
|
pb.plot(self.X[which_data], self.likelihood.Y[which_data], 'kx', mew=1.5)
|
||||||
pb.xlim(xmin,xmax)
|
pb.xlim(xmin, xmax)
|
||||||
ymin,ymax = min(np.append(self.likelihood.Y,m-2*np.sqrt(np.diag(v)[:,None]))), max(np.append(self.likelihood.Y,m+2*np.sqrt(np.diag(v)[:,None])))
|
ymin, ymax = min(np.append(self.likelihood.Y, m - 2 * np.sqrt(np.diag(v)[:, None]))), max(np.append(self.likelihood.Y, m + 2 * np.sqrt(np.diag(v)[:, None])))
|
||||||
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
|
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||||
pb.ylim(ymin,ymax)
|
pb.ylim(ymin, ymax)
|
||||||
if hasattr(self,'Z'):
|
if hasattr(self, 'Z'):
|
||||||
pb.plot(self.Z,self.Z*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
|
pb.plot(self.Z, self.Z * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||||
|
|
||||||
elif self.X.shape[1] == 2:
|
elif self.X.shape[1] == 2:
|
||||||
resolution = resolution or 50
|
resolution = resolution or 50
|
||||||
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits,resolution)
|
Xnew, xmin, xmax, xx, yy = x_frame2D(self.X, plot_limits, resolution)
|
||||||
m,v = self._raw_predict(Xnew, slices=which_functions)
|
m, v = self._raw_predict(Xnew, which_parts=which_parts)
|
||||||
m = m.reshape(resolution,resolution).T
|
m = m.reshape(resolution, resolution).T
|
||||||
pb.contour(xx,yy,m,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
|
pb.contour(xx, yy, m, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
pb.scatter(Xorig[:,0],Xorig[:,1],40,Yorig,linewidth=0,cmap=pb.cm.jet,vmin=m.min(), vmax=m.max())
|
pb.scatter(Xorig[:, 0], Xorig[:, 1], 40, Yorig, linewidth=0, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max())
|
||||||
pb.xlim(xmin[0],xmax[0])
|
pb.xlim(xmin[0], xmax[0])
|
||||||
pb.ylim(xmin[1],xmax[1])
|
pb.ylim(xmin[1], xmax[1])
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||||
|
|
||||||
def plot(self,samples=0,plot_limits=None,which_data='all',which_functions='all',resolution=None,levels=20):
|
def plot(self, samples=0, plot_limits=None, which_data='all', which_parts='all', resolution=None, levels=20):
|
||||||
"""
|
"""
|
||||||
TODO: Docstrings!
|
TODO: Docstrings!
|
||||||
:param levels: for 2D plotting, the number of contour levels to use
|
:param levels: for 2D plotting, the number of contour levels to use
|
||||||
|
|
||||||
"""
|
"""
|
||||||
# TODO include samples
|
# TODO include samples
|
||||||
if which_functions=='all':
|
if which_data == 'all':
|
||||||
which_functions = [True]*self.kern.Nparts
|
|
||||||
if which_data=='all':
|
|
||||||
which_data = slice(None)
|
which_data = slice(None)
|
||||||
|
|
||||||
if self.X.shape[1] == 1:
|
if self.X.shape[1] == 1:
|
||||||
|
|
||||||
Xu = self.X * self._Xstd + self._Xmean #NOTE self.X are the normalized values now
|
Xu = self.X * self._Xstd + self._Xmean # NOTE self.X are the normalized values now
|
||||||
|
|
||||||
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
Xnew, xmin, xmax = x_frame1D(Xu, plot_limits=plot_limits)
|
||||||
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
|
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||||
gpplot(Xnew,m, lower, upper)
|
gpplot(Xnew, m, lower, upper)
|
||||||
pb.plot(Xu[which_data],self.likelihood.data[which_data],'kx',mew=1.5)
|
pb.plot(Xu[which_data], self.likelihood.data[which_data], 'kx', mew=1.5)
|
||||||
ymin,ymax = min(np.append(self.likelihood.data,lower)), max(np.append(self.likelihood.data,upper))
|
if self.has_uncertain_inputs:
|
||||||
ymin, ymax = ymin - 0.1*(ymax - ymin), ymax + 0.1*(ymax - ymin)
|
pb.errorbar(Xu[which_data, 0], self.likelihood.data[which_data, 0],
|
||||||
pb.xlim(xmin,xmax)
|
xerr=2 * np.sqrt(self.X_variance[which_data, 0]),
|
||||||
pb.ylim(ymin,ymax)
|
ecolor='k', fmt=None, elinewidth=.5, alpha=.5)
|
||||||
if hasattr(self,'Z'):
|
|
||||||
Zu = self.Z*self._Xstd + self._Xmean
|
|
||||||
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
|
|
||||||
|
|
||||||
elif self.X.shape[1]==2: #FIXME
|
ymin, ymax = min(np.append(self.likelihood.data, lower)), max(np.append(self.likelihood.data, upper))
|
||||||
|
ymin, ymax = ymin - 0.1 * (ymax - ymin), ymax + 0.1 * (ymax - ymin)
|
||||||
|
pb.xlim(xmin, xmax)
|
||||||
|
pb.ylim(ymin, ymax)
|
||||||
|
if hasattr(self, 'Z'):
|
||||||
|
Zu = self.Z * self._Xstd + self._Xmean
|
||||||
|
pb.plot(Zu, Zu * 0 + pb.ylim()[0], 'r|', mew=1.5, markersize=12)
|
||||||
|
# pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_variance.flatten()))
|
||||||
|
|
||||||
|
elif self.X.shape[1] == 2: # FIXME
|
||||||
resolution = resolution or 50
|
resolution = resolution or 50
|
||||||
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits,resolution)
|
Xnew, xx, yy, xmin, xmax = x_frame2D(self.X, plot_limits, resolution)
|
||||||
x, y = np.linspace(xmin[0],xmax[0],resolution), np.linspace(xmin[1],xmax[1],resolution)
|
x, y = np.linspace(xmin[0], xmax[0], resolution), np.linspace(xmin[1], xmax[1], resolution)
|
||||||
m, var, lower, upper = self.predict(Xnew, slices=which_functions)
|
m, var, lower, upper = self.predict(Xnew, which_parts=which_parts)
|
||||||
m = m.reshape(resolution,resolution).T
|
m = m.reshape(resolution, resolution).T
|
||||||
pb.contour(x,y,m,levels,vmin=m.min(),vmax=m.max(),cmap=pb.cm.jet)
|
pb.contour(x, y, m, levels, vmin=m.min(), vmax=m.max(), cmap=pb.cm.jet)
|
||||||
Yf = self.likelihood.Y.flatten()
|
Yf = self.likelihood.Y.flatten()
|
||||||
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
|
pb.scatter(self.X[:, 0], self.X[:, 1], 40, Yf, cmap=pb.cm.jet, vmin=m.min(), vmax=m.max(), linewidth=0.)
|
||||||
pb.xlim(xmin[0],xmax[0])
|
pb.xlim(xmin[0], xmax[0])
|
||||||
pb.ylim(xmin[1],xmax[1])
|
pb.ylim(xmin[1], xmax[1])
|
||||||
if hasattr(self,'Z'):
|
if hasattr(self, 'Z'):
|
||||||
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
|
pb.plot(self.Z[:, 0], self.Z[:, 1], 'wo')
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
raise NotImplementedError, "Cannot define a frame with more than two input dimensions"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
### Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -24,12 +24,12 @@ class GPLVM(GP):
|
||||||
:type init: 'PCA'|'random'
|
:type init: 'PCA'|'random'
|
||||||
|
|
||||||
"""
|
"""
|
||||||
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, **kwargs):
|
def __init__(self, Y, Q, init='PCA', X = None, kernel=None, normalize_Y=False, **kwargs):
|
||||||
if X is None:
|
if X is None:
|
||||||
X = self.initialise_latent(init, Q, Y)
|
X = self.initialise_latent(init, Q, Y)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(Q) + kern.bias(Q)
|
kernel = kern.rbf(Q) + kern.bias(Q)
|
||||||
likelihood = Gaussian(Y)
|
likelihood = Gaussian(Y, normalize=normalize_Y)
|
||||||
GP.__init__(self, X, likelihood, kernel, **kwargs)
|
GP.__init__(self, X, likelihood, kernel, **kwargs)
|
||||||
|
|
||||||
def initialise_latent(self, init, Q, Y):
|
def initialise_latent(self, init, Q, Y):
|
||||||
|
|
@ -91,8 +91,8 @@ class GPLVM(GP):
|
||||||
Xtest_full[:, :2] = Xtest
|
Xtest_full[:, :2] = Xtest
|
||||||
mu, var, low, up = self.predict(Xtest_full)
|
mu, var, low, up = self.predict(Xtest_full)
|
||||||
var = var[:, :1]
|
var = var[:, :1]
|
||||||
ax.imshow(var.reshape(resolution, resolution).T[::-1, :],
|
ax.imshow(var.reshape(resolution, resolution).T,
|
||||||
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear')
|
extent=[xmin[0], xmax[0], xmin[1], xmax[1]], cmap=pb.cm.binary,interpolation='bilinear',origin='lower')
|
||||||
|
|
||||||
for i,ul in enumerate(np.unique(labels)):
|
for i,ul in enumerate(np.unique(labels)):
|
||||||
if type(ul) is np.string_:
|
if type(ul) is np.string_:
|
||||||
|
|
|
||||||
|
|
@ -11,26 +11,24 @@ class GP_regression(GP):
|
||||||
"""
|
"""
|
||||||
Gaussian Process model for regression
|
Gaussian Process model for regression
|
||||||
|
|
||||||
This is a thin wrapper around the GP class, with a set of sensible defaults
|
This is a thin wrapper around the models.GP class, with a set of sensible defaults
|
||||||
|
|
||||||
:param X: input observations
|
:param X: input observations
|
||||||
:param Y: observed values
|
:param Y: observed values
|
||||||
:param kernel: a GPy kernel, defaults to rbf+white
|
:param kernel: a GPy kernel, defaults to rbf
|
||||||
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_X: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
:param normalize_Y: whether to normalize the observed values (Y) before computing (predictions will be in original scales)
|
:param normalize_Y: whether to normalize the observed values (Y) before computing (predictions will be in original scales)
|
||||||
:type normalize_Y: False|True
|
:type normalize_Y: False|True
|
||||||
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
|
|
||||||
:rtype: model object
|
|
||||||
|
|
||||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None):
|
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False):
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1])
|
kernel = kern.rbf(X.shape[1])
|
||||||
|
|
||||||
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,6 @@ from sparse_GP_regression import sparse_GP_regression
|
||||||
from GPLVM import GPLVM
|
from GPLVM import GPLVM
|
||||||
from warped_GP import warpedGP
|
from warped_GP import warpedGP
|
||||||
from sparse_GPLVM import sparse_GPLVM
|
from sparse_GPLVM import sparse_GPLVM
|
||||||
from uncollapsed_sparse_GP import uncollapsed_sparse_GP
|
|
||||||
from Bayesian_GPLVM import Bayesian_GPLVM
|
from Bayesian_GPLVM import Bayesian_GPLVM
|
||||||
from mrd import MRD
|
from mrd import MRD
|
||||||
from generalized_FITC import generalized_FITC
|
from generalized_FITC import generalized_FITC
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,12 @@ from .. import kern
|
||||||
from scipy import stats, linalg
|
from scipy import stats, linalg
|
||||||
from sparse_GP import sparse_GP
|
from sparse_GP import sparse_GP
|
||||||
|
|
||||||
|
def backsub_both_sides(L,X):
|
||||||
|
""" Return L^-T * X * L^-1, assuming X is symmetric and L is a lower Cholesky factor"""
|
||||||
|
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
|
||||||
|
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
|
||||||
|
|
||||||
|
|
||||||
class generalized_FITC(sparse_GP):
|
class generalized_FITC(sparse_GP):
|
||||||
"""
|
"""
|
||||||
Naish-Guzman, A. and Holden, S. (2008) implementation of EP with FITC.
|
Naish-Guzman, A. and Holden, S. (2008) implementation of EP with FITC.
|
||||||
|
|
@ -23,20 +29,19 @@ class generalized_FITC(sparse_GP):
|
||||||
:type X_variance: np.ndarray (N x Q) | None
|
:type X_variance: np.ndarray (N x Q) | None
|
||||||
:param Z: inducing inputs (optional, see note)
|
:param Z: inducing inputs (optional, see note)
|
||||||
:type Z: np.ndarray (M x Q) | None
|
:type Z: np.ndarray (M x Q) | None
|
||||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
|
||||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||||
:type M: int
|
:type M: int
|
||||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||||
:type normalize_(X|Y): bool
|
:type normalize_(X|Y): bool
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||||
|
|
||||||
self.Z = Z
|
self.Z = Z
|
||||||
self.M = self.Z.shape[0]
|
self.M = self.Z.shape[0]
|
||||||
self._precision = likelihood.precision
|
self.true_precision = likelihood.precision
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False)
|
sparse_GP.__init__(self, X, likelihood, kernel=kernel, Z=self.Z, X_variance=None, normalize_X=False)
|
||||||
|
|
||||||
def _set_params(self, p):
|
def _set_params(self, p):
|
||||||
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
|
self.Z = p[:self.M*self.Q].reshape(self.M, self.Q)
|
||||||
|
|
@ -52,13 +57,16 @@ class generalized_FITC(sparse_GP):
|
||||||
|
|
||||||
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
|
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
|
||||||
this function does nothing
|
this function does nothing
|
||||||
|
|
||||||
|
Diag(Knn - Qnn) is added to the noise term to use the tools already implemented in sparse_GP.
|
||||||
|
The true precision is now 'true_precision' not 'precision'.
|
||||||
"""
|
"""
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
|
raise NotImplementedError, "FITC approximation not implemented for uncertain inputs"
|
||||||
else:
|
else:
|
||||||
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
|
self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
|
||||||
self._precision = self.likelihood.precision # Save the true precision
|
self.true_precision = self.likelihood.precision # Save the true precision
|
||||||
self.likelihood.precision = self._precision/(1. + self._precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
self.likelihood.precision = self.true_precision/(1. + self.true_precision*self.Diag0[:,None]) # Add the diagonal element of the FITC approximation
|
||||||
self._set_params(self._get_params()) # update the GP
|
self._set_params(self._get_params()) # update the GP
|
||||||
|
|
||||||
def _FITC_computations(self):
|
def _FITC_computations(self):
|
||||||
|
|
@ -70,23 +78,23 @@ class generalized_FITC(sparse_GP):
|
||||||
- removes the extra terms computed in the sparse_GP approximation
|
- removes the extra terms computed in the sparse_GP approximation
|
||||||
- computes the likelihood gradients wrt the true precision.
|
- computes the likelihood gradients wrt the true precision.
|
||||||
"""
|
"""
|
||||||
#NOTE the true precision is now '_precision' not 'precision'
|
#NOTE the true precision is now 'true_precision' not 'precision'
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
|
||||||
# Compute generalized FITC's diagonal term of the covariance
|
# Compute generalized FITC's diagonal term of the covariance
|
||||||
self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
self.Lmi,info = linalg.lapack.flapack.dtrtrs(self.Lm,np.eye(self.M),lower=1)
|
||||||
|
Lmipsi1 = np.dot(self.Lmi,self.psi1)
|
||||||
|
self.Qnn = np.dot(Lmipsi1.T,Lmipsi1)
|
||||||
|
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
|
||||||
|
#self.Qnn = mdot(self.psi1.T,self.Kmmi,self.psi1)
|
||||||
|
#a = kj
|
||||||
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
self.Diag0 = self.psi0 - np.diag(self.Qnn)
|
||||||
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self._precision.flatten())
|
Iplus_Dprod_i = 1./(1.+ self.Diag0 * self.true_precision.flatten())
|
||||||
self.Diag = self.Diag0 * Iplus_Dprod_i
|
self.Diag = self.Diag0 * Iplus_Dprod_i
|
||||||
#self.Diag = self.Diag0/(1.+ self.Diag0 * self._precision.flatten())
|
|
||||||
|
|
||||||
|
|
||||||
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
|
self.P = Iplus_Dprod_i[:,None] * self.psi1.T
|
||||||
#self.P = (self.Diag / self.Diag0)[:,None] * self.psi1.T
|
|
||||||
self.RPT0 = np.dot(self.Lmi,self.psi1)
|
self.RPT0 = np.dot(self.Lmi,self.psi1)
|
||||||
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
|
self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,((1. - Iplus_Dprod_i)/self.Diag0)[:,None]*self.RPT0.T))
|
||||||
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - Iplus_Dprod_i/self.Diag0)[:,None]*self.RPT0.T))
|
|
||||||
#self.L = np.linalg.cholesky(np.eye(self.M) + np.dot(self.RPT0,(1./self.Diag0 - self.Diag/(self.Diag0**2))[:,None]*self.RPT0.T))
|
|
||||||
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
|
self.R,info = linalg.flapack.dtrtrs(self.L,self.Lmi,lower=1)
|
||||||
self.RPT = np.dot(self.R,self.P.T)
|
self.RPT = np.dot(self.R,self.P.T)
|
||||||
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
|
self.Sigma = np.diag(self.Diag) + np.dot(self.RPT.T,self.RPT)
|
||||||
|
|
@ -95,7 +103,16 @@ class generalized_FITC(sparse_GP):
|
||||||
self.mu = self.w + np.dot(self.P,self.gamma)
|
self.mu = self.w + np.dot(self.P,self.gamma)
|
||||||
|
|
||||||
# Remove extra term from dL_dpsi1
|
# Remove extra term from dL_dpsi1
|
||||||
self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
self.dL_dpsi1 -= mdot(self.Lmi.T,Lmipsi1*self.likelihood.precision.flatten().reshape(1,self.N))
|
||||||
|
#self.Kmmi, Lm, Lmi, Kmm_logdet = pdinv(self.Kmm)
|
||||||
|
#self.dL_dpsi1 -= mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
||||||
|
|
||||||
|
#########333333
|
||||||
|
#self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
|
||||||
|
#########333333
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise NotImplementedError, "homoscedastic fitc not implemented"
|
raise NotImplementedError, "homoscedastic fitc not implemented"
|
||||||
# Remove extra term from dL_dpsi1
|
# Remove extra term from dL_dpsi1
|
||||||
|
|
@ -141,11 +158,14 @@ class generalized_FITC(sparse_GP):
|
||||||
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
|
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
|
||||||
else:
|
else:
|
||||||
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
|
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
|
||||||
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
|
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
|
||||||
D = 0.5*np.trace(self.Cpsi1VVpsi1)
|
#C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
|
||||||
|
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
|
||||||
|
#self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
|
||||||
|
#D_ = 0.5*np.trace(self.Cpsi1VVpsi1)
|
||||||
return A+C+D
|
return A+C+D
|
||||||
|
|
||||||
def _raw_predict(self, Xnew, slices, full_cov=False):
|
def _raw_predict(self, Xnew, which_parts, full_cov=False):
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
"""
|
"""
|
||||||
Make a prediction for the generalized FITC model
|
Make a prediction for the generalized FITC model
|
||||||
|
|
@ -174,16 +194,16 @@ class generalized_FITC(sparse_GP):
|
||||||
self.mu_H = mu_H
|
self.mu_H = mu_H
|
||||||
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
|
Sigma_H = C + np.dot(mu_u,np.dot(self.Sigma,mu_u.T))
|
||||||
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
|
# q(f_star|y) = N(f_star|mu_star,sigma2_star)
|
||||||
Kx = self.kern.K(self.Z, Xnew)
|
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
||||||
KR0T = np.dot(Kx.T,self.Lmi.T)
|
KR0T = np.dot(Kx.T,self.Lmi.T)
|
||||||
mu_star = np.dot(KR0T,mu_H)
|
mu_star = np.dot(KR0T,mu_H)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
Kxx = self.kern.K(Xnew)
|
Kxx = self.kern.K(Xnew,which_parts=which_parts)
|
||||||
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
var = Kxx + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
||||||
else:
|
else:
|
||||||
Kxx = self.kern.Kdiag(Xnew)
|
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
|
||||||
Kxx_ = self.kern.K(Xnew)
|
Kxx_ = self.kern.K(Xnew,which_parts=which_parts) # TODO: RA, is this line needed?
|
||||||
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T))
|
var_ = Kxx_ + np.dot(KR0T,np.dot(Sigma_H - np.eye(self.M),KR0T.T)) # TODO: RA, is this line needed?
|
||||||
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
|
var = (Kxx + np.sum(KR0T.T*np.dot(Sigma_H - np.eye(self.M),KR0T.T),0))[:,None]
|
||||||
return mu_star[:,None],var
|
return mu_star[:,None],var
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -271,90 +271,52 @@ class MRD(model):
|
||||||
self.Z = Z
|
self.Z = Z
|
||||||
return Z
|
return Z
|
||||||
|
|
||||||
def plot_X_1d(self, colors=None):
|
def _handle_plotting(self, fig_num, axes, plotf):
|
||||||
fig = pylab.figure(num="MRD X 1d", figsize=(min(8, (3 * len(self.bgplvms))), min(12, (2 * self.X.shape[1]))))
|
if axes is None:
|
||||||
fig.clf()
|
fig = pylab.figure(num=fig_num, figsize=(4 * len(self.bgplvms), 3 * len(self.bgplvms)))
|
||||||
ax1 = fig.add_subplot(self.X.shape[1], 1, 1)
|
for i, g in enumerate(self.bgplvms):
|
||||||
if colors is None:
|
if axes is None:
|
||||||
colors = ax1._get_lines.color_cycle
|
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
||||||
ax1.plot(self.X, c='k', alpha=.3)
|
else:
|
||||||
plots = ax1.plot(self.X.T[0], c=colors.next())
|
ax = axes[i]
|
||||||
ax1.fill_between(numpy.arange(self.X.shape[0]),
|
plotf(i, g, ax)
|
||||||
self.X.T[0] - 2 * numpy.sqrt(self.gref.X_variance.T[0]),
|
|
||||||
self.X.T[0] + 2 * numpy.sqrt(self.gref.X_variance.T[0]),
|
|
||||||
facecolor=plots[-1].get_color(),
|
|
||||||
alpha=.3)
|
|
||||||
ax1.text(1, 1, r"$\mathbf{{X_{}}}".format(1),
|
|
||||||
horizontalalignment='right',
|
|
||||||
verticalalignment='top',
|
|
||||||
transform=ax1.transAxes)
|
|
||||||
for i in range(self.X.shape[1] - 1):
|
|
||||||
ax = fig.add_subplot(self.X.shape[1], 1, i + 2)
|
|
||||||
ax.plot(self.X, c='k', alpha=.3)
|
|
||||||
plots.extend(ax.plot(self.X.T[i + 1], c=colors.next()))
|
|
||||||
ax.fill_between(numpy.arange(self.X.shape[0]),
|
|
||||||
self.X.T[i + 1] - 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
|
|
||||||
self.X.T[i + 1] + 2 * numpy.sqrt(self.gref.X_variance.T[i + 1]),
|
|
||||||
facecolor=plots[-1].get_color(),
|
|
||||||
alpha=.3)
|
|
||||||
if i < self.X.shape[1] - 2:
|
|
||||||
ax.set_xticklabels('')
|
|
||||||
ax1.set_xticklabels('')
|
|
||||||
# ax1.legend(plots, [r"$\mathbf{{X_{}}}$".format(i + 1) for i in range(self.X.shape[1])],
|
|
||||||
# bbox_to_anchor=(0., 1 + .01 * self.X.shape[1],
|
|
||||||
# 1., 1. + .01 * self.X.shape[1]), loc=3,
|
|
||||||
# ncol=self.X.shape[1], mode="expand", borderaxespad=0.)
|
|
||||||
pylab.draw()
|
pylab.draw()
|
||||||
fig.tight_layout(h_pad=.01, rect=(0, 0, 1, .95))
|
if axes is None:
|
||||||
|
fig.tight_layout()
|
||||||
|
return fig
|
||||||
|
else:
|
||||||
|
return pylab.gcf()
|
||||||
|
|
||||||
|
def plot_X(self, fig_num="MRD Predictions", axes=None):
|
||||||
|
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.X))
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def plot_X(self):
|
def plot_predict(self, fig_num="MRD Predictions", axes=None):
|
||||||
fig = pylab.figure("MRD X", figsize=(4 * len(self.bgplvms), 3))
|
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: ax.imshow(g.predict(g.X)[0]))
|
||||||
fig.clf()
|
|
||||||
for i, g in enumerate(self.bgplvms):
|
|
||||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
|
||||||
ax.imshow(g.X)
|
|
||||||
pylab.draw()
|
|
||||||
fig.tight_layout()
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def plot_predict(self):
|
def plot_scales(self, fig_num="MRD Scales", axes=None, *args, **kwargs):
|
||||||
fig = pylab.figure("MRD Predictions", figsize=(4 * len(self.bgplvms), 3))
|
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.kern.plot_ARD(ax=ax, *args, **kwargs))
|
||||||
fig.clf()
|
|
||||||
for i, g in enumerate(self.bgplvms):
|
|
||||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
|
||||||
ax.imshow(g.predict(g.X)[0])
|
|
||||||
pylab.draw()
|
|
||||||
fig.tight_layout()
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def plot_scales(self, *args, **kwargs):
|
def plot_latent(self, fig_num="MRD Latent Spaces", axes=None, *args, **kwargs):
|
||||||
fig = pylab.figure("MRD Scales", figsize=(4 * len(self.bgplvms), 3))
|
fig = self._handle_plotting(fig_num, axes, lambda i, g, ax: g.plot_latent(ax=ax, *args, **kwargs))
|
||||||
fig.clf()
|
|
||||||
for i, g in enumerate(self.bgplvms):
|
|
||||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
|
||||||
g.kern.plot_ARD(ax=ax, *args, **kwargs)
|
|
||||||
pylab.draw()
|
|
||||||
fig.tight_layout()
|
|
||||||
return fig
|
|
||||||
|
|
||||||
def plot_latent(self, *args, **kwargs):
|
|
||||||
fig = pylab.figure("MRD Latent Spaces", figsize=(4 * len(self.bgplvms), 3))
|
|
||||||
fig.clf()
|
|
||||||
for i, g in enumerate(self.bgplvms):
|
|
||||||
ax = fig.add_subplot(1, len(self.bgplvms), i + 1)
|
|
||||||
g.plot_latent(ax=ax, *args, **kwargs)
|
|
||||||
pylab.draw()
|
|
||||||
fig.tight_layout()
|
|
||||||
return fig
|
return fig
|
||||||
|
|
||||||
def _debug_plot(self):
|
def _debug_plot(self):
|
||||||
self.plot_X()
|
|
||||||
self.plot_X_1d()
|
self.plot_X_1d()
|
||||||
self.plot_latent()
|
fig = pylab.figure("MRD DEBUG PLOT", figsize=(4 * len(self.bgplvms), 9))
|
||||||
self.plot_scales()
|
fig.clf()
|
||||||
|
axes = [fig.add_subplot(3, len(self.bgplvms), i + 1) for i in range(len(self.bgplvms))]
|
||||||
|
self.plot_X(axes=axes)
|
||||||
|
axes = [fig.add_subplot(3, len(self.bgplvms), i + len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||||
|
self.plot_latent(axes=axes)
|
||||||
|
axes = [fig.add_subplot(3, len(self.bgplvms), i + 2 * len(self.bgplvms) + 1) for i in range(len(self.bgplvms))]
|
||||||
|
self.plot_scales(axes=axes)
|
||||||
|
pylab.draw()
|
||||||
|
fig.tight_layout()
|
||||||
|
|
||||||
def _debug_optimize(self, opt='scg', maxiters=500, itersteps=10):
|
def _debug_optimize(self, opt='scg', maxiters=5000, itersteps=10):
|
||||||
iters = 0
|
iters = 0
|
||||||
optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
|
optstep = lambda: self.optimize(opt, messages=1, max_f_eval=itersteps)
|
||||||
self._debug_plot()
|
self._debug_plot()
|
||||||
|
|
|
||||||
|
|
@ -3,15 +3,16 @@
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv, trace_dot
|
from ..util.linalg import mdot, jitchol, tdot, symmetrify
|
||||||
from ..util.plot import gpplot
|
from ..util.plot import gpplot
|
||||||
from .. import kern
|
from .. import kern
|
||||||
from GP import GP
|
from GP import GP
|
||||||
from scipy import linalg
|
from scipy import linalg
|
||||||
|
|
||||||
#Still TODO:
|
def backsub_both_sides(L,X):
|
||||||
# make use of slices properly (kernel can now do this)
|
""" Return L^-T * X * L^-1, assuming X is symmetric and L is a lower Cholesky factor"""
|
||||||
# enable heteroscedatic noise (kernel will need to compute psi2 as a (NxMxM) array)
|
tmp,_ = linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(X),lower=1,trans=1)
|
||||||
|
return linalg.lapack.flapack.dtrtrs(L,np.asfortranarray(tmp.T),lower=1,trans=1)[0].T
|
||||||
|
|
||||||
class sparse_GP(GP):
|
class sparse_GP(GP):
|
||||||
"""
|
"""
|
||||||
|
|
@ -27,19 +28,16 @@ class sparse_GP(GP):
|
||||||
:type X_variance: np.ndarray (N x Q) | None
|
:type X_variance: np.ndarray (N x Q) | None
|
||||||
:param Z: inducing inputs (optional, see note)
|
:param Z: inducing inputs (optional, see note)
|
||||||
:type Z: np.ndarray (M x Q) | None
|
:type Z: np.ndarray (M x Q) | None
|
||||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
|
||||||
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
:param M : Number of inducing points (optional, default 10. Ignored if Z is not None)
|
||||||
:type M: int
|
:type M: int
|
||||||
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
:param normalize_(X|Y) : whether to normalize the data before computing (predictions will be in original scales)
|
||||||
:type normalize_(X|Y): bool
|
:type normalize_(X|Y): bool
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, X_variance=None, Xslices=None,Zslices=None, normalize_X=False):
|
def __init__(self, X, likelihood, kernel, Z, X_variance=None, normalize_X=False):
|
||||||
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
|
self.scale_factor = 100.0# a scaling factor to help keep the algorithm stable
|
||||||
self.auto_scale_factor = False
|
self.auto_scale_factor = False
|
||||||
self.Z = Z
|
self.Z = Z
|
||||||
self.Zslices = Zslices
|
|
||||||
self.Xslices = Xslices
|
|
||||||
self.M = Z.shape[0]
|
self.M = Z.shape[0]
|
||||||
self.likelihood = likelihood
|
self.likelihood = likelihood
|
||||||
|
|
||||||
|
|
@ -50,10 +48,7 @@ class sparse_GP(GP):
|
||||||
self.has_uncertain_inputs=True
|
self.has_uncertain_inputs=True
|
||||||
self.X_variance = X_variance
|
self.X_variance = X_variance
|
||||||
|
|
||||||
if not self.likelihood.is_heteroscedastic:
|
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X)
|
||||||
self.likelihood.trYYT = np.trace(np.dot(self.likelihood.Y, self.likelihood.Y.T)) # TODO: something more elegant here?
|
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel=kernel, normalize_X=normalize_X, Xslices=Xslices)
|
|
||||||
|
|
||||||
#normalize X uncertainty also
|
#normalize X uncertainty also
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
|
|
@ -68,87 +63,89 @@ class sparse_GP(GP):
|
||||||
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
|
self.psi1 = self.kern.psi1(self.Z,self.X, self.X_variance).T
|
||||||
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
|
self.psi2 = self.kern.psi2(self.Z,self.X, self.X_variance)
|
||||||
else:
|
else:
|
||||||
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
|
self.psi0 = self.kern.Kdiag(self.X)
|
||||||
self.psi1 = self.kern.K(self.Z,self.X)
|
self.psi1 = self.kern.K(self.Z,self.X)
|
||||||
self.psi2 = None
|
self.psi2 = None
|
||||||
|
|
||||||
def _computations(self):
|
def _computations(self):
|
||||||
#TODO: find routine to multiply triangular matrices
|
|
||||||
#TODO: slices for psi statistics (easy enough)
|
|
||||||
|
|
||||||
sf = self.scale_factor
|
sf = self.scale_factor
|
||||||
sf2 = sf**2
|
sf2 = sf**2
|
||||||
|
|
||||||
#The rather complex computations of psi2_beta_scaled
|
#factor Kmm
|
||||||
|
self.Lm = jitchol(self.Kmm)
|
||||||
|
|
||||||
|
#The rather complex computations of self.A
|
||||||
if self.likelihood.is_heteroscedastic:
|
if self.likelihood.is_heteroscedastic:
|
||||||
assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
|
assert self.likelihood.D == 1 #TODO: what if the likelihood is heterscedatic and there are multiple independent outputs?
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
|
psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
|
||||||
|
evals, evecs = linalg.eigh(psi2_beta_scaled)
|
||||||
|
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
|
||||||
|
if not np.allclose(evals, clipped_evals):
|
||||||
|
print "Warning: clipping posterior eigenvalues"
|
||||||
|
tmp = evecs*np.sqrt(clipped_evals)
|
||||||
|
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
|
||||||
|
self.A = tdot(tmp)
|
||||||
else:
|
else:
|
||||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
|
tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
|
||||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
|
||||||
|
self.A = tdot(tmp)
|
||||||
else:
|
else:
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
|
psi2_beta_scaled = (self.psi2*(self.likelihood.precision/sf2)).sum(0)
|
||||||
|
evals, evecs = linalg.eigh(psi2_beta_scaled)
|
||||||
|
clipped_evals = np.clip(evals,0.,1e6) # TODO: make clipping configurable
|
||||||
|
if not np.allclose(evals, clipped_evals):
|
||||||
|
print "Warning: clipping posterior eigenvalues"
|
||||||
|
tmp = evecs*np.sqrt(clipped_evals)
|
||||||
|
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
|
||||||
|
self.A = tdot(tmp)
|
||||||
else:
|
else:
|
||||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
|
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
|
||||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
tmp, _ = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp),lower=1)
|
||||||
|
self.A = tdot(tmp)
|
||||||
|
|
||||||
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
#factor B
|
||||||
|
self.B = np.eye(self.M)/sf2 + self.A
|
||||||
|
self.LB = jitchol(self.B)
|
||||||
|
|
||||||
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
|
self.V = (self.likelihood.precision/self.scale_factor)*self.likelihood.Y
|
||||||
|
|
||||||
#Compute A = L^-1 psi2 beta L^-T
|
|
||||||
#self. A = mdot(self.Lmi,self.psi2_beta_scaled,self.Lmi.T)
|
|
||||||
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,self.psi2_beta_scaled.T,lower=1)[0]
|
|
||||||
self.A = linalg.lapack.flapack.dtrtrs(self.Lm,np.asarray(tmp.T,order='F'),lower=1)[0]
|
|
||||||
|
|
||||||
self.B = np.eye(self.M)/sf2 + self.A
|
|
||||||
|
|
||||||
self.Bi, self.LB, self.LBi, self.B_logdet = pdinv(self.B)
|
|
||||||
|
|
||||||
self.psi1V = np.dot(self.psi1, self.V)
|
self.psi1V = np.dot(self.psi1, self.V)
|
||||||
#tmp = np.dot(self.Lmi.T, self.LBi.T)
|
|
||||||
tmp = linalg.lapack.clapack.dtrtrs(self.Lm.T,np.asarray(self.LBi.T,order='C'),lower=0)[0]
|
|
||||||
self.C = np.dot(tmp,tmp.T) #TODO: tmp is triangular. replace with dtrmm (blas) when available
|
|
||||||
self.Cpsi1V = np.dot(self.C,self.psi1V)
|
|
||||||
self.Cpsi1VVpsi1 = np.dot(self.Cpsi1V,self.psi1V.T)
|
|
||||||
#self.E = np.dot(self.Cpsi1VVpsi1,self.C)/sf2
|
|
||||||
self.E = np.dot(self.Cpsi1V/sf,self.Cpsi1V.T/sf)
|
|
||||||
|
|
||||||
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
|
|
||||||
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
|
|
||||||
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
|
|
||||||
if self.likelihood.is_heteroscedastic:
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
|
|
||||||
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
|
|
||||||
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
|
|
||||||
else:
|
|
||||||
self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
|
|
||||||
self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
|
|
||||||
self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
|
|
||||||
self.dL_dpsi2 = None
|
|
||||||
|
|
||||||
else:
|
|
||||||
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
|
|
||||||
self.dL_dpsi2 += - 0.5 * self.likelihood.precision/sf2 * self.D * self.C # dC
|
|
||||||
self.dL_dpsi2 += - 0.5 * self.likelihood.precision * self.E # dD
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
#repeat for each of the N psi_2 matrices
|
|
||||||
self.dL_dpsi2 = np.repeat(self.dL_dpsi2[None,:,:],self.N,axis=0)
|
|
||||||
else:
|
|
||||||
self.dL_dpsi1 += 2.*np.dot(self.dL_dpsi2,self.psi1)
|
|
||||||
self.dL_dpsi2 = None
|
|
||||||
|
|
||||||
|
#back substitute C into psi1V
|
||||||
|
tmp,info1 = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.psi1V),lower=1,trans=0)
|
||||||
|
self._LBi_Lmi_psi1V,_ = linalg.lapack.flapack.dtrtrs(self.LB,np.asfortranarray(tmp),lower=1,trans=0)
|
||||||
|
tmp,info2 = linalg.lapack.flapack.dpotrs(self.LB,tmp,lower=1)
|
||||||
|
self.Cpsi1V,info3 = linalg.lapack.flapack.dtrtrs(self.Lm,tmp,lower=1,trans=1)
|
||||||
|
|
||||||
# Compute dL_dKmm
|
# Compute dL_dKmm
|
||||||
#self.dL_dKmm_old = -0.5 * self.D * mdot(self.Lmi.T, self.A, self.Lmi)*sf2 # dB
|
tmp = tdot(self._LBi_Lmi_psi1V)
|
||||||
#self.dL_dKmm += -0.5 * self.D * (- self.C/sf2 - 2.*mdot(self.C, self.psi2_beta_scaled, self.Kmmi) + self.Kmmi) # dC
|
self.DBi_plus_BiPBi = backsub_both_sides(self.LB, self.D*np.eye(self.M) + tmp)
|
||||||
#self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
|
tmp = -0.5*self.DBi_plus_BiPBi/sf2
|
||||||
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
|
tmp += -0.5*self.B*sf2*self.D
|
||||||
self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
|
tmp += self.D*np.eye(self.M)
|
||||||
self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
|
self.dL_dKmm = backsub_both_sides(self.Lm,tmp)
|
||||||
|
|
||||||
|
# Compute dL_dpsi # FIXME: this is untested for the heteroscedastic + uncertain inputs case
|
||||||
|
self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
|
||||||
|
self.dL_dpsi1 = np.dot(self.Cpsi1V,self.V.T)
|
||||||
|
dL_dpsi2_beta = 0.5*backsub_both_sides(self.Lm,self.D*np.eye(self.M) - self.DBi_plus_BiPBi)
|
||||||
|
if self.likelihood.is_heteroscedastic:
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
self.dL_dpsi2 = self.likelihood.precision[:,None,None]*dL_dpsi2_beta[None,:,:]
|
||||||
|
else:
|
||||||
|
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2_beta,self.psi1*self.likelihood.precision.reshape(1,self.N))
|
||||||
|
self.dL_dpsi2 = None
|
||||||
|
else:
|
||||||
|
dL_dpsi2 = self.likelihood.precision*dL_dpsi2_beta
|
||||||
|
if self.has_uncertain_inputs:
|
||||||
|
#repeat for each of the N psi_2 matrices
|
||||||
|
self.dL_dpsi2 = np.repeat(dL_dpsi2[None,:,:],self.N,axis=0)
|
||||||
|
else:
|
||||||
|
#subsume back into psi1 (==Kmn)
|
||||||
|
self.dL_dpsi1 += 2.*np.dot(dL_dpsi2,self.psi1)
|
||||||
|
self.dL_dpsi2 = None
|
||||||
|
|
||||||
|
|
||||||
#the partial derivative vector for the likelihood
|
#the partial derivative vector for the likelihood
|
||||||
if self.likelihood.Nparams ==0:
|
if self.likelihood.Nparams ==0:
|
||||||
|
|
@ -156,16 +153,11 @@ class sparse_GP(GP):
|
||||||
self.partial_for_likelihood = None
|
self.partial_for_likelihood = None
|
||||||
elif self.likelihood.is_heteroscedastic:
|
elif self.likelihood.is_heteroscedastic:
|
||||||
raise NotImplementedError, "heteroscedatic derivates not implemented"
|
raise NotImplementedError, "heteroscedatic derivates not implemented"
|
||||||
#self.partial_for_likelihood = - 0.5 * self.D*self.likelihood.precision + 0.5 * (self.likelihood.Y**2).sum(1)*self.likelihood.precision**2 #dA
|
|
||||||
#self.partial_for_likelihood += 0.5 * self.D * (self.psi0*self.likelihood.precision**2 - (self.psi2*self.Kmmi[None,:,:]*self.likelihood.precision[:,None,None]**2).sum(1).sum(1)/sf2) #dB
|
|
||||||
#self.partial_for_likelihood += 0.5 * self.D * np.sum(self.Bi*self.A)*self.likelihood.precision #dC
|
|
||||||
#self.partial_for_likelihood += -np.diag(np.dot((self.C - 0.5 * mdot(self.C,self.psi2_beta_scaled,self.C) ) , self.psi1VVpsi1 ))*self.likelihood.precision #dD
|
|
||||||
else:
|
else:
|
||||||
#likelihood is not heteroscedastic
|
#likelihood is not heteroscedastic
|
||||||
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * np.sum(np.square(self.likelihood.Y))*self.likelihood.precision**2
|
self.partial_for_likelihood = - 0.5 * self.N*self.D*self.likelihood.precision + 0.5 * self.likelihood.trYYT*self.likelihood.precision**2
|
||||||
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
|
self.partial_for_likelihood += 0.5 * self.D * (self.psi0.sum()*self.likelihood.precision**2 - np.trace(self.A)*self.likelihood.precision*sf2)
|
||||||
self.partial_for_likelihood += 0.5 * self.D * trace_dot(self.Bi,self.A)*self.likelihood.precision
|
self.partial_for_likelihood += self.likelihood.precision*(0.5*np.sum(self.A*self.DBi_plus_BiPBi) - np.sum(np.square(self._LBi_Lmi_psi1V)))
|
||||||
self.partial_for_likelihood += self.likelihood.precision*(0.5*trace_dot(self.psi2_beta_scaled,self.E*sf2) - np.trace(self.Cpsi1VVpsi1))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -178,8 +170,8 @@ class sparse_GP(GP):
|
||||||
else:
|
else:
|
||||||
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
|
A = -0.5*self.N*self.D*(np.log(2.*np.pi) + np.log(self.likelihood._variance)) -0.5*self.likelihood.precision*self.likelihood.trYYT
|
||||||
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
|
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
|
||||||
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
|
C = -self.D * (np.sum(np.log(np.diag(self.LB))) + 0.5*self.M*np.log(sf2))
|
||||||
D = 0.5*np.trace(self.Cpsi1VVpsi1)
|
D = 0.5*np.sum(np.square(self._LBi_Lmi_psi1V))
|
||||||
return A+B+C+D
|
return A+B+C+D
|
||||||
|
|
||||||
def _set_params(self, p):
|
def _set_params(self, p):
|
||||||
|
|
@ -187,13 +179,14 @@ class sparse_GP(GP):
|
||||||
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
|
self.kern._set_params(p[self.Z.size:self.Z.size+self.kern.Nparam])
|
||||||
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
|
self.likelihood._set_params(p[self.Z.size+self.kern.Nparam:])
|
||||||
self._compute_kernel_matrices()
|
self._compute_kernel_matrices()
|
||||||
if self.auto_scale_factor:
|
|
||||||
self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
|
|
||||||
#if self.auto_scale_factor:
|
#if self.auto_scale_factor:
|
||||||
# if self.likelihood.is_heteroscedastic:
|
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
|
||||||
# self.scale_factor = max(1,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
|
#if self.auto_scale_factor:
|
||||||
# else:
|
#if self.likelihood.is_heteroscedastic:
|
||||||
# self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
|
#self.scale_factor = max(100,np.sqrt(self.psi2_beta_scaled.sum(0).mean()))
|
||||||
|
#else:
|
||||||
|
#self.scale_factor = np.sqrt(self.psi2.sum(0).mean()*self.likelihood.precision)
|
||||||
|
self.scale_factor = 1.
|
||||||
self._computations()
|
self._computations()
|
||||||
|
|
||||||
def _get_params(self):
|
def _get_params(self):
|
||||||
|
|
@ -239,24 +232,28 @@ class sparse_GP(GP):
|
||||||
"""
|
"""
|
||||||
The derivative of the bound wrt the inducing inputs Z
|
The derivative of the bound wrt the inducing inputs Z
|
||||||
"""
|
"""
|
||||||
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm,self.Z)#factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
|
dL_dZ = 2.*self.kern.dK_dX(self.dL_dKmm, self.Z) # factor of two becase of vertical and horizontal 'stripes' in dKmm_dZ
|
||||||
if self.has_uncertain_inputs:
|
if self.has_uncertain_inputs:
|
||||||
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1,self.Z,self.X, self.X_variance)
|
dL_dZ += self.kern.dpsi1_dZ(self.dL_dpsi1, self.Z, self.X, self.X_variance)
|
||||||
dL_dZ += 2.*self.kern.dpsi2_dZ(self.dL_dpsi2,self.Z,self.X, self.X_variance) # 'stripes'
|
dL_dZ += self.kern.dpsi2_dZ(self.dL_dpsi2, self.Z, self.X, self.X_variance)
|
||||||
else:
|
else:
|
||||||
dL_dZ += self.kern.dK_dX(self.dL_dpsi1,self.Z,self.X)
|
dL_dZ += self.kern.dK_dX(self.dL_dpsi1, self.Z, self.X)
|
||||||
return dL_dZ
|
return dL_dZ
|
||||||
|
|
||||||
def _raw_predict(self, Xnew, slices, full_cov=False):
|
def _raw_predict(self, Xnew, which_parts='all', full_cov=False):
|
||||||
"""Internal helper function for making predictions, does not account for normalization"""
|
"""Internal helper function for making predictions, does not account for normalization"""
|
||||||
|
|
||||||
Kx = self.kern.K(self.Z, Xnew)
|
Bi,_ = linalg.lapack.flapack.dpotri(self.LB,lower=0) # WTH? this lower switch should be 1, but that doesn't work!
|
||||||
mu = mdot(Kx.T, self.C/self.scale_factor, self.psi1V)
|
symmetrify(Bi)
|
||||||
|
Kmmi_LmiBLmi = backsub_both_sides(self.Lm,np.eye(self.M) - Bi)
|
||||||
|
|
||||||
|
Kx = self.kern.K(self.Z, Xnew, which_parts=which_parts)
|
||||||
|
mu = np.dot(Kx.T, self.Cpsi1V/self.scale_factor)
|
||||||
if full_cov:
|
if full_cov:
|
||||||
Kxx = self.kern.K(Xnew)
|
Kxx = self.kern.K(Xnew,which_parts=which_parts)
|
||||||
var = Kxx - mdot(Kx.T, (self.Kmmi - self.C/self.scale_factor**2), Kx) #NOTE this won't work for plotting
|
var = Kxx - mdot(Kx.T, Kmmi_LmiBLmi, Kx) #NOTE this won't work for plotting
|
||||||
else:
|
else:
|
||||||
Kxx = self.kern.Kdiag(Xnew)
|
Kxx = self.kern.Kdiag(Xnew,which_parts=which_parts)
|
||||||
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
|
var = Kxx - np.sum(Kx*np.dot(Kmmi_LmiBLmi, Kx),0)
|
||||||
|
|
||||||
return mu,var[:,None]
|
return mu,var[:,None]
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ class sparse_GP_regression(sparse_GP):
|
||||||
"""
|
"""
|
||||||
Gaussian Process model for regression
|
Gaussian Process model for regression
|
||||||
|
|
||||||
This is a thin wrapper around the GP class, with a set of sensible defaults
|
This is a thin wrapper around the sparse_GP class, with a set of sensible defaults
|
||||||
|
|
||||||
:param X: input observations
|
:param X: input observations
|
||||||
:param Y: observed values
|
:param Y: observed values
|
||||||
|
|
@ -22,25 +22,25 @@ class sparse_GP_regression(sparse_GP):
|
||||||
:type normalize_X: False|True
|
:type normalize_X: False|True
|
||||||
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
|
:param normalize_Y: whether to normalize the input data before computing (predictions will be in original scales)
|
||||||
:type normalize_Y: False|True
|
:type normalize_Y: False|True
|
||||||
:param Xslices: how the X,Y data co-vary in the kernel (i.e. which "outputs" they correspond to). See (link:slicing)
|
|
||||||
:rtype: model object
|
:rtype: model object
|
||||||
|
|
||||||
.. Note:: Multiple independent outputs are allowed using columns of Y
|
.. Note:: Multiple independent outputs are allowed using columns of Y
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,X,Y,kernel=None,normalize_X=False,normalize_Y=False, Xslices=None,Z=None, M=10):
|
def __init__(self, X, Y, kernel=None, normalize_X=False, normalize_Y=False, Z=None, M=10):
|
||||||
#kern defaults to rbf
|
#kern defaults to rbf (plus white for stability)
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
|
kernel = kern.rbf(X.shape[1]) + kern.white(X.shape[1],1e-3)
|
||||||
|
|
||||||
#Z defaults to a subset of the data
|
#Z defaults to a subset of the data
|
||||||
if Z is None:
|
if Z is None:
|
||||||
Z = np.random.permutation(X.copy())[:M]
|
i = np.random.permutation(X.shape[0])[:M]
|
||||||
|
Z = X[i].copy()
|
||||||
else:
|
else:
|
||||||
assert Z.shape[1]==X.shape[1]
|
assert Z.shape[1]==X.shape[1]
|
||||||
|
|
||||||
#likelihood defaults to Gaussian
|
#likelihood defaults to Gaussian
|
||||||
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(Y,normalize=normalize_Y)
|
||||||
|
|
||||||
sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X, Xslices=Xslices)
|
sparse_GP.__init__(self, X, likelihood, kernel, Z, normalize_X=normalize_X)
|
||||||
|
|
|
||||||
|
|
@ -1,151 +0,0 @@
|
||||||
# Copyright (c) 2012 James Hensman
|
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import pylab as pb
|
|
||||||
from ..util.linalg import mdot, jitchol, chol_inv, pdinv
|
|
||||||
from .. import kern
|
|
||||||
from ..likelihoods import likelihood
|
|
||||||
from sparse_GP import sparse_GP
|
|
||||||
|
|
||||||
class uncollapsed_sparse_GP(sparse_GP):
|
|
||||||
"""
|
|
||||||
Variational sparse GP model (Regression), where the approximating distribution q(u) is represented explicitly
|
|
||||||
|
|
||||||
:param X: inputs
|
|
||||||
:type X: np.ndarray (N x Q)
|
|
||||||
:param likelihood: GPy likelihood class, containing observed data
|
|
||||||
:param q_u: canonical parameters of the distribution squashed into a 1D array
|
|
||||||
:type q_u: np.ndarray
|
|
||||||
:param kernel : the kernel/covariance function. See link kernels
|
|
||||||
:type kernel: a GPy kernel
|
|
||||||
:param Z: inducing inputs (optional, see note)
|
|
||||||
:type Z: np.ndarray (M x Q) | None
|
|
||||||
:param Zslices: slices for the inducing inputs (see slicing TODO: link)
|
|
||||||
:param normalize_X : whether to normalize the data before computing (predictions will be in original scales)
|
|
||||||
:type normalize_X: bool
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, X, likelihood, kernel, Z, q_u=None, **kwargs):
|
|
||||||
self.M = Z.shape[0]
|
|
||||||
if q_u is None:
|
|
||||||
q_u = np.hstack((np.random.randn(self.M*likelihood.D),-0.5*np.eye(self.M).flatten()))
|
|
||||||
self.likelihood = likelihood
|
|
||||||
self.set_vb_param(q_u)
|
|
||||||
sparse_GP.__init__(self, X, likelihood, kernel, Z, **kwargs)
|
|
||||||
|
|
||||||
def _computations(self):
|
|
||||||
# kernel computations, using BGPLVM notation
|
|
||||||
self.Kmm = self.kern.K(self.Z)
|
|
||||||
if self.has_uncertain_inputs:
|
|
||||||
raise NotImplementedError
|
|
||||||
else:
|
|
||||||
self.psi0 = self.kern.Kdiag(self.X,slices=self.Xslices)
|
|
||||||
self.psi1 = self.kern.K(self.Z,self.X)
|
|
||||||
if self.likelihood.is_heteroscedastic:
|
|
||||||
raise NotImplementedError
|
|
||||||
else:
|
|
||||||
tmp = self.psi1*(np.sqrt(self.likelihood.precision)/sf)
|
|
||||||
self.psi2_beta_scaled = np.dot(tmp,tmp.T)
|
|
||||||
self.psi2 = self.psi1.T[:,:,None]*self.psi1.T[:,None,:]
|
|
||||||
|
|
||||||
|
|
||||||
self.V = self.likelihood.precision*self.Y
|
|
||||||
self.VmT = np.dot(self.V,self.q_u_expectation[0].T)
|
|
||||||
self.psi1V = np.dot(self.psi1, self.V)
|
|
||||||
self.psi1VVpsi1 = np.dot(self.psi1V, self.psi1V.T)
|
|
||||||
self.Kmmi, self.Lm, self.Lmi, self.Kmm_logdet = pdinv(self.Kmm)
|
|
||||||
self.A = mdot(self.Lmi, self.beta*self.psi2, self.Lmi.T)
|
|
||||||
self.B = np.eye(self.M) + self.A
|
|
||||||
self.Lambda = mdot(self.Lmi.T,self.B,self.Lmi)
|
|
||||||
self.trace_K = self.psi0 - np.trace(self.A)/self.beta
|
|
||||||
self.projected_mean = mdot(self.psi1.T,self.Kmmi,self.q_u_expectation[0])
|
|
||||||
|
|
||||||
# Compute dL_dpsi
|
|
||||||
self.dL_dpsi0 = - 0.5 * self.likelihood.D * self.beta * np.ones(self.N)
|
|
||||||
self.dL_dpsi1 = np.dot(self.VmT,self.Kmmi).T # This is the correct term for E I think...
|
|
||||||
self.dL_dpsi2 = 0.5 * self.beta * self.likelihood.D * (self.Kmmi - mdot(self.Kmmi,self.q_u_expectation[1],self.Kmmi))
|
|
||||||
|
|
||||||
# Compute dL_dKmm
|
|
||||||
tmp = self.beta*mdot(self.psi2,self.Kmmi,self.q_u_expectation[1]) -np.dot(self.q_u_expectation[0],self.psi1V.T)
|
|
||||||
tmp += tmp.T
|
|
||||||
tmp += self.likelihood.D*(-self.beta*self.psi2 - self.Kmm + self.q_u_expectation[1])
|
|
||||||
self.dL_dKmm = 0.5*mdot(self.Kmmi,tmp,self.Kmmi)
|
|
||||||
|
|
||||||
#Compute the gradient of the log likelihood wrt noise variance
|
|
||||||
#TODO: support heteroscedastic noise
|
|
||||||
dbeta = 0.5 * self.N*self.likelihood.D/self.beta
|
|
||||||
dbeta += - 0.5 * self.likelihood.D * self.trace_K
|
|
||||||
dbeta += - 0.5 * self.likelihood.D * np.sum(self.q_u_expectation[1]*mdot(self.Kmmi,self.psi2,self.Kmmi))
|
|
||||||
dbeta += - 0.5 * self.trYYT
|
|
||||||
dbeta += np.sum(np.dot(self.Y.T,self.projected_mean))
|
|
||||||
self.partial_for_likelihood = -dbeta*self.likelihood.precision**2
|
|
||||||
|
|
||||||
def log_likelihood(self):
|
|
||||||
"""
|
|
||||||
Compute the (lower bound on the) log marginal likelihood
|
|
||||||
"""
|
|
||||||
A = -0.5*self.N*self.likelihood.D*(np.log(2.*np.pi) - np.log(self.beta))
|
|
||||||
B = -0.5*self.beta*self.likelihood.D*self.trace_K
|
|
||||||
C = -0.5*self.likelihood.D *(self.Kmm_logdet-self.q_u_logdet + np.sum(self.Lambda * self.q_u_expectation[1]) - self.M)
|
|
||||||
D = -0.5*self.beta*self.trYYT
|
|
||||||
E = np.sum(np.dot(self.V.T,self.projected_mean))
|
|
||||||
return A+B+C+D+E
|
|
||||||
|
|
||||||
def _raw_predict(self, Xnew, slices,full_cov=False):
|
|
||||||
"""Internal helper function for making predictions, does not account for normalization"""
|
|
||||||
Kx = self.kern.K(Xnew,self.Z)
|
|
||||||
mu = mdot(Kx,self.Kmmi,self.q_u_expectation[0])
|
|
||||||
|
|
||||||
tmp = self.Kmmi- mdot(self.Kmmi,self.q_u_cov,self.Kmmi)
|
|
||||||
if full_cov:
|
|
||||||
Kxx = self.kern.K(Xnew)
|
|
||||||
var = Kxx - mdot(Kx,tmp,Kx.T)
|
|
||||||
else:
|
|
||||||
Kxx = self.kern.Kdiag(Xnew)
|
|
||||||
var = (Kxx - np.sum(Kx*np.dot(Kx,tmp),1))[:,None]
|
|
||||||
return mu,var
|
|
||||||
|
|
||||||
|
|
||||||
def set_vb_param(self,vb_param):
|
|
||||||
"""set the distribution q(u) from the canonical parameters"""
|
|
||||||
self.q_u_prec = -2.*vb_param[-self.M**2:].reshape(self.M, self.M)
|
|
||||||
self.q_u_cov, q_u_Li, q_u_L, tmp = pdinv(self.q_u_prec)
|
|
||||||
self.q_u_logdet = -tmp
|
|
||||||
self.q_u_mean = np.dot(self.q_u_cov,vb_param[:self.M*self.likelihood.D].reshape(self.M,self.likelihood.D))
|
|
||||||
|
|
||||||
self.q_u_expectation = (self.q_u_mean, np.dot(self.q_u_mean,self.q_u_mean.T)+self.q_u_cov*self.likelihood.D)
|
|
||||||
|
|
||||||
self.q_u_canonical = (np.dot(self.q_u_prec, self.q_u_mean),-0.5*self.q_u_prec)
|
|
||||||
#TODO: computations now?
|
|
||||||
|
|
||||||
def get_vb_param(self):
|
|
||||||
"""
|
|
||||||
Return the canonical parameters of the distribution q(u)
|
|
||||||
"""
|
|
||||||
return np.hstack([e.flatten() for e in self.q_u_canonical])
|
|
||||||
|
|
||||||
def vb_grad_natgrad(self):
|
|
||||||
"""
|
|
||||||
Compute the gradients of the lower bound wrt the canonical and
|
|
||||||
Expectation parameters of u.
|
|
||||||
|
|
||||||
Note that the natural gradient in either is given by the gradient in the other (See Hensman et al 2012 Fast Variational inference in the conjugate exponential Family)
|
|
||||||
"""
|
|
||||||
dL_dmmT_S = -0.5*self.Lambda-self.q_u_canonical[1]
|
|
||||||
dL_dm = np.dot(self.Kmmi,self.psi1V) - np.dot(self.Lambda,self.q_u_mean)
|
|
||||||
|
|
||||||
#dL_dSim =
|
|
||||||
#dL_dmhSi =
|
|
||||||
|
|
||||||
return np.hstack((dL_dm.flatten(),dL_dmmT_S.flatten())) # natgrad only, grad TODO
|
|
||||||
|
|
||||||
|
|
||||||
def plot(self, *args, **kwargs):
|
|
||||||
"""
|
|
||||||
add the distribution q(u) to the plot from sparse_GP
|
|
||||||
"""
|
|
||||||
sparse_GP.plot(self,*args,**kwargs)
|
|
||||||
if self.Q==1:
|
|
||||||
pb.errorbar(self.Z[:,0],self.q_u_expectation[0][:,0],yerr=2.*np.sqrt(np.diag(self.q_u_cov)),fmt=None,ecolor='b')
|
|
||||||
|
|
||||||
|
|
@ -14,7 +14,7 @@ from .. import likelihoods
|
||||||
from .. import kern
|
from .. import kern
|
||||||
|
|
||||||
class warpedGP(GP):
|
class warpedGP(GP):
|
||||||
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False, Xslices=None):
|
def __init__(self, X, Y, kernel=None, warping_function = None, warping_terms = 3, normalize_X=False, normalize_Y=False):
|
||||||
|
|
||||||
if kernel is None:
|
if kernel is None:
|
||||||
kernel = kern.rbf(X.shape[1])
|
kernel = kern.rbf(X.shape[1])
|
||||||
|
|
@ -29,7 +29,7 @@ class warpedGP(GP):
|
||||||
self.predict_in_warped_space = False
|
self.predict_in_warped_space = False
|
||||||
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
|
likelihood = likelihoods.Gaussian(self.transform_data(), normalize=normalize_Y)
|
||||||
|
|
||||||
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X, Xslices=Xslices)
|
GP.__init__(self, X, likelihood, kernel, normalize_X=normalize_X)
|
||||||
|
|
||||||
def _scale_data(self, Y):
|
def _scale_data(self, Y):
|
||||||
self._Ymax = Y.max()
|
self._Ymax = Y.max()
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,12 @@
|
||||||
|
"""
|
||||||
|
|
||||||
|
MaxZ
|
||||||
|
|
||||||
|
"""
|
||||||
|
import unittest
|
||||||
|
import sys
|
||||||
|
|
||||||
|
def deepTest(reason):
|
||||||
|
if 'deep' in sys.argv:
|
||||||
|
return lambda x:x
|
||||||
|
return unittest.skip("Not deep scanning, enable deepscan by adding 'deep' argument")
|
||||||
|
|
@ -60,7 +60,7 @@ class BGPLVMTests(unittest.TestCase):
|
||||||
|
|
||||||
#@unittest.skip('psi2 cross terms are NotImplemented for this combination')
|
#@unittest.skip('psi2 cross terms are NotImplemented for this combination')
|
||||||
def test_linear_bias_kern(self):
|
def test_linear_bias_kern(self):
|
||||||
N, M, Q, D = 10, 3, 2, 4
|
N, M, Q, D = 30, 5, 4, 30
|
||||||
X = np.random.rand(N, Q)
|
X = np.random.rand(N, Q)
|
||||||
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
|
k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
|
||||||
K = k.K(X)
|
K = k.K(X)
|
||||||
|
|
|
||||||
112
GPy/testing/cgd_tests.py
Normal file
112
GPy/testing/cgd_tests.py
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
'''
|
||||||
|
Created on 26 Apr 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
import unittest
|
||||||
|
import numpy
|
||||||
|
from GPy.inference.conjugate_gradient_descent import CGD, RUNNING
|
||||||
|
import pylab
|
||||||
|
import time
|
||||||
|
from scipy.optimize.optimize import rosen, rosen_der
|
||||||
|
|
||||||
|
|
||||||
|
class Test(unittest.TestCase):
|
||||||
|
|
||||||
|
def testMinimizeSquare(self):
|
||||||
|
N = 100
|
||||||
|
A = numpy.random.rand(N) * numpy.eye(N)
|
||||||
|
b = numpy.random.rand(N) * 0
|
||||||
|
f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
|
||||||
|
df = lambda x: numpy.dot(A, x) - b
|
||||||
|
|
||||||
|
opt = CGD()
|
||||||
|
|
||||||
|
restarts = 10
|
||||||
|
for _ in range(restarts):
|
||||||
|
try:
|
||||||
|
x0 = numpy.random.randn(N) * 300
|
||||||
|
res = opt.opt(f, df, x0, messages=0,
|
||||||
|
maxiter=1000, gtol=1e-10)
|
||||||
|
assert numpy.allclose(res[0], 0, atol=1e-3)
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
# RESTART
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise AssertionError("Test failed for {} restarts".format(restarts))
|
||||||
|
|
||||||
|
def testRosen(self):
|
||||||
|
N = 20
|
||||||
|
f = rosen
|
||||||
|
df = rosen_der
|
||||||
|
|
||||||
|
opt = CGD()
|
||||||
|
|
||||||
|
restarts = 10
|
||||||
|
for _ in range(restarts):
|
||||||
|
try:
|
||||||
|
x0 = numpy.random.randn(N) * .5
|
||||||
|
res = opt.opt(f, df, x0, messages=0,
|
||||||
|
maxiter=5e2, gtol=1e-2)
|
||||||
|
assert numpy.allclose(res[0], 1, atol=.1)
|
||||||
|
break
|
||||||
|
except:
|
||||||
|
# RESTART
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
raise AssertionError("Test failed for {} restarts".format(restarts))
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Interactive demo: plot the objective as a 3D wireframe and animate the
    # CGD iterates on top of it. To run the unit tests instead, use:
    #     import sys;sys.argv = ['',
    #                            'Test.testMinimizeSquare',
    #                            'Test.testRosen',
    #                            ]
    #     unittest.main()

    N = 2
    # A and b are only used by the alternative quadratic objective below.
    A = numpy.random.rand(N) * numpy.eye(N)
    b = numpy.random.rand(N) * 0
    # f = lambda x: numpy.dot(x.T.dot(A), x) - numpy.dot(x.T, b)
    # df = lambda x: numpy.dot(A, x) - b
    f = rosen
    df = rosen_der
    x0 = numpy.random.randn(N) * .5

    opt = CGD()

    # Reuse the figure (and its first axes) if it already exists.
    fig = pylab.figure("cgd optimize")
    if fig.axes:
        ax = fig.axes[0]
        ax.cla()
    else:
        ax = fig.add_subplot(111, projection='3d')

    interpolation = 40
    x, y = numpy.linspace(-1, 1, interpolation)[:, None], numpy.linspace(-1, 1, interpolation)[:, None]
    X, Y = numpy.meshgrid(x, y)
    # Distinct loop names: under Python 2 a list comprehension leaks its loop
    # variables and would overwrite the grid vectors x and y.
    fXY = numpy.array([f(numpy.array([xi, yi]))
                       for xi, yi in zip(X.flatten(), Y.flatten())]).reshape(interpolation, interpolation)

    ax.plot_wireframe(X, Y, fXY)
    xopts = [x0.copy()]
    optplts, = ax.plot3D([x0[0]], [x0[1]], zs=f(x0), marker='o', color='r')

    raw_input("enter to start optimize")
    res = [0]

    def callback(*r):
        """Record the newest iterate, redraw the trajectory, keep the final report."""
        xopts.append(r[0].copy())
        # time.sleep(.3)
        path = numpy.array(xopts)
        optplts._verts3d = [path[:, 0], path[:, 1], [f(xs) for xs in xopts]]
        fig.canvas.draw()
        if r[-1] != RUNNING:
            res[0] = r

    p, c = opt.opt_async(f, df, x0.copy(), callback, messages=True, maxiter=1000,
                         report_every=20, gtol=1e-12)

    pylab.ion()
    pylab.show()
|
||||||
112
GPy/testing/kern_psi_stat_tests.py
Normal file
112
GPy/testing/kern_psi_stat_tests.py
Normal file
|
|
@ -0,0 +1,112 @@
|
||||||
|
'''
|
||||||
|
Created on 26 Apr 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
import unittest
|
||||||
|
import GPy
|
||||||
|
import numpy as np
|
||||||
|
import sys
|
||||||
|
from .. import testing
|
||||||
|
|
||||||
|
__test__ = True
|
||||||
|
np.random.seed(0)
|
||||||
|
|
||||||
|
def ard(p):
    """Return the label suffix ``"ARD"`` when *p* has a truthy ``ARD`` attribute.

    Objects without an ``ARD`` attribute, or with a falsy one, yield ``""``.
    """
    return "ARD" if getattr(p, "ARD", False) else ""
|
||||||
|
|
||||||
|
@testing.deepTest
class Test(unittest.TestCase):
    """Compare analytic psi statistics of several kernels with sample averages.

    ``setUp`` draws ``Nsamples`` points from a diagonal Gaussian with mean
    ``q_x_mean`` and variance ``q_x_variance``; each test averages kernel
    evaluations over those samples and compares the result with the kernel's
    ``psi0`` / ``psi1`` / ``psi2`` at loose tolerances.
    """
    D = 9
    M = 4
    N = 3
    # Must be an integer: it is used as an array dimension and to derive the
    # number of sample stripes.
    Nsamples = int(6e6)

    def setUp(self):
        self.kerns = (
                    GPy.kern.rbf(self.D), GPy.kern.rbf(self.D, ARD=True),
                    GPy.kern.linear(self.D, ARD=False), GPy.kern.linear(self.D, ARD=True),
                    GPy.kern.linear(self.D) + GPy.kern.bias(self.D),
                    GPy.kern.rbf(self.D) + GPy.kern.bias(self.D),
                    GPy.kern.linear(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
                    GPy.kern.rbf(self.D) + GPy.kern.bias(self.D) + GPy.kern.white(self.D),
                    GPy.kern.bias(self.D), GPy.kern.white(self.D),
                    )
        self.q_x_mean = np.random.randn(self.D)
        self.q_x_variance = np.exp(np.random.randn(self.D))
        self.q_x_samples = np.random.randn(self.Nsamples, self.D) * np.sqrt(self.q_x_variance) + self.q_x_mean
        self.Z = np.random.randn(self.M, self.D)
        self.q_x_mean.shape = (1, self.D)
        self.q_x_variance.shape = (1, self.D)

    def test_psi0(self):
        """psi0 should match the mean of Kdiag over the samples (1 decimal place)."""
        for kern in self.kerns:
            psi0 = kern.psi0(self.Z, self.q_x_mean, self.q_x_variance)
            Kdiag = kern.Kdiag(self.q_x_samples)
            self.assertAlmostEqual(psi0, np.mean(Kdiag), 1)

    def test_psi1(self):
        """psi1 should match K(samples, Z) averaged over stripes of 100 samples."""
        stripe_size = 100
        n_stripes = self.Nsamples // stripe_size
        for kern in self.kerns:
            psi1 = kern.psi1(self.Z, self.q_x_mean, self.q_x_variance)
            K_ = np.zeros((stripe_size, self.M))
            for q_x_sample_stripe in np.array_split(self.q_x_samples, n_stripes):
                K_ += kern.K(q_x_sample_stripe, self.Z)
            K_ /= n_stripes
            msg = "psi1: " + "+".join([p.name + ard(p) for p in kern.parts])
            try:
                self.assertTrue(np.allclose(psi1.squeeze(), K_,
                                            rtol=1e-1, atol=.1),
                                msg=msg + ": not matching")
            except AssertionError:
                # Best-effort check: a mismatch is reported but does not fail
                # the run. Any other exception now propagates.
                print(msg + ": not matching")

    def test_psi2(self):
        """psi2 should match the sample average of the outer products of K(x, Z)."""
        stripe_size = 100
        n_stripes = self.Nsamples // stripe_size
        for kern in self.kerns:
            psi2 = kern.psi2(self.Z, self.q_x_mean, self.q_x_variance)
            K_ = np.zeros((self.M, self.M))
            for q_x_sample_stripe in np.array_split(self.q_x_samples, n_stripes):
                K = kern.K(q_x_sample_stripe, self.Z)
                K_ += (K[:, :, None] * K[:, None, :]).mean(0)
            K_ /= n_stripes
            msg = "psi2: {}".format("+".join([p.name + ard(p) for p in kern.parts]))
            try:
                self.assertTrue(np.allclose(psi2.squeeze(), K_,
                                            rtol=1e-1, atol=.1),
                                msg=msg + ": not matching")
            except AssertionError:
                # Best-effort check, same policy as test_psi1.
                print(msg + ": not matching")
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run only the three psi-statistic tests when executed as a script.
    import sys
    sys.argv = [''] + ['Test.test_psi0', 'Test.test_psi1', 'Test.test_psi2']
    unittest.main()
|
||||||
157
GPy/testing/psi_stat_tests.py
Normal file
157
GPy/testing/psi_stat_tests.py
Normal file
|
|
@ -0,0 +1,157 @@
|
||||||
|
'''
|
||||||
|
Created on 22 Apr 2013
|
||||||
|
|
||||||
|
@author: maxz
|
||||||
|
'''
|
||||||
|
import unittest
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
import GPy
|
||||||
|
import itertools
|
||||||
|
from GPy.core import model
|
||||||
|
|
||||||
|
class PsiStatModel(model):
    """Expose one psi statistic of a kernel as a model for gradient checking.

    The objective (``log_likelihood``) is the sum of the statistic named by
    ``which`` (for example ``'psi0'``, ``'psi1'`` or ``'psi2'``), evaluated at
    ``(Z, X, X_variance)``. The parameter vector is the concatenation of
    ``X``, ``X_variance``, ``Z`` and the kernel parameters, in that order.
    """

    def __init__(self, which, X, X_variance, Z, M, kernel):
        self.which = which
        self.X = X
        self.X_variance = X_variance
        self.Z = Z
        self.N, self.Q = X.shape
        # The ``M`` argument is kept for interface compatibility; the value
        # actually stored comes from the shape of Z.
        self.M, Q = Z.shape
        assert self.Q == Q, "shape missmatch: Z:{!s} X:{!s}".format(Z.shape, X.shape)
        self.kern = kernel
        super(PsiStatModel, self).__init__()
        # Only used as a shape/dtype template for the all-ones upstream gradient.
        self.psi_ = getattr(self.kern, self.which)(self.Z, self.X, self.X_variance)

    def _psi_gradient(self, wrt):
        """Call the kernel's ``d<which>_<wrt>`` with an all-ones upstream gradient."""
        grad_func = getattr(self.kern, "d" + self.which + "_" + wrt)
        return grad_func(numpy.ones_like(self.psi_), self.Z, self.X, self.X_variance)

    def _get_param_names(self):
        Xnames = ["{}_{}_{}".format(what, i, j) for what, i, j in itertools.product(['X', 'X_variance'], range(self.N), range(self.Q))]
        Znames = ["Z_{}_{}".format(i, j) for i, j in itertools.product(range(self.M), range(self.Q))]
        return Xnames + Znames + self.kern._get_param_names()

    def _get_params(self):
        return numpy.hstack([self.X.flatten(), self.X_variance.flatten(), self.Z.flatten(), self.kern._get_params()])

    def _set_params(self, x, save_old=True, save_count=0):
        # Unpack in the same order _get_params packs: X, X_variance, Z, kernel.
        start, end = 0, self.X.size
        self.X = x[start:end].reshape(self.N, self.Q)
        start, end = end, end + self.X_variance.size
        self.X_variance = x[start: end].reshape(self.N, self.Q)
        start, end = end, end + self.Z.size
        self.Z = x[start: end].reshape(self.M, self.Q)
        self.kern._set_params(x[end:])

    def log_likelihood(self):
        return getattr(self.kern, self.which)(self.Z, self.X, self.X_variance).sum()

    def _log_likelihood_gradients(self):
        psimu, psiS = self._psi_gradient("dmuS")
        try:
            psiZ = self._psi_gradient("dZ")
        except AttributeError:
            # No Z-gradient available for this statistic: use zeros.
            psiZ = numpy.zeros(self.M * self.Q)
        thetagrad = self._psi_gradient("dtheta").flatten()
        return numpy.hstack((psimu.flatten(), psiS.flatten(), psiZ.flatten(), thetagrad))
|
||||||
|
|
||||||
|
class DPsiStatTest(unittest.TestCase):
    """Gradient checks (``checkgrad``) of psi statistics via ``PsiStatModel``."""
    Q = 5
    N = 50
    M = 10
    D = 20
    X = numpy.random.randn(N, Q)
    X_var = .5 * numpy.ones_like(X) + .4 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
    Z = numpy.random.permutation(X)[:M]
    Y = X.dot(numpy.random.randn(Q, D))
    # kernels = [GPy.kern.linear(Q, ARD=True, variances=numpy.random.rand(Q)), GPy.kern.rbf(Q, ARD=True), GPy.kern.bias(Q)]

    # Order matters: the psi2 tests below select kernels by index.
    kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
               GPy.kern.linear(Q) + GPy.kern.bias(Q),
               GPy.kern.rbf(Q) + GPy.kern.bias(Q)]

    def _check_gradient(self, which, k):
        """Build a PsiStatModel for statistic *which* and kernel *k*; assert checkgrad passes."""
        m = PsiStatModel(which, X=self.X, X_variance=self.X_var, Z=self.Z,
                         M=self.M, kernel=k)
        label = "{} x {}".format("+".join(p.name for p in k.parts), which)
        self.assertTrue(m.checkgrad(), label)

    def testPsi0(self):
        # Failures are reported through the test runner; the original dropped
        # into an ipdb session here, which blocks unattended runs.
        for k in self.kernels:
            self._check_gradient('psi0', k)

    # testPsi1 is disabled in the original source (reason not recorded):
    # def testPsi1(self):
    #     for k in self.kernels:
    #         self._check_gradient('psi1', k)

    def testPsi2_lin(self):
        self._check_gradient('psi2', self.kernels[0])

    def testPsi2_lin_bia(self):
        self._check_gradient('psi2', self.kernels[3])

    def testPsi2_rbf(self):
        self._check_gradient('psi2', self.kernels[1])

    def testPsi2_rbf_bia(self):
        self._check_gradient('psi2', self.kernels[-1])

    def testPsi2_bia(self):
        self._check_gradient('psi2', self.kernels[2])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    import sys
    # Pass 'i' on the command line to build a model for interactive
    # inspection instead of running the unit tests.
    interactive = 'i' in sys.argv
    if not interactive:
        unittest.main()
    else:
        # Alternative set-up kept for reference (Bayesian GPLVM gradient check):
        #     N, M, Q, D = 30, 5, 4, 30
        #     X = numpy.random.rand(N, Q)
        #     k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
        #     K = k.K(X)
        #     Y = numpy.random.multivariate_normal(numpy.zeros(N), K, D).T
        #     Y -= Y.mean(axis=0)
        #     k = GPy.kern.linear(Q) + GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
        #     m = GPy.models.Bayesian_GPLVM(Y, Q, kernel=k, M=M)
        #     m.ensure_default_constraints()
        #     m.randomize()
        #     # self.assertTrue(m.checkgrad())
        numpy.random.seed(0)
        Q, N, M, D = 5, 50, 10, 15
        X = numpy.random.randn(N, Q)
        X_var = .5 * numpy.ones_like(X) + .1 * numpy.clip(numpy.random.randn(*X.shape), 0, 1)
        Z = numpy.random.permutation(X)[:M]
        Y = X.dot(numpy.random.randn(Q, D))
        # Other models that were tried here (kept for reference):
        #     kernel = GPy.kern.bias(Q)
        #     kernels = [GPy.kern.linear(Q), GPy.kern.rbf(Q), GPy.kern.bias(Q),
        #                GPy.kern.linear(Q) + GPy.kern.bias(Q),
        #                GPy.kern.rbf(Q) + GPy.kern.bias(Q)]
        #     for k in kernels:
        #         m = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, M=M, kernel=k)
        #         assert m.checkgrad(), "{} x psi1".format("+".join(map(lambda x: x.name, k.parts)))
        #     m0 = PsiStatModel('psi0', X=X, X_variance=X_var, Z=Z, M=M, kernel=GPy.kern.linear(Q))
        #     m1 = PsiStatModel('psi1', X=X, X_variance=X_var, Z=Z, M=M, kernel=kernel)
        #     m2 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, M=M, kernel=GPy.kern.rbf(Q))
        #     m4 = PsiStatModel('psi2', X=X, X_variance=X_var, Z=Z, M=M,
        #                       kernel=GPy.kern.rbf(Q) + GPy.kern.bias(Q))
        m3 = PsiStatModel(
            'psi2',
            X=X,
            X_variance=X_var,
            Z=Z,
            M=M,
            kernel=GPy.kern.linear(Q, ARD=True, variances=numpy.random.rand(Q)),  # + GPy.kern.bias(Q)
        )
        m3.ensure_default_constraints()
|
||||||
|
|
@ -112,6 +112,16 @@ class GradientTests(unittest.TestCase):
|
||||||
bias = GPy.kern.bias(2)
|
bias = GPy.kern.bias(2)
|
||||||
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
|
self.check_model_with_white(bias, model_type='GP_regression', dimension=2)
|
||||||
|
|
||||||
|
def test_GP_regression_linear_kern_1D_ARD(self):
    ''' Testing the GP regression with an ARD linear kernel on 1d data '''
    self.check_model_with_white(GPy.kern.linear(1, ARD=True),
                                model_type='GP_regression',
                                dimension=1)
|
||||||
|
|
||||||
|
def test_GP_regression_linear_kern_2D_ARD(self):
    ''' Testing the GP regression with an ARD linear kernel on 2d data '''
    self.check_model_with_white(GPy.kern.linear(2, ARD=True),
                                model_type='GP_regression',
                                dimension=2)
|
||||||
|
|
||||||
def test_GP_regression_linear_kern_1D(self):
|
def test_GP_regression_linear_kern_1D(self):
|
||||||
''' Testing the GP regression with linear kernel on 1d data '''
|
''' Testing the GP regression with linear kernel on 1d data '''
|
||||||
linear = GPy.kern.linear(1)
|
linear = GPy.kern.linear(1)
|
||||||
|
|
|
||||||
|
|
@ -4,14 +4,14 @@ import numpy as np
|
||||||
import GPy
|
import GPy
|
||||||
import scipy.sparse
|
import scipy.sparse
|
||||||
import scipy.io
|
import scipy.io
|
||||||
data_path = os.path.join(os.path.dirname(__file__),'datasets')
|
data_path = os.path.join(os.path.dirname(__file__), 'datasets')
|
||||||
default_seed =10000
|
default_seed = 10000
|
||||||
|
|
||||||
# Some general utilities.
|
# Some general utilities.
|
||||||
def sample_class(f):
    """Draw labels in {-1, +1}, where +1 has probability ``1 / (1 + exp(-f))``."""
    prob_positive = 1. / (1. + np.exp(-f))
    draws = np.random.binomial(1, prob_positive)
    # Map the Bernoulli outcomes 1/0 onto the labels +1/-1.
    return np.where(draws, 1, -1)
|
||||||
|
|
||||||
def della_gatta_TRP63_gene_expression(gene_number=None):
|
def della_gatta_TRP63_gene_expression(gene_number=None):
|
||||||
|
|
@ -25,6 +25,15 @@ def della_gatta_TRP63_gene_expression(gene_number=None):
|
||||||
Y = Y[:, None]
|
Y = Y[:, None]
|
||||||
return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
|
return {'X': X, 'Y': Y, 'info': "The full gene expression data set from della Gatta et al (http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2413161/) processed by RMA."}
|
||||||
|
|
||||||
|
def simulation_BGPLVM():
    """Load ``BGPLVMSimulation.mat`` and return its 'Y', 'initS' and 'initMu' arrays as floats.

    The result is a dict with keys 'Y', 'S', 'mu' and a descriptive 'info' string.
    """
    contents = scipy.io.loadmat(os.path.join(data_path, 'BGPLVMSimulation.mat'))

    def as_float(key):
        return np.array(contents[key], dtype=float)

    dataset = {'info': "Simulated test dataset generated in MATLAB to compare BGPLVM between python and MATLAB"}
    dataset['Y'] = as_float('Y')
    dataset['S'] = as_float('initS')
    dataset['mu'] = as_float('initMu')
    return dataset
|
||||||
|
|
||||||
|
|
||||||
# The data sets
|
# The data sets
|
||||||
def oil():
|
def oil():
|
||||||
|
|
@ -32,7 +41,7 @@ def oil():
|
||||||
X = np.fromfile(fid, sep='\t').reshape((-1, 12))
|
X = np.fromfile(fid, sep='\t').reshape((-1, 12))
|
||||||
fid.close()
|
fid.close()
|
||||||
fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
|
fid = open(os.path.join(data_path, 'oil', 'DataTrnLbls.txt'))
|
||||||
Y = np.fromfile(fid, sep='\t').reshape((-1, 3))*2.-1.
|
Y = np.fromfile(fid, sep='\t').reshape((-1, 3)) * 2. - 1.
|
||||||
fid.close()
|
fid.close()
|
||||||
return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
|
return {'X': X, 'Y': Y, 'info': "The oil data from Bishop and James (1993)."}
|
||||||
|
|
||||||
|
|
@ -74,9 +83,9 @@ def silhouette():
|
||||||
inMean = np.mean(mat_data['Y'])
|
inMean = np.mean(mat_data['Y'])
|
||||||
inScales = np.sqrt(np.var(mat_data['Y']))
|
inScales = np.sqrt(np.var(mat_data['Y']))
|
||||||
X = mat_data['Y'] - inMean
|
X = mat_data['Y'] - inMean
|
||||||
X = X/inScales
|
X = X / inScales
|
||||||
Xtest = mat_data['Y_test'] - inMean
|
Xtest = mat_data['Y_test'] - inMean
|
||||||
Xtest = Xtest/inScales
|
Xtest = Xtest / inScales
|
||||||
Y = mat_data['Z']
|
Y = mat_data['Z']
|
||||||
Ytest = mat_data['Z_test']
|
Ytest = mat_data['Z_test']
|
||||||
return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
|
return {'X': X, 'Y': Y, 'Xtest': Xtest, 'Ytest': Ytest, 'info': "Artificial silhouette simulation data developed from Agarwal and Triggs (2004)."}
|
||||||
|
|
@ -102,13 +111,13 @@ def toy_rbf_1d(seed=default_seed):
|
||||||
np.random.seed(seed=seed)
|
np.random.seed(seed=seed)
|
||||||
numIn = 1
|
numIn = 1
|
||||||
N = 500
|
N = 500
|
||||||
X = np.random.uniform(low=-1.0, high=1.0, size=(N, numIn))
|
X = np.random.uniform(low= -1.0, high=1.0, size=(N, numIn))
|
||||||
X.sort(axis=0)
|
X.sort(axis=0)
|
||||||
rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
|
rbf = GPy.kern.rbf(numIn, variance=1., lengthscale=np.array((0.25,)))
|
||||||
white = GPy.kern.white(numIn, variance=1e-2)
|
white = GPy.kern.white(numIn, variance=1e-2)
|
||||||
kernel = rbf + white
|
kernel = rbf + white
|
||||||
K = kernel.K(X)
|
K = kernel.K(X)
|
||||||
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N,1))
|
y = np.reshape(np.random.multivariate_normal(np.zeros(N), K), (N, 1))
|
||||||
return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
|
return {'X':X, 'Y':y, 'info': "Samples 500 values of a function from an RBF covariance with very small noise for inputs uniformly distributed between -1 and 1."}
|
||||||
|
|
||||||
def toy_rbf_1d_50(seed=default_seed):
|
def toy_rbf_1d_50(seed=default_seed):
|
||||||
|
|
@ -124,15 +133,15 @@ def toy_rbf_1d_50(seed=default_seed):
|
||||||
|
|
||||||
def toy_linear_1d_classification(seed=default_seed):
    """Sample 40 one-dimensional inputs and labels drawn from the latent function F = 2 X.

    Inputs are two groups of 20 normal draws (means -3 and 3, standard
    deviation 5). Returns a dict with 'X', the sampled labels 'Y' and 'F'.
    """
    np.random.seed(seed=seed)
    left_group = np.random.normal(-3, 5, 20)
    right_group = np.random.normal(3, 5, 20)
    X = np.r_[left_group, right_group][:, None]
    labels = sample_class(2.*X)
    latent = 2.*X
    return {'X': X, 'Y': labels, 'F': latent}
|
||||||
|
|
||||||
def rogers_girolami_olympics():
    """Load the 'male100' table from ``olympics.mat``; column 0 becomes X, column 1 becomes Y."""
    mat_path = os.path.join(data_path, 'olympics.mat')
    table = scipy.io.loadmat(mat_path)['male100']
    first_col, second_col = table[:, 0], table[:, 1]
    return {'X': first_col[:, None],
            'Y': second_col[:, None],
            'info': "Olympic sprint times for 100 m men from 1896 until 2008. Example is from Rogers and Girolami's First Course in Machine Learning."}
|
||||||
# def movielens_small(partNo=1,seed=default_seed):
|
# def movielens_small(partNo=1,seed=default_seed):
|
||||||
# np.random.seed(seed=seed)
|
# np.random.seed(seed=seed)
|
||||||
|
|
@ -169,7 +178,7 @@ def rogers_girolami_olympics():
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def crescent_data(num_data=200,seed=default_seed):
|
def crescent_data(num_data=200, seed=default_seed):
|
||||||
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
|
"""Data set formed from a mixture of four Gaussians. In each class two of the Gaussians are elongated at right angles to each other and offset to form an approximation to the crescent data that is popular in semi-supervised learning as a toy problem.
|
||||||
:param num_data_part: number of data to be sampled (default is 200).
|
:param num_data_part: number of data to be sampled (default is 200).
|
||||||
:type num_data: int
|
:type num_data: int
|
||||||
|
|
@ -178,7 +187,7 @@ def crescent_data(num_data=200,seed=default_seed):
|
||||||
np.random.seed(seed=seed)
|
np.random.seed(seed=seed)
|
||||||
sqrt2 = np.sqrt(2)
|
sqrt2 = np.sqrt(2)
|
||||||
# Rotation matrix
|
# Rotation matrix
|
||||||
R = np.array([[sqrt2/2, -sqrt2/2], [sqrt2/2, sqrt2/2]])
|
R = np.array([[sqrt2 / 2, -sqrt2 / 2], [sqrt2 / 2, sqrt2 / 2]])
|
||||||
# Scaling matrices
|
# Scaling matrices
|
||||||
scales = []
|
scales = []
|
||||||
scales.append(np.array([[3, 0], [0, 1]]))
|
scales.append(np.array([[3, 0], [0, 1]]))
|
||||||
|
|
@ -195,9 +204,9 @@ def crescent_data(num_data=200,seed=default_seed):
|
||||||
num_data_part = []
|
num_data_part = []
|
||||||
num_data_total = 0
|
num_data_total = 0
|
||||||
for i in range(0, 4):
|
for i in range(0, 4):
|
||||||
num_data_part.append(round(((i+1)*num_data)/4.))
|
num_data_part.append(round(((i + 1) * num_data) / 4.))
|
||||||
num_data_part[i] -= num_data_total
|
num_data_part[i] -= num_data_total
|
||||||
#print num_data_part[i]
|
# print num_data_part[i]
|
||||||
part = np.random.normal(size=(num_data_part[i], 2))
|
part = np.random.normal(size=(num_data_part[i], 2))
|
||||||
part = np.dot(np.dot(part, scales[i]), R) + means[i]
|
part = np.dot(np.dot(part, scales[i]), R) + means[i]
|
||||||
Xparts.append(part)
|
Xparts.append(part)
|
||||||
|
|
@ -205,15 +214,103 @@ def crescent_data(num_data=200,seed=default_seed):
|
||||||
X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
|
X = np.vstack((Xparts[0], Xparts[1], Xparts[2], Xparts[3]))
|
||||||
|
|
||||||
|
|
||||||
Y = np.vstack((np.ones((num_data_part[0]+num_data_part[1], 1)), -np.ones((num_data_part[2]+num_data_part[3], 1))))
|
Y = np.vstack((np.ones((num_data_part[0] + num_data_part[1], 1)), -np.ones((num_data_part[2] + num_data_part[3], 1))))
|
||||||
return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
|
return {'X':X, 'Y':Y, 'info': "Two separate classes of data formed approximately in the shape of two crescents."}
|
||||||
|
|
||||||
|
|
||||||
def creep_data():
    """Load the 'creep/taka' table: column 1 is the target y, columns 0 and 2..30 are the inputs X."""
    table = np.loadtxt(os.path.join(data_path, 'creep', 'taka'))
    input_columns = [0] + list(range(2, 31))
    targets = table[:, 1:2].copy()
    inputs = table[:, input_columns].copy()
    return {'X': inputs, 'y' : targets}
|
||||||
|
|
||||||
|
def cmu_mocap_49_balance():
    """Load CMU subject 49's one-legged balancing motions (train: 18, 19; test: 20), sub-sampled every 4 frames."""
    data = cmu_mocap('49', ['18', '19'], ['20'], sample_every=4)
    prefix = "One legged balancing motions from CMU data base subject 49. As used in Alvarez, Luengo and Lawrence at AISTATS 2009. It consists of "
    data['info'] = prefix + data['info']
    return data
|
||||||
|
|
||||||
|
def cmu_mocap_35_walk_jog():
    """Load CMU subject 35's walking and jogging motions, sub-sampled every 4 frames.

    Training motions are 01..34 without 18, 27 and 29; motions 18 and 29 are
    the test set.
    """
    held_out = (18, 27, 29)
    train_motions = ['%02d' % number for number in range(1, 35) if number not in held_out]
    test_motions = ['18', '29']
    data = cmu_mocap('35', train_motions, test_motions, sample_every=4)
    prefix = "Walk and jog data from CMU data base subject 35. As used in Tayor, Roweis and Hinton at NIPS 2007, but without their pre-processing (i.e. as used by Lawrence at AISTATS 2007). It consists of "
    data['info'] = prefix + data['info']
    return data
|
||||||
|
|
||||||
|
def _load_cmu_motions(skel, subject_dir, subject, motions, sample_every):
    """Load the given motions of one subject and stack them row-wise.

    Returns ``(Y, lbls)``: the channels of all motions (every
    ``sample_every``-th frame) and a matching one-hot label row per frame
    that identifies which motion the frame came from.
    """
    one_hot = np.eye(len(motions))
    channels = []
    labels = []
    for i, motion in enumerate(motions):
        chan = skel.load_channels(os.path.join(subject_dir, subject + '_' + motion + '.amc'))
        chan = chan[::sample_every, :]
        channels.append(chan)
        labels.append(np.tile(one_hot[i, :], (chan.shape[0], 1)))
    # Cast to float so the result matches the float array the frames were
    # previously copied into.
    return np.vstack(channels).astype(float), np.vstack(labels)


def cmu_mocap(subject, train_motions, test_motions=None, sample_every=4):
    """Load a given subject's training and test motions from the CMU motion capture data.

    :param subject: subject identifier as a string (used in directory and file names).
    :param train_motions: non-empty sequence of motion identifiers (strings) to load for training.
    :param test_motions: optional sequence of motion identifiers to load as test data.
    :param sample_every: keep every ``sample_every``-th frame of each motion.
    :returns: dict with keys 'Y', 'lbls', 'Ytest', 'lblstest', 'info' and 'skel';
        'Ytest' and 'lblstest' are ``None`` when no test motions are given.
    """
    if test_motions is None:
        test_motions = []

    # Load in subject skeleton.
    subject_dir = os.path.join(data_path, 'mocap', 'cmu', subject)
    skel = GPy.util.mocap.acclaim_skeleton(os.path.join(subject_dir, subject + '.asf'))

    Y, lbls = _load_cmu_motions(skel, subject_dir, subject, train_motions, sample_every)

    if len(test_motions) > 0:
        Ytest, lblstest = _load_cmu_motions(skel, subject_dir, subject, test_motions, sample_every)
    else:
        Ytest = None
        lblstest = None

    info = 'Subject: ' + subject + '. Training motions: ' + ', '.join(train_motions)
    if len(test_motions) > 0:
        info += '. Test motions: ' + ', '.join(test_motions) + '.'
    else:
        info += '.'
    if sample_every != 1:
        info += ' Data is sub-sampled to every ' + str(sample_every) + ' frames.'
    return {'Y': Y, 'lbls' : lbls, 'Ytest': Ytest, 'lblstest' : lblstest, 'info': info, 'skel': skel}
|
||||||
|
|
|
||||||
BIN
GPy/util/datasets/BGPLVMSimulation.mat
Normal file
BIN
GPy/util/datasets/BGPLVMSimulation.mat
Normal file
Binary file not shown.
|
|
@ -1,9 +1,12 @@
|
||||||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||||
|
|
||||||
|
# tdot function courtesy of Iain Murray:
|
||||||
|
# Iain Murray, April 2013. iain contactable via iainmurray.net
|
||||||
|
# http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot.py
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy import linalg, optimize
|
from scipy import linalg, optimize, weave
|
||||||
import pylab as pb
|
import pylab as pb
|
||||||
import Tango
|
import Tango
|
||||||
import sys
|
import sys
|
||||||
|
|
@ -11,9 +14,17 @@ import re
|
||||||
import pdb
|
import pdb
|
||||||
import cPickle
|
import cPickle
|
||||||
import types
|
import types
|
||||||
|
import ctypes
|
||||||
|
from ctypes import byref, c_char, c_int, c_double # TODO
|
||||||
#import scipy.lib.lapack.flapack
|
#import scipy.lib.lapack.flapack
|
||||||
import scipy as sp
|
import scipy as sp
|
||||||
|
|
||||||
|
# Probe for the BLAS shared library bundled with numpy. tdot() uses
# _blas_available to choose between the BLAS path and the numpy fallback.
try:
    _blaslib = ctypes.cdll.LoadLibrary(np.core._dotblas.__file__)
    _blas_available = True
except (AttributeError, OSError):
    # AttributeError: this numpy has no _dotblas module;
    # OSError: the shared library could not be loaded.
    _blas_available = False
|
||||||
|
|
||||||
def trace_dot(a,b):
|
def trace_dot(a,b):
|
||||||
"""
|
"""
|
||||||
efficiently compute the trace of the matrix product of a and b
|
efficiently compute the trace of the matrix product of a and b
|
||||||
|
|
@ -61,7 +72,7 @@ def jitchol(A,maxtries=5):
|
||||||
raise linalg.LinAlgError, "not pd: negative diagonal elements"
|
raise linalg.LinAlgError, "not pd: negative diagonal elements"
|
||||||
jitter= diagA.mean()*1e-6
|
jitter= diagA.mean()*1e-6
|
||||||
for i in range(1,maxtries+1):
|
for i in range(1,maxtries+1):
|
||||||
print '\rWarning: adding jitter of {:.10e} '.format(jitter),
|
print 'Warning: adding jitter of {:.10e}'.format(jitter)
|
||||||
try:
|
try:
|
||||||
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
|
return linalg.cholesky(A+np.eye(A.shape[0]).T*jitter, lower = True)
|
||||||
except:
|
except:
|
||||||
|
|
@ -97,7 +108,7 @@ def jitchol_old(A,maxtries=5):
|
||||||
|
|
||||||
raise linalg.LinAlgError,"not positive definite, even with jitter."
|
raise linalg.LinAlgError,"not positive definite, even with jitter."
|
||||||
|
|
||||||
def pdinv(A):
|
def pdinv(A, *args):
|
||||||
"""
|
"""
|
||||||
:param A: A DxD pd numpy array
|
:param A: A DxD pd numpy array
|
||||||
|
|
||||||
|
|
@ -110,7 +121,7 @@ def pdinv(A):
|
||||||
:rval logdet: the log of the determinant of A
|
:rval logdet: the log of the determinant of A
|
||||||
:rtype logdet: float64
|
:rtype logdet: float64
|
||||||
"""
|
"""
|
||||||
L = jitchol(A)
|
L = jitchol(A, *args)
|
||||||
logdet = 2.*np.sum(np.log(np.diag(L)))
|
logdet = 2.*np.sum(np.log(np.diag(L)))
|
||||||
Li = chol_inv(L)
|
Li = chol_inv(L)
|
||||||
Ai = linalg.lapack.flapack.dpotri(L)[0]
|
Ai = linalg.lapack.flapack.dpotri(L)[0]
|
||||||
|
|
@ -175,3 +186,120 @@ def PCA(Y, Q):
|
||||||
X /= v;
|
X /= v;
|
||||||
W *= v;
|
W *= v;
|
||||||
return X, W.T
|
return X, W.T
|
||||||
|
|
||||||
|
|
||||||
|
def tdot_numpy(mat,out=None):
    """Return ``np.dot(mat, mat.T)``; the result is written into *out* when it is given."""
    transposed = mat.T
    return np.dot(mat, transposed, out)
|
||||||
|
|
||||||
|
def tdot_blas(mat, out=None):
    """returns np.dot(mat, mat.T), but faster for large 2D arrays of doubles.

    :param mat: input array; anything that is not a 2D float64 array is
        handled by a plain np.dot (in which case `out` is not used)
    :param out: optional float64 array of shape (n, n) to receive the result
    :returns: the symmetric product mat * mat^T
    """
    is_double_matrix = (mat.dtype == 'float64') and (len(mat.shape) == 2)
    if not is_double_matrix:
        return np.dot(mat, mat.T)

    nn = mat.shape[0]
    if out is not None:
        assert(out.dtype == 'float64')
        assert(out.shape == (nn,nn))
        # FIXME: should allow non-contiguous out, and copy output into it:
        assert(8 in out.strides)
        # zeroing needed because only one triangle is written by DSYRK and
        # the other is filled in by copying afterwards
        out[:] = 0.0
    else:
        out = np.zeros((nn,nn))

    ## Call to DSYRK from BLAS
    # If already in Fortran order (rare), and has the right sorts of strides I
    # could avoid the copy. I also thought swapping to cblas API would allow use
    # of C order. However, I tried that and had errors with large matrices:
    # http://homepages.inf.ed.ac.uk/imurray2/code/tdot/tdot_broken.py
    mat = np.asfortranarray(mat)

    # Every DSYRK argument is passed by reference, hence the ctypes wrappers.
    uplo = c_char('l')
    trans = c_char('n')
    n = c_int(mat.shape[0])
    k = c_int(mat.shape[1])
    alpha = c_double(1.0)
    a_ptr = mat.ctypes.data_as(ctypes.c_void_p)
    lda = c_int(mat.shape[0])
    beta = c_double(0.0)
    c_ptr = out.ctypes.data_as(ctypes.c_void_p)
    ldc = c_int(np.max(out.strides) / 8)
    _blaslib.dsyrk_(byref(uplo), byref(trans), byref(n), byref(k),
            byref(alpha), a_ptr, byref(lda), byref(beta), c_ptr, byref(ldc))

    # DSYRK filled a single triangle; mirror it to get the full matrix.
    symmetrify(out,upper=True)

    return out
|
||||||
|
|
||||||
|
def tdot(*args, **kwargs):
    """Compute mat * mat^T, using the BLAS routine when it is available.

    All arguments are forwarded unchanged to tdot_blas or tdot_numpy.
    """
    implementation = tdot_blas if _blas_available else tdot_numpy
    return implementation(*args,**kwargs)
|
||||||
|
|
||||||
|
def symmetrify(A,upper=False):
    """
    Take the square matrix A and make it symmetrical by copying elements from
    one triangle onto the other.

    works IN PLACE.

    :param A: square 2D numpy array, overwritten with the symmetric result
    :param upper: if False (default) the lower triangle is the source and is
        copied onto the upper one; if True the upper triangle is the source
    """
    N,M = A.shape
    assert N==M
    # NOTE: weave.inline looks the names 'A' and 'N' up in this function's
    # local scope, so these locals must keep exactly these names.
    c_contig_code = """
    for (int i=1; i<N; i++){
      for (int j=0; j<i; j++){
        A[i+j*N] = A[i*N+j];
      }
    }
    """
    f_contig_code = """
    for (int i=1; i<N; i++){
      for (int j=0; j<i; j++){
        A[i*N+j] = A[i+j*N];
      }
    }
    """
    # The same flat-index loop means opposite things in C and Fortran memory
    # order, which is why the two snippets swap roles between the layouts.
    if A.flags['C_CONTIGUOUS'] and upper:
        weave.inline(f_contig_code,['A','N'])
    elif A.flags['C_CONTIGUOUS'] and not upper:
        weave.inline(c_contig_code,['A','N'])
    elif A.flags['F_CONTIGUOUS'] and upper:
        weave.inline(c_contig_code,['A','N'])
    elif A.flags['F_CONTIGUOUS'] and not upper:
        weave.inline(f_contig_code,['A','N'])
    else:
        # Non-contiguous fallback in pure numpy. It must honour `upper` just
        # like the compiled branches do (it used to always use the lower half).
        if upper:
            tmp = np.triu(A)
            mirrored = np.triu(tmp,1).T
        else:
            tmp = np.tril(A)
            mirrored = np.tril(tmp,-1).T
        A[:] = 0.0
        A += tmp
        A += mirrored
|
||||||
|
|
||||||
|
def symmetrify_murray(A):
    """Symmetrise a triangular square matrix IN PLACE as (A + A^T) with the
    diagonal restored.

    Intended for a matrix holding only one triangle (the other being zero):
    adding the transpose fills the empty triangle but doubles the diagonal,
    which is then halved again.

    :param A: square 2D float numpy array, modified in place
    """
    # Add a copy of the transpose: A.T is a view on the very memory being
    # written, and an in-place add of overlapping views is not reliable on
    # older numpy releases.
    A += A.T.copy()
    # Index the diagonal with an explicit tuple of index arrays; a list of
    # lists is not interpreted as a (rows, cols) pair by current numpy.
    A[np.diag_indices(A.shape[0])] /= 2.0
|
||||||
|
|
||||||
|
def cholupdate(L,x):
    """
    Rank-one update of the LOWER cholesky factor of a pd matrix, IN PLACE.

    If L is the lower cholesky factor of K, then on return L holds L_, the
    lower cholesky factor of K + x*x^T.

    :param L: lower cholesky factor, overwritten with the updated factor
    :param x: update vector; a private copy is used as scratch space, so the
        caller's array is left untouched
    """
    c_headers = """
    #include <math.h>
    """
    c_body = """
    double r,c,s;
    int j,i;
    for(j=0; j<N; j++){
      r = sqrt(L(j,j)*L(j,j) + x(j)*x(j));
      c = r / L(j,j);
      s = x(j) / L(j,j);
      L(j,j) = r;
      for (i=j+1; i<N; i++){
        L(i,j) = (L(i,j) + s*x(i))/c;
        x(i) = c*x(i) - s*L(i,j);
      }
    }
    """
    # NOTE: weave resolves 'N', 'L' and 'x' from this function's locals, so
    # those three names must not be changed.
    N = x.size
    x = x.copy()
    weave.inline(c_body, support_code=c_headers, arg_names=['N','L','x'], type_converters=weave.converters.blitz)
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,620 @@
|
||||||
import os
|
import os
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import math
|
||||||
|
|
||||||
|
class vertex:
    """A single node of a tree: a name, an id, index lists of its parents and
    children, and a free-form `meta` dictionary."""

    def __init__(self, name, id, parents=None, children=None, meta=None):
        """
        ARG name : name of the vertex.
        ARG id : identifier of the vertex.
        ARG parents : list of parent indices (a new empty list when omitted).
        ARG children : list of child indices (a new empty list when omitted).
        ARG meta : dictionary of extra data (a new empty dict when omitted).
        """
        # Defaults are created per instance: a literal [] / {} default would
        # be one object shared (and mutated) by every vertex built without
        # explicit arguments.
        self.name = name
        self.id = id
        self.parents = [] if parents is None else parents
        self.children = [] if children is None else children
        self.meta = {} if meta is None else meta

    def __str__(self):
        return self.name + '(' + str(self.id) + ').'
|
||||||
|
|
||||||
|
class tree:
    """A tree stored as a flat list of vertices that refer to each other by
    their index in that list."""

    def __init__(self):
        self.vertices = []
        self.vertices.append(vertex(name='root', id=0))

    def __str__(self):
        index = self.find_root()
        return self.branch_str(index)

    def branch_str(self, index, indent=''):
        """Return the sub-tree below `index` as text, one vertex per line,
        children indented below their parent."""
        out = indent + str(self.vertices[index]) + '\n'
        for child in self.vertices[index].children:
            out+=self.branch_str(child, indent+' ')
        return out

    def find_children(self):
        """Take a tree and set the children according to the parents.

        Takes a tree structure which lists the parents of each vertex
        and computes the children for each vertex and places them in."""
        for node in self.vertices:
            node.children = []
        for i, node in enumerate(self.vertices):
            for parent in node.parents:
                if i not in self.vertices[parent].children:
                    self.vertices[parent].children.append(i)

    def find_parents(self):
        """Take a tree and set the parents according to the children

        Takes a tree structure which lists the children of each vertex
        and computes the parents for each vertex and places them in."""
        for node in self.vertices:
            node.parents = []
        for i, node in enumerate(self.vertices):
            for child in node.children:
                if i not in self.vertices[child].parents:
                    self.vertices[child].parents.append(i)

    def find_root(self):
        """Finds the index of the root node of the tree.

        The parents are first recomputed from the children lists; the search
        then walks up from vertex 0 following the first parent each time."""
        self.find_parents()
        index = 0
        while len(self.vertices[index].parents)>0:
            index = self.vertices[index].parents[0]
        return index

    def get_index_by_id(self, id):
        """Give the index associated with a given vertex id.

        Raises ValueError when no vertex carries that id."""
        for i, node in enumerate(self.vertices):
            if node.id == id:
                return i
        raise ValueError('Reverse look up of id failed.')

    def get_index_by_name(self, name):
        """Give the index associated with a given vertex name.

        Raises ValueError when no vertex carries that name."""
        for i, node in enumerate(self.vertices):
            if node.name == name:
                return i
        raise ValueError('Reverse look up of name failed.')

    def order_vertices(self):
        """Order vertices in the graph such that parents always have a lower index than children."""
        ordered = False
        while not ordered:
            # Assume this pass is clean until a swap proves otherwise. The
            # flag must be reset once per pass, not once per vertex, or a swap
            # made early in the pass is forgotten by the time the pass ends.
            ordered = True
            for i in range(len(self.vertices)):
                for parent in self.vertices[i].parents:
                    if parent>i:
                        ordered = False
                        self.swap_vertices(i, parent)

    def swap_vertices(self, i, j):
        """Swap two vertices in the tree structure array.
        swap_vertex swaps the location of two vertices in a tree structure array.
        ARG i : the index of the first vertex to be swapped.
        ARG j : the index of the second vertex to be swapped.
        The swap is performed in place; nothing is returned.
        """
        self.vertices[i], self.vertices[j] = self.vertices[j], self.vertices[i]
        # Re-label references to the two indices in every parents/children
        # list, using -1 as a temporary placeholder so that i and j can trade
        # places without clobbering each other.
        for node in self.vertices:
            for swap_list in [node.children, node.parents]:
                if i in swap_list:
                    swap_list[swap_list.index(i)] = -1
                if j in swap_list:
                    swap_list[swap_list.index(j)] = i
                if -1 in swap_list:
                    swap_list[swap_list.index(-1)] = j
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def rotation_matrix(xangle, yangle, zangle, order='zxy', degrees=False):
    """Compute the rotation matrix for an angle in each direction.
    This is a helper function for computing the rotation matrix for a given set of angles in a given order.
    ARG xangle : rotation for x-axis.
    ARG yangle : rotation for y-axis.
    ARG zangle : rotation for z-axis.
    ARG order : the order for the rotations, a sequence of 'x', 'y' and 'z'
        (any other entry is ignored).
    ARG degrees : if True the angles are given in degrees, otherwise radians.
    RETURN rot_mat : the 3x3 rotation matrix."""
    if degrees:
        xangle, yangle, zangle = [math.radians(angle)
                                  for angle in (xangle, yangle, zangle)]

    # Index 1, 2, 3 refers to the x, y and z angle respectively.
    c1, s1 = math.cos(xangle), math.sin(xangle)
    c2, s2 = math.cos(yangle), math.sin(yangle)
    c3, s3 = math.cos(zangle), math.sin(zangle)

    # see http://en.wikipedia.org/wiki/Rotation_matrix for
    # additional info.

    if order=='zxy':
        # Closed form for rotating about z, then x, then y.
        return np.array([[c2*c3-s1*s2*s3, c2*s3+s1*s2*c3, -s2*c1],[-c1*s3, c1*c3, s1],[s2*c3+c2*s1*s3, s2*s3-c2*s1*c3, c2*c1]])

    single_axis = {
        'x': np.array([[1, 0, 0], [0, c1, s1], [0, -s1, c1]]),
        'y': np.array([[c2, 0, -s2], [0, 1, 0], [s2, 0, c2]]),
        'z': np.array([[c3, s3, 0], [-s3, c3, 0], [0, 0, 1]]),
    }
    rot_mat = np.eye(3)
    for axis in order:
        # Each new rotation is applied on the left of what has been built.
        if axis in single_axis:
            rot_mat = np.dot(single_axis[axis], rot_mat)
    return rot_mat
|
||||||
|
|
||||||
|
|
||||||
|
# Motion capture data routines.
|
||||||
|
class skeleton(tree):
    """Base class for motion capture skeletons: a tree of joints whose
    concrete subclasses convert channel values into xyz positions."""

    def __init__(self):
        tree.__init__(self)

    def connection_matrix(self):
        """Return a boolean matrix with entry [i, j] True when vertex j is
        listed as a child of vertex i."""
        connection = np.zeros((len(self.vertices), len(self.vertices)), dtype=bool)
        for i, joint in enumerate(self.vertices):
            for child in joint.children:
                connection[i, child] = True
        return connection

    def to_xyz(self, channels):
        raise NotImplementedError("this needs to be implemented to use the skeleton class")

    def finalize(self):
        """After loading in a skeleton ensure parents are correct, vertex orders are correct and rotation matrices are correct."""
        self.find_parents()
        self.order_vertices()
        # set_rotation_matrices is not defined on this class; the concrete
        # skeleton subclass is expected to provide it.
        self.set_rotation_matrices()

    def smooth_angle_channels(self, channels):
        """Remove discontinuities in angle channels so that they don't cause artifacts in algorithms that rely on the smoothness of the functions.

        ARG channels : frames x channels array, modified in place."""
        for joint in self.vertices:
            for col in joint.meta['rot_ind']:
                # -1 marks "no channel for this axis" (the same sentinel the
                # other readers of rot_ind test for). A plain truth test would
                # wrongly skip the valid column 0 and treat -1 as the last
                # column of the array.
                if col != -1:
                    for k in range(1, channels.shape[0]):
                        diff=channels[k, col]-channels[k-1, col]
                        # A jump that shrinks when a full turn is added or
                        # removed is a wrap-around: shift the remainder of
                        # the sequence by 360 degrees to undo it.
                        if abs(diff+360.)<abs(diff):
                            channels[k:, col]=channels[k:, col]+360.
                        elif abs(diff-360.)<abs(diff):
                            channels[k:, col]=channels[k:, col]-360.
|
||||||
|
|
||||||
|
# class bvh_skeleton(skeleton):
|
||||||
|
# def __init__(self):
|
||||||
|
# skeleton.__init__(self)
|
||||||
|
|
||||||
|
# def to_xyz(self, channels):
|
||||||
|
|
||||||
|
class acclaim_skeleton(skeleton):
    """Skeleton read from an Acclaim skeleton (ASF) file, with support for
    reading the matching motion channel files."""

    # Maps a degree-of-freedom token of the file onto its channel name.
    _DOF_CHANNELS = {'rx': 'Xrotation',
                     'ry': 'Yrotation',
                     'rz': 'Zrotation',
                     'tx': 'Xposition',
                     'ty': 'Yposition',
                     'tz': 'Zposition',
                     'l': 'length'}

    def __init__(self, file_name=None):
        """
        ARG file_name : optional skeleton file to load straight away.
        """
        skeleton.__init__(self)
        self.documentation = []
        self.angle = 'deg'
        self.length = 1.0
        self.mass = 1.0
        self.type = 'acclaim'
        self.vertices[0] = vertex(name='root', id=0,
                                  parents = [0], children=[],
                                  meta = {'orientation': [],
                                          'axis': [0., 0., 0.],
                                          'axis_order': [],
                                          'C': np.eye(3),
                                          'Cinv': np.eye(3),
                                          'channels': [],
                                          'bodymass': [],
                                          'confmass': [],
                                          'order': [],
                                          'rot_ind': [],
                                          'pos_ind': [],
                                          'limits': [],
                                          'xyz': np.array([0., 0., 0.]),
                                          'rot': np.eye(3)})

        if file_name:
            self.load_skel(file_name)

    def _parse_dof(self, tokens):
        """Translate degree-of-freedom tokens ('rx', 'ty', ...) into channel
        names plus the list of rotation axes in the order they appear.

        Tokens are matched case-insensitively; an unknown token raises
        ValueError instead of silently reusing the previous channel."""
        channels = []
        order = []
        for token in tokens:
            key = token.lower()
            if key not in self._DOF_CHANNELS:
                raise ValueError('Unrecognised degree of freedom: ' + token)
            channels.append(self._DOF_CHANNELS[key])
            if key[0] == 'r':
                order.append(key[1])
        return channels, order

    def to_xyz(self, channels):
        """Compute the xyz position of every vertex for one frame.

        ARG channels : the channel values of a single frame.
        RETURN xyz : array with one row of coordinates per vertex."""
        root_meta = self.vertices[0].meta
        rot_val = list(root_meta['orientation'])
        for i, rind in enumerate(root_meta['rot_ind']):
            if rind != -1:
                rot_val[i] += channels[rind]

        root_meta['rot'] = rotation_matrix(rot_val[0],
                                           rot_val[1],
                                           rot_val[2],
                                           root_meta['axis_order'],
                                           degrees=True)
        # vertex based store of the xyz location
        root_meta['xyz'] = list(root_meta['offset'])

        for i, pind in enumerate(root_meta['pos_ind']):
            if pind != -1:
                root_meta['xyz'][i] += channels[pind]

        for ind in self.vertices[0].children:
            self.get_child_xyz(ind, channels)

        return np.array([joint.meta['xyz'] for joint in self.vertices])

    def get_child_xyz(self, ind, channels):
        """Set the rotation and xyz position of vertex `ind` from its first
        parent and the frame's channels, then recurse into its children."""
        meta = self.vertices[ind].meta
        parent = self.vertices[ind].parents[0]
        children = self.vertices[ind].children

        rot_val = np.zeros(3)
        for j, rind in enumerate(meta['rot_ind']):
            if rind != -1:
                rot_val[j] = channels[rind]
            else:
                rot_val[j] = 0
        tdof = rotation_matrix(rot_val[0], rot_val[1], rot_val[2],
                               meta['order'],
                               degrees=True)

        torient = rotation_matrix(meta['axis'][0],
                                  meta['axis'][1],
                                  meta['axis'][2],
                                  meta['axis_order'],
                                  degrees=True)

        # Negated angles applied in reversed order.
        torient_inv = rotation_matrix(-meta['axis'][0],
                                      -meta['axis'][1],
                                      -meta['axis'][2],
                                      meta['axis_order'][::-1],
                                      degrees=True)

        meta['rot'] = np.dot(np.dot(np.dot(torient_inv,tdof),torient),self.vertices[parent].meta['rot'])

        meta['xyz'] = self.vertices[parent].meta['xyz'] + np.dot(meta['offset'],meta['rot'])

        for cind in children:
            self.get_child_xyz(cind, channels)

    def load_channels(self, file_name):
        """Read the motion channels stored in `file_name`.

        RETURN channels : frames x channels array."""
        # `with` closes the file even when parsing raises.
        with open(file_name, 'r') as fid:
            channels = self.read_channels(fid)
        return channels

    def load_skel(self, file_name):
        """Loads an ASF file into a skeleton structure.
        loads skeleton structure from an acclaim skeleton file.
        ARG file_name : the file name to load in.
        The skeleton is loaded into this object; nothing is returned."""
        with open(file_name, 'r') as fid:
            self.read_skel(fid)
        self.name = file_name

    def read_bonedata(self, fid):
        """Read bone data from an acclaim skeleton file stream.

        RETURN lin : the line that ended the section (the next ':' header,
        or '' at end of file)."""
        bone_count = 0
        lin = self.read_line(fid)
        # `lin and ...` ends the loop at end of file instead of indexing ''.
        while lin and lin[0]!=':':
            parts = lin.split()
            if parts[0] == 'begin':
                bone_count += 1
                self.vertices.append(vertex(name = '', id=np.nan,
                                            meta={'name': [],
                                                  'id': [],
                                                  'offset': [],
                                                  'orientation': [],
                                                  'axis': [0., 0., 0.],
                                                  'axis_order': [],
                                                  'C': np.eye(3),
                                                  'Cinv': np.eye(3),
                                                  'channels': [],
                                                  'bodymass': [],
                                                  'confmass': [],
                                                  'order': [],
                                                  'rot_ind': [],
                                                  'pos_ind': [],
                                                  'limits': [],
                                                  'xyz': np.array([0., 0., 0.]),
                                                  'rot': np.eye(3)}))
                lin = self.read_line(fid)

            elif parts[0]=='id':
                self.vertices[bone_count].id = int(parts[1])
                lin = self.read_line(fid)

                self.vertices[bone_count].children = []

            elif parts[0]=='name':
                self.vertices[bone_count].name = parts[1]
                lin = self.read_line(fid)

            elif parts[0]=='direction':
                direction = np.array([float(parts[1]), float(parts[2]), float(parts[3])])
                lin = self.read_line(fid)

            elif parts[0]=='length':
                lgth = float(parts[1])
                lin = self.read_line(fid)

            elif parts[0]=='axis':
                self.vertices[bone_count].meta['axis'] = np.array([float(parts[1]),
                                                                   float(parts[2]),
                                                                   float(parts[3])])
                # order is reversed compared to bvh
                self.vertices[bone_count].meta['axis_order'] = parts[-1][::-1].lower()
                lin = self.read_line(fid)

            elif parts[0]=='dof':
                chans, order = self._parse_dof(parts[1:])
                self.vertices[bone_count].meta['channels'].extend(chans)
                # order is reversed compared to bvh
                self.vertices[bone_count].meta['order'] = order[::-1]
                lin = self.read_line(fid)

            elif parts[0]=='limits':
                self.vertices[bone_count].meta['limits'] = [[float(parts[1][1:]), float(parts[2][:-1])]]

                lin = self.read_line(fid)

                # Further limit pairs follow, one per line, up to 'end'.
                while lin and lin !='end':
                    parts = lin.split()

                    self.vertices[bone_count].meta['limits'].append([float(parts[0][1:]), float(parts[1][:-1])])
                    lin = self.read_line(fid)
                self.vertices[bone_count].meta['limits'] = np.array(self.vertices[bone_count].meta['limits'])

            elif parts[0]=='end':
                self.vertices[bone_count].meta['offset'] = direction*lgth
                lin = self.read_line(fid)

            else:
                # Unknown keyword: move on, otherwise the same line would be
                # examined forever.
                lin = self.read_line(fid)

        return lin

    def read_channels(self, fid):
        """Read channels from an acclaim file.

        RETURN channels : frames x channels array with the angle channels
        smoothed. Raises ValueError on a malformed file."""
        bones = [[] for _ in self.vertices]
        num_channels = sum(len(joint.meta['channels']) for joint in self.vertices)

        lin = self.read_line(fid)
        while lin != ':DEGREES':
            if not lin:
                # End of file reached: without this check the loop would
                # never terminate.
                raise ValueError("No ':DEGREES' marker found in channel file.")
            lin = self.read_line(fid)

        counter = 0
        lin = self.read_line(fid)
        while lin:
            parts = lin.split()
            if len(parts)==1:
                # A line holding a single token is a frame number; frames
                # must be numbered consecutively from 1.
                frame_no = int(parts[0])
                if frame_no:
                    counter += 1
                    if counter != frame_no:
                        raise ValueError('Unexpected frame number.')
                else:
                    raise ValueError('Single bone name ...')
            else:
                ind = self.get_index_by_name(parts[0])
                bones[ind].append(np.array([float(channel) for channel in parts[1:]]))
            lin = self.read_line(fid)

        num_frames = counter

        channels = np.zeros((num_frames, num_channels))

        start_val = 0
        end_val = 0
        for i, joint in enumerate(self.vertices):
            if len(joint.meta['channels'])>0:
                start_val = end_val
                end_val = end_val + len(joint.meta['channels'])
                for j in range(num_frames):
                    channels[j, start_val:end_val] = bones[i][j]
            self.resolve_indices(i, start_val)

        self.smooth_angle_channels(channels)
        return channels

    def read_documentation(self, fid):
        """Read documentation from an acclaim skeleton file stream.

        RETURN lin : the line that ended the section."""
        lin = self.read_line(fid)
        while lin and lin[0] != ':':
            self.documentation.append(lin)
            lin = self.read_line(fid)
        return lin

    def read_hierarchy(self, fid):
        """Read hierarchy information from acclaim skeleton file stream.

        RETURN lin : the first line after the section's 'end'."""
        lin = self.read_line(fid)

        while lin and lin != 'end':
            parts = lin.split()
            if lin != 'begin':
                # Each line is a parent name followed by its children.
                ind = self.get_index_by_name(parts[0])
                for child_name in parts[1:]:
                    self.vertices[ind].children.append(self.get_index_by_name(child_name))
            lin = self.read_line(fid)
        lin = self.read_line(fid)
        return lin

    def read_line(self, fid):
        """Read a line from a file stream, skipping blank lines and lines
        starting with '#'.

        RETURN lin : the stripped line, or '' once the end of the file has
        been reached."""
        while True:
            raw = fid.readline()
            if raw == '':
                # readline gives '' only at end of file; a blank line still
                # carries its newline.
                return ''
            lin = raw.strip()
            if lin and lin[0] != '#':
                return lin

    def read_root(self, fid):
        """Read the root node from an acclaim skeleton file stream.

        RETURN lin : the line that ended the section."""
        root_meta = self.vertices[0].meta
        lin = self.read_line(fid)
        while lin and lin[0] != ':':
            parts = lin.split()
            if parts[0]=='order':
                chans, order = self._parse_dof(parts[1:])
                root_meta['channels'].extend(chans)
                # order is reversed compared to bvh
                root_meta['order'] = order[::-1]

            elif parts[0]=='axis':
                # order is reversed compared to bvh
                root_meta['axis_order'] = parts[1][::-1].lower()
            elif parts[0]=='position':
                root_meta['offset'] = [float(parts[1]),
                                       float(parts[2]),
                                       float(parts[3])]
            elif parts[0]=='orientation':
                root_meta['orientation'] = [float(parts[1]),
                                            float(parts[2]),
                                            float(parts[3])]
            lin = self.read_line(fid)
        return lin

    def read_skel(self, fid):
        """Loads an acclaim skeleton format from a file stream.

        Raises ValueError when a line that should be a ':' section header
        is something else."""
        # Sections with a dedicated reader; each reader returns the line
        # that follows its section.
        section_readers = {'units': self.read_units,
                           'documentation': self.read_documentation,
                           'root': self.read_root,
                           'bonedata': self.read_bonedata,
                           'hierarchy': self.read_hierarchy}
        lin = self.read_line(fid)
        while lin:
            if lin[0] != ':':
                raise ValueError('Unrecognised file format')
            section = lin[1:]
            if section == 'name':
                # The name is on the following line; step past it so it is
                # not mistaken for a section header on the next pass.
                self.name = self.read_line(fid)
                lin = self.read_line(fid)
            elif section in section_readers:
                lin = section_readers[section](fid)
            else:
                # ':version ...' and any other single-line header is skipped.
                lin = self.read_line(fid)
        self.finalize()

    def read_units(self, fid):
        """Read units from an acclaim skeleton file stream.

        RETURN lin : the line that ended the section."""
        lin = self.read_line(fid)
        while lin and lin[0] != ':':
            parts = lin.split()
            if parts[0]=='mass':
                self.mass = float(parts[1])
            elif parts[0]=='length':
                self.length = float(parts[1])
            elif parts[0]=='angle':
                self.angle = parts[1]
            lin = self.read_line(fid)
        return lin

    def resolve_indices(self, index, start_val):
        """Get indices for the skeleton from the channels when loading in channel data.

        ARG index : index of the vertex to update.
        ARG start_val : column of that vertex's first channel.
        Stores 'rot_ind' and 'pos_ind' (x, y, z order) in the vertex meta,
        with -1 for any axis that has no channel."""
        channels = self.vertices[index].meta['channels']
        rot_ind = -np.ones(3, dtype=int)
        pos_ind = -np.ones(3, dtype=int)
        slots = {'Xrotation': (rot_ind, 0),
                 'Yrotation': (rot_ind, 1),
                 'Zrotation': (rot_ind, 2),
                 'Xposition': (pos_ind, 0),
                 'Yposition': (pos_ind, 1),
                 'Zposition': (pos_ind, 2)}
        for i, channel in enumerate(channels):
            if channel in slots:
                target, axis = slots[channel]
                target[axis] = start_val + i
        self.vertices[index].meta['rot_ind'] = list(rot_ind)
        self.vertices[index].meta['pos_ind'] = list(pos_ind)

    def set_rotation_matrices(self):
        """Set the meta information at each vertex to contain the correct matrices C and Cinv as prescribed by the rotations and rotation orders."""
        for joint in self.vertices:
            meta = joint.meta
            meta['C'] = rotation_matrix(meta['axis'][0],
                                        meta['axis'][1],
                                        meta['axis'][2],
                                        meta['axis_order'],
                                        degrees=True)
            # Todo: invert this by applying angle operations in reverse order
            meta['Cinv'] = np.linalg.inv(meta['C'])
|
||||||
|
|
||||||
|
|
||||||
|
# Utilities for loading in x,y,z data.
|
||||||
def load_text_data(dataset, directory, centre=True):
|
def load_text_data(dataset, directory, centre=True):
|
||||||
"""Load in a data set of marker points from the Ohio State University C3D motion capture files (http://accad.osu.edu/research/mocap/mocap_data.htm)."""
|
"""Load in a data set of marker points from the Ohio State University C3D motion capture files (http://accad.osu.edu/research/mocap/mocap_data.htm)."""
|
||||||
|
|
||||||
|
|
@ -72,3 +686,4 @@ def read_connections(file_name, point_names):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
skel = acclaim_skeleton()
|
||||||
|
|
|
||||||
|
|
@ -3,121 +3,7 @@ from mpl_toolkits.mplot3d import Axes3D
|
||||||
import GPy
|
import GPy
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import matplotlib as mpl
|
import matplotlib as mpl
|
||||||
|
import time
|
||||||
class lvm:
    """Interactive visualiser for a latent variable model: mouse movement
    over the latent axes is mapped through the model and the prediction is
    handed to `data_visualize`."""

    def __init__(self, model, data_visualize, latent_axes, latent_index=None):
        """
        :param model: the model; its `Q` attribute and `predict` method are used
        :param data_visualize: object whose `modify(y)` method displays a prediction
        :param latent_axes: a matplotlib Axes, or an indexable collection of
            them (the first entry's figure receives the event handlers)
        :param latent_index: the two latent dimensions shown on the axes,
            [0, 1] when omitted
        """
        # A fresh list per instance: a literal [0,1] default would be one
        # object shared by every instance created without this argument.
        if latent_index is None:
            latent_index = [0, 1]

        if isinstance(latent_axes,mpl.axes.Axes):
            canvas = latent_axes.figure.canvas
        else:
            canvas = latent_axes[0].figure.canvas

        # Keep every connection id so the handlers can be disconnected later;
        # self.cid keeps its previous value (the id of the last connection).
        handlers = [('button_press_event', self.on_click),
                    ('motion_notify_event', self.on_move),
                    ('axes_leave_event', self.on_leave),
                    ('axes_enter_event', self.on_enter)]
        self.cids = [canvas.mpl_connect(name, handler) for name, handler in handlers]
        self.cid = self.cids[-1]

        self.data_visualize = data_visualize
        self.model = model
        self.latent_axes = latent_axes

        self.called = False
        self.move_on = False
        self.latent_index = latent_index
        self.latent_dim = model.Q

    def on_enter(self,event):
        pass

    def on_leave(self,event):
        pass

    def on_click(self, event):
        """Toggle whether mouse movement updates the visualisation."""
        if event.inaxes!=self.latent_axes: return
        self.move_on = not self.move_on
        self.called = True

    def on_move(self, event):
        """Predict at the mouse position and display the result, once a click
        has switched tracking on."""
        if event.inaxes!=self.latent_axes: return
        if self.called and self.move_on:
            # Only the two displayed dimensions follow the mouse; every other
            # latent dimension is held at zero.
            latent_values = np.zeros((1,self.latent_dim))
            latent_values[0,self.latent_index] = np.array([event.xdata, event.ydata])
            y = self.model.predict(latent_values)[0]
            self.data_visualize.modify(y)
|
|
||||||
|
|
||||||
class lvm_subplots(lvm):
    """
    latent_axes is a np array of dimension np.ceil(Q/2) + 1,
    one for each pair of the axes, and the last one for the sensitivity histogram
    """
    def __init__(self, model, data_visualize, latent_axes=None, latent_index=None):
        # A fresh list per instance rather than a shared literal default.
        if latent_index is None:
            latent_index = [0, 1]
        n_pairs = int(np.ceil(model.Q/2.))
        self.nplots = n_pairs+1
        lvm.__init__(self,model,data_visualize,latent_axes,latent_index)
        # np.ceil returns a float and current numpy rejects a float array
        # size, so the integer count is used.
        self.latent_values = np.zeros(2*n_pairs) # possibly an extra dimension on this
        assert latent_axes.size == self.nplots
|
|
||||||
|
|
||||||
|
|
||||||
class lvm_dimselect(lvm):
    """
    A visualizer for latent variable models
    with selection by clicking on the histogram
    """
    def __init__(self, model, data_visualize):
        figure, axes_pair = plt.subplots(1,2)
        self.fig = figure
        latent_axes, self.hist_axes = axes_pair

        lvm.__init__(self,model,data_visualize,latent_axes,[0,1])
        self.latent_values_clicked = np.zeros(model.Q)
        self.clicked_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
        print("use left and right mouse butons to select dimensions")

    def on_click(self, event):
        """Handle a click on the histogram (choose which latent dimension is
        displayed) or on the latent axes (record the clicked position)."""
        clicked_axes = event.inaxes
        if clicked_axes==self.hist_axes:
            self.hist_axes.cla()
            self.hist_axes.bar(np.arange(self.model.Q),1./self.model.input_sensitivity(),color='b')
            # Clamp the clicked bar to a valid dimension index.
            new_index = max(0,min(int(np.round(event.xdata-0.5)),self.model.Q-1))
            # Button 1 sets the first displayed dimension, any other button
            # the second.
            slot = 0 if event.button==1 else 1
            self.latent_index[slot] = new_index
            self.hist_axes.bar(np.array(self.latent_index),1./self.model.input_sensitivity()[self.latent_index],color='r')
            self.latent_axes.cla()
            self.model.plot_latent(which_indices = self.latent_index,ax=self.latent_axes)
            self.clicked_handle = self.latent_axes.plot([self.latent_values_clicked[self.latent_index[0]]],self.latent_values_clicked[self.latent_index[1]],'rx',mew=2)[0]

        if clicked_axes==self.latent_axes:
            self.clicked_handle.set_visible(False)
            self.latent_values_clicked[self.latent_index] = np.array([event.xdata,event.ydata])
            self.clicked_handle = self.latent_axes.plot([self.latent_values_clicked[self.latent_index[0]]],self.latent_values_clicked[self.latent_index[1]],'rx',mew=2)[0]

        self.fig.canvas.draw()
        self.move_on=True
        self.called = True

    def on_move(self, event):
        """Predict at the mouse position, with every non-displayed dimension
        held at its last clicked value, and display the result."""
        if event.inaxes!=self.latent_axes: return
        if not (self.called and self.move_on): return
        latent_values = self.latent_values_clicked.copy()
        latent_values[self.latent_index] = np.array([event.xdata, event.ydata])
        y = self.model.predict(latent_values[None,:])[0]
        self.data_visualize.modify(y)

    def on_leave(self,event):
        """Show the prediction at the last clicked position again when the
        mouse leaves the axes."""
        clicked = self.latent_values_clicked.copy()
        y = self.model.predict(clicked[None,:])[0]
        self.data_visualize.modify(y)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class data_show:
|
class data_show:
|
||||||
"""
|
"""
|
||||||
|
|
@ -155,6 +41,160 @@ class vector_show(data_show):
|
||||||
self.handle.set_data(xdata, self.vals)
|
self.handle.set_data(xdata, self.vals)
|
||||||
self.axes.figure.canvas.draw()
|
self.axes.figure.canvas.draw()
|
||||||
|
|
||||||
|
|
||||||
|
class lvm(data_show):
    """Interactive view of a latent variable model.

    A red cross in the latent axes marks the current latent point; a click
    toggles tracking, and while tracking is on the point follows the mouse
    and the model prediction is pushed to ``data_visualize``.
    """
    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
        """Visualize a latent variable model

        :param vals: the latent point shown initially; if None, a copy of ``model.X[0]`` is used.
        :param model: the latent variable model to visualize.
        :param data_visualize: the object used to visualize the data which has been modelled.
        :type data_visualize: visualize.data_show type.
        :param latent_axes: the axes where the latent visualization should be plotted.
        :param latent_index: the two latent dimensions plotted on the x and y axes.
        """
        if vals is None:
            # ``== None`` is elementwise on arrays. Copy because on_move writes
            # into latent_values in place, which must not alter model.X.
            vals = np.array(model.X[0])

        data_show.__init__(self, vals, axes=latent_axes)

        if isinstance(latent_axes, mpl.axes.Axes):
            canvas = latent_axes.figure.canvas
        else:
            canvas = latent_axes[0].figure.canvas
        # Keep every connection id so that the callbacks can be disconnected.
        self.cids = [
            canvas.mpl_connect('button_press_event', self.on_click),
            canvas.mpl_connect('motion_notify_event', self.on_move),
            canvas.mpl_connect('axes_leave_event', self.on_leave),
            canvas.mpl_connect('axes_enter_event', self.on_enter),
        ]
        # Previously only the last id survived in self.cid; keep that attribute.
        self.cid = self.cids[-1]

        self.data_visualize = data_visualize
        self.model = model
        self.latent_axes = latent_axes

        self.called = False
        self.move_on = False
        # Copy: the default list is shared by all calls and subclasses assign
        # into self.latent_index.
        self.latent_index = list(latent_index)
        self.latent_dim = model.Q

        # The red cross which shows current latent point.
        self.latent_values = vals
        self.latent_handle = self.latent_axes.plot([0], [0], 'rx', mew=2)[0]
        self.modify(vals)

    def modify(self, vals):
        """When latent values are modified update the latent representation and also update the output visualization."""
        y = self.model.predict(vals)[0]
        self.data_visualize.modify(y)
        self.latent_handle.set_data(vals[self.latent_index[0]], vals[self.latent_index[1]])
        self.axes.figure.canvas.draw()

    def on_enter(self, event):
        """Callback for the pointer entering an axes; does nothing here."""
        pass

    def on_leave(self, event):
        """Callback for the pointer leaving an axes; does nothing here."""
        pass

    def on_click(self, event):
        """Toggle mouse tracking when the click is inside the latent axes."""
        if event.inaxes != self.latent_axes:
            return
        self.move_on = not self.move_on
        self.called = True

    def on_move(self, event):
        """Move the latent point to the cursor while tracking is switched on."""
        if event.inaxes != self.latent_axes:
            return
        if self.called and self.move_on:
            # Call modify code on move
            self.latent_values[self.latent_index[0]] = event.xdata
            self.latent_values[self.latent_index[1]] = event.ydata
            self.modify(self.latent_values)
|
||||||
|
|
||||||
|
class lvm_subplots(lvm):
    """
    latent_axes is a np array of dimension np.ceil(Q/2) + 1,
    one for each pair of the axes, and the last one for the sensitivity bar chart
    """
    def __init__(self, vals, model, data_visualize, latent_axes=None, latent_index=[0,1]):
        """Set up the visualizer and check the number of supplied axes.

        :param vals: initial latent point, forwarded to ``lvm.__init__``.
        :param model: the latent variable model; its ``Q`` attribute is read.
        :param data_visualize: forwarded unchanged to ``lvm.__init__``.
        :param latent_axes: array of axes; its ``size`` must equal ``self.nplots``.
        :param latent_index: the pair of latent dimensions to display.
        """
        self.nplots = int(np.ceil(model.Q/2.))+1
        # One base-class call with the current signature. A second, stale call
        # used to pass ``model`` in the ``vals`` position. The copy keeps the
        # shared default list out of the instance.
        lvm.__init__(self, vals, model, data_visualize, latent_axes, list(latent_index))
        # np.ceil returns a float; np.zeros needs an integer length.
        self.latent_values = np.zeros(2*int(np.ceil(self.model.Q/2.))) # possibly an extra dimension on this
        assert latent_axes.size == self.nplots
|
||||||
|
|
||||||
|
|
||||||
|
class lvm_dimselect(lvm):
    """
    A visualizer for latent variable models which allows selection of the latent dimensions to use by clicking on a bar chart of their length scales.
    """
    def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0, 1]):
        """Create any missing axes, initialise the base class and draw the bar chart.

        :param vals: initial latent point, forwarded to ``lvm.__init__``.
        :param model: the latent variable model.
        :param data_visualize: forwarded unchanged to ``lvm.__init__``.
        :param latent_axes: axes for the latent plot; created if None.
        :param sense_axes: axes for the sensitivity bar chart; created if None.
        :param latent_index: the pair of latent dimensions shown initially.
        """
        if latent_axes is None and sense_axes is None:
            self.fig, (latent_axes, self.sense_axes) = plt.subplots(1, 2)
        elif sense_axes is None:
            fig = plt.figure()
            self.sense_axes = fig.add_subplot(111)
        else:
            self.sense_axes = sense_axes
        if latent_axes is None:
            # Only sense_axes was supplied: the latent plot still needs axes.
            latent_axes = plt.figure().add_subplot(111)

        # on_click assigns into self.latent_index, so never pass the shared
        # default list itself down to the base class.
        lvm.__init__(self, vals, model, data_visualize, latent_axes, list(latent_index))
        self.show_sensitivities()
        # Parenthesised form prints the same text under Python 2 and 3.
        print("use left and right mouse butons to select dimensions")

    def show_sensitivities(self):
        """Redraw the bar chart of ``1 / input_sensitivity()`` with the selected dimensions highlighted."""
        # A click in the bar chart axis for selection a dimension.
        inv_sens = 1./self.model.input_sensitivity()
        x_dim, y_dim = self.latent_index[0], self.latent_index[1]

        self.sense_axes.cla()
        self.sense_axes.bar(np.arange(self.model.Q), inv_sens, color='b')

        if y_dim == x_dim:
            # Both plot axes show the same dimension: one yellow bar.
            self.sense_axes.bar(np.array(x_dim), inv_sens[x_dim], color='y')
        else:
            self.sense_axes.bar(np.array(x_dim), inv_sens[x_dim], color='g')
            self.sense_axes.bar(np.array(y_dim), inv_sens[y_dim], color='r')

        self.sense_axes.figure.canvas.draw()

    def on_click(self, event):
        """Select a dimension (bar chart click) or toggle tracking (latent plot click)."""
        if event.inaxes == self.sense_axes:
            new_index = max(0, min(int(np.round(event.xdata-0.5)), self.model.Q-1))
            if event.button == 1:
                # Make it red if and y-axis (red=port=left) if it is a left button click
                self.latent_index[1] = new_index
            else:
                # Make it green and x-axis (green=starboard=right) if it is a right button click
                self.latent_index[0] = new_index

            self.show_sensitivities()

            self.latent_axes.cla()
            self.model.plot_latent(which_indices=self.latent_index,
                                   ax=self.latent_axes)
            # cla() removed the old marker, so create a new one before modify().
            self.latent_handle = self.latent_axes.plot([0], [0], 'rx', mew=2)[0]
            self.modify(self.latent_values)

        elif event.inaxes == self.latent_axes:
            self.move_on = not self.move_on

        self.called = True

    def on_move(self, event):
        """Move the latent point to the cursor while tracking is switched on."""
        if event.inaxes != self.latent_axes:
            return
        if self.called and self.move_on:
            self.latent_values[self.latent_index[0]] = event.xdata
            self.latent_values[self.latent_index[1]] = event.ydata
            self.modify(self.latent_values)

    def on_leave(self, event):
        """Refresh the output visualization from the stored latent point."""
        latent_values = self.latent_values.copy()
        y = self.model.predict(latent_values[None, :])[0]
        self.data_visualize.modify(y)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class image_show(data_show):
|
class image_show(data_show):
|
||||||
"""Show a data vector as an image."""
|
"""Show a data vector as an image."""
|
||||||
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False):
|
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, invert=False, scale=False):
|
||||||
|
|
@ -184,71 +224,151 @@ class image_show(data_show):
|
||||||
#if self.invert:
|
#if self.invert:
|
||||||
# self.vals = -self.vals
|
# self.vals = -self.vals
|
||||||
|
|
||||||
class stick_show(data_show):
|
|
||||||
"""Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
|
class mocap_data_show(data_show):
    """Base class for visualizing motion capture data.

    Subclasses implement ``process_values`` to fill ``self.vals``, which is
    indexed here as an array with x, y and z in columns 0, 1 and 2.
    """

    def __init__(self, vals, axes=None, connect=None):
        """Draw the first frame.

        :param vals: raw values for the first frame, handed to ``process_values``.
        :param axes: axes to draw in; a new figure with 3d axes is created if None.
        :param connect: matrix whose non-zero entries (i, j) are drawn as an
            edge between points i and j; None draws no edges.
        """
        if axes is None:
            fig = plt.figure()
            axes = fig.add_subplot(111, projection='3d')
        data_show.__init__(self, vals, axes)

        self.connect = connect
        self.process_values(vals)
        self.initialize_axes()
        self.draw_vertices()
        self.finalize_axes()
        self.draw_edges()
        self.axes.figure.canvas.draw()

    def draw_vertices(self):
        """Scatter the current points and remember the handle."""
        self.points_handle = self.axes.scatter(self.vals[:, 0], self.vals[:, 1], self.vals[:, 2])

    def draw_edges(self):
        """Draw all edges as one line, with NaN separating consecutive segments."""
        self.line_handle = []
        # ``connect == None`` is elementwise on an array; test identity instead.
        if self.connect is not None:
            x = []
            y = []
            z = []
            self.I, self.J = np.nonzero(self.connect)
            for i, j in zip(self.I, self.J):
                x.extend((self.vals[i, 0], self.vals[j, 0], np.nan))
                y.extend((self.vals[i, 1], self.vals[j, 1], np.nan))
                z.extend((self.vals[i, 2], self.vals[j, 2], np.nan))
            self.line_handle = self.axes.plot(np.array(x), np.array(y), np.array(z), 'b-')

    def modify(self, vals):
        """Replace the displayed frame with one built from ``vals``."""
        self.process_values(vals)
        self.initialize_axes_modify()
        self.draw_vertices()
        self.finalize_axes_modify()
        self.draw_edges()
        self.axes.figure.canvas.draw()

    def process_values(self, vals):
        """Convert ``vals`` into ``self.vals``; must be provided by subclasses."""
        raise NotImplementedError("this needs to be implemented to use the data_show class")

    def initialize_axes(self):
        """Record the axis limits as the min/max of each column of the first frame."""
        self.x_lim = np.array([self.vals[:, 0].min(), self.vals[:, 0].max()])
        self.y_lim = np.array([self.vals[:, 1].min(), self.vals[:, 1].max()])
        self.z_lim = np.array([self.vals[:, 2].min(), self.vals[:, 2].max()])

    def initialize_axes_modify(self):
        """Remove the artists of the previous frame."""
        self.points_handle.remove()
        # line_handle is an empty list when no connect matrix was given.
        for handle in self.line_handle:
            handle.remove()

    def finalize_axes(self):
        """Apply the recorded limits, fix the aspect and switch autoscaling off."""
        self.axes.set_xlim(self.x_lim)
        self.axes.set_ylim(self.y_lim)
        self.axes.set_zlim(self.z_lim)
        self.axes.set_aspect(1)
        self.axes.autoscale(enable=False)

    def finalize_axes_modify(self):
        """Re-apply the recorded limits after the points have been redrawn."""
        self.axes.set_xlim(self.x_lim)
        self.axes.set_ylim(self.y_lim)
        self.axes.set_zlim(self.z_lim)
|
||||||
|
|
||||||
|
|
||||||
|
class stick_show(mocap_data_show):
    """Show a three dimensional point cloud as a figure. Connect elements of the figure together using the matrix connect."""
    def __init__(self, vals, axes=None, connect=None):
        """Forward all arguments unchanged to ``mocap_data_show.__init__``."""
        mocap_data_show.__init__(self, vals, axes, connect)

    def process_values(self, vals):
        """Reshape ``vals`` to (3, vals.shape[1] // 3) and store its transpose in ``self.vals``.

        :param vals: two-dimensional array; ``vals.shape[1]`` is read.
        """
        # Floor division: ``/`` gives a float under Python 3, which reshape rejects.
        self.vals = vals.reshape((3, vals.shape[1]//3)).T
|
||||||
|
|
||||||
|
class skeleton_show(mocap_data_show):
    """data_show class for visualizing motion capture data encoded as a skeleton with angles."""
    def __init__(self, vals, skel, padding=0, axes=None):
        """data_show class for visualizing motion capture data encoded as a skeleton with angles.

        :param vals: set of modeled angles to use for printing in the axis when it's first created.
        :type vals: np.array
        :param skel: skeleton object that has the parameters of the motion capture skeleton associated with it.
        :type skel: mocap.skeleton object
        :param padding: number of zero channels appended after the modelled ones before conversion.
        :type padding: int
        """
        self.skel = skel
        self.padding = padding
        connect = skel.connection_matrix()
        mocap_data_show.__init__(self, vals, axes, connect)

    def process_values(self, vals):
        """Takes a set of angles and converts them to the x,y,z coordinates in the internal representation of the class, ready for plotting.

        :param vals: the values that are being modelled."""
        if self.padding > 0:
            # The padded branch needs a column count, so accept a 1-D frame too.
            vals = np.atleast_2d(vals)
            channels = np.zeros((vals.shape[0], vals.shape[1]+self.padding))
            # Fill the leading columns; the slice is bounded by the column
            # count (shape[1]), not the row count.
            channels[:, 0:vals.shape[1]] = vals
        else:
            channels = vals
        vals_mat = self.skel.to_xyz(channels.flatten())
        # Write into a new array: swapping columns through an alias of
        # vals_mat overwrote Y before it was copied into Z.
        self.vals = np.zeros_like(vals_mat)
        # Flip the Y and Z axes
        self.vals[:, 0] = vals_mat[:, 0]
        self.vals[:, 1] = vals_mat[:, 2]
        self.vals[:, 2] = vals_mat[:, 1]
|
||||||
|
|
||||||
|
def wrap_around(vals, lim, connect):
    """Wrap values into a range and drop connections between differently wrapped points.

    :param vals: one value per point.
    :param lim: pair ``(lower, upper)``; ``upper - lower`` is the wrap period.
    :param connect: square connection matrix; it is modified in place.
    :returns: tuple ``(vals, connect)`` of the wrapped values and the same
        ``connect`` object with the affected entries set to False.
    """
    quot = lim[1] - lim[0]
    # np.fmod, np.floor and np.flatnonzero replace the MATLAB names rem,
    # floor and find, which are not defined in Python.
    vals = np.fmod(vals, quot) + lim[0]
    # NOTE(review): the bin index is taken from the already wrapped values, as
    # in the original statement order — confirm this is the intended criterion.
    nVals = np.floor(vals / quot)
    for i in range(connect.shape[0]):
        for j in np.flatnonzero(connect[i, :]):
            if nVals[i] != nVals[j]:
                connect[i, j] = False
    return vals, connect
|
||||||
|
|
||||||
|
|
||||||
|
def data_play(Y, visualizer, frame_rate=30):
    """Play a data set using the data_show object given.

    :param Y: the data set to be visualized; it is iterated frame by frame.
    :param visualizer: the data show object used to display each frame.
    :type visualizer: data_show
    :param frame_rate: frames shown per second; the pause after each frame is ``1 / frame_rate`` seconds.

    Example usage:

    This example loads in the CMU mocap database (http://mocap.cs.cmu.edu) subject number 35 motion number 01. It then plays it using the skeleton_show visualize object.

    data = GPy.util.datasets.cmu_mocap(subject='35', train_motions=['01'])
    Y = data['Y']
    Y[:, 0:3] = 0.   # Make figure walk in place
    visualize = GPy.util.visualize.skeleton_show(Y[0, :], data['skel'])
    GPy.util.visualize.data_play(Y, visualize)
    """
    delay = 1./float(frame_rate)
    for y in Y:
        # Iterating a 2-D array yields 1-D rows, but the visualizers'
        # process_values index vals.shape[1]; hand over a (1, D) frame.
        visualizer.modify(np.atleast_2d(y))
        time.sleep(delay)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue