mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-21 14:05:14 +02:00
Revert "Minor edits to reading Lee data in datasets.py"
This reverts commit 730e229238.
This commit is contained in:
parent
1dfe7ed0a8
commit
0812a0e15c
57 changed files with 5 additions and 5969 deletions
|
|
@ -1,5 +0,0 @@
|
|||
Combination covariances, with a particular flag (partition?) allow interogation of the individual covariance contribution. So if we have k_3 = k_1 + k_2 because f_3 = f_1 + f_2 then we allow a multi output covariance over f_1, f_2 and f_3 jointly. Then users can make observations of f_3 and f_1 and build a posterior over f_2 using multiple output encoding. When this flag is set an extra input is added to deal with it. We allow for a new 'combination covariance' of the form scale and add so that
|
||||
|
||||
f_3 = f_1*a + f_2
|
||||
|
||||
Where a can also be negative.
|
||||
|
|
@ -1,52 +0,0 @@
|
|||
try:
|
||||
import sympy as sym
|
||||
sympy_available=True
|
||||
except ImportError:
|
||||
sympy_available=False
|
||||
|
||||
import numpy as np
|
||||
from GPy.util.symbolic import differfln
|
||||
from symbolic import Symbolic
|
||||
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
|
||||
|
||||
class Ode1_eq_lfm(Symbolic):
|
||||
"""
|
||||
A symbolic covariance based on a first order differential equation being driven by a latent force that is an exponentiated quadratic.
|
||||
|
||||
"""
|
||||
def __init__(self, output_dim=1, param=None, name='Ode1_eq_lfm'):
|
||||
|
||||
input_dim = 2
|
||||
x_0, z_0, decay_i, decay_j, lengthscale = sym.symbols('x_0, z_0, decay_i, decay_j, lengthscale', positive=True)
|
||||
scale_i, scale_j = sym.symbols('scale_i, scale_j')
|
||||
# note that covariance only valid for positive time.
|
||||
|
||||
class sim_h(Function):
|
||||
nargs = 5
|
||||
@classmethod
|
||||
def eval(cls, t, tprime, d_i, d_j, l):
|
||||
half_l_di = 0.5*l*d_i
|
||||
arg_1 = half_l_di + tprime/l
|
||||
arg_2 = half_l_di - (t-tprime)/l
|
||||
ln_part_1 = differfln(arg_1, arg_2)
|
||||
arg_1 = half_l_di
|
||||
arg_2 = half_l_di - t/l
|
||||
ln_part_2 = differfln(half_l_di, half_l_di - t/l)
|
||||
|
||||
|
||||
return (exp(half_l_di*half_l_di
|
||||
- d_i*(t-tprime)
|
||||
+ ln_part_1
|
||||
- log(d_i + d_j))
|
||||
- exp(half_l_di*half_l_di
|
||||
- d_i*t - d_j*tprime
|
||||
+ ln_part_2
|
||||
- log(d_i + d_j)))
|
||||
|
||||
|
||||
f = scale_i*scale_j*(sim_h(x_0, z_0, decay_i, decay_j, lengthscale)
|
||||
+ sim_h(z_0, x_0, decay_j, decay_i, lengthscale))
|
||||
# extra input dim is to signify the output dimension.
|
||||
super(Ode1_eq_lfm, self).__init__(input_dim, k=f, output_dim=output_dim, name=name)
|
||||
self.lengthscale.constrain_positive()
|
||||
self.decay.constrain_positive()
|
||||
|
|
@ -1,445 +0,0 @@
|
|||
# Check Matthew Rocklin's blog post.
|
||||
try:
|
||||
import sympy as sym
|
||||
sympy_available=True
|
||||
from sympy.utilities.lambdify import lambdify
|
||||
from GPy.util.symbolic import stabilise
|
||||
except ImportError:
|
||||
sympy_available=False
|
||||
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
|
||||
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln, differfln
|
||||
from ...core.parameterization import Param
|
||||
|
||||
class Symbolic(Kern):
|
||||
"""
|
||||
A kernel object, where all the hard work is done by sympy.
|
||||
|
||||
:param k: the covariance function
|
||||
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||
|
||||
To construct a new sympy kernel, you'll need to define:
|
||||
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||
- that's it! we'll extract the variables from the function k.
|
||||
|
||||
Note:
|
||||
- to handle multiple inputs, call them x_1, z_1, etc
|
||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||
"""
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]):
|
||||
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
|
||||
self.func_modules = func_modules
|
||||
self.func_modules += [{'gamma':gamma,
|
||||
'gammaln':gammaln,
|
||||
'erf':erf, 'erfc':erfc,
|
||||
'erfcx':erfcx,
|
||||
'polygamma':polygamma,
|
||||
'differfln':differfln,
|
||||
'normcdf':normcdf,
|
||||
'normcdfln':normcdfln,
|
||||
'logistic':logistic,
|
||||
'logisticln':logisticln},
|
||||
'numpy']
|
||||
|
||||
super(Symbolic, self).__init__(input_dim, active_dims, name)
|
||||
|
||||
self._sym_k = k
|
||||
|
||||
# pull the variable names out of the symbolic covariance function.
|
||||
sym_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||
self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||
self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||
|
||||
# Check that variable names make sense.
|
||||
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
|
||||
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
|
||||
assert len(self._sym_x)==len(self._sym_z)
|
||||
x_dim=len(self._sym_x)
|
||||
|
||||
self._sym_kdiag = k
|
||||
for x, z in zip(self._sym_x, self._sym_z):
|
||||
self._sym_kdiag = self._sym_kdiag.subs(z, x)
|
||||
|
||||
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||
self._real_input_dim = x_dim
|
||||
# Check input dim is number of xs + 1 if output_dim is >1
|
||||
assert self.input_dim == x_dim + int(output_dim > 1)
|
||||
self.output_dim = output_dim
|
||||
|
||||
# extract parameter names from the covariance
|
||||
thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||
|
||||
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||
if self.output_dim>1:
|
||||
self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||
self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||
|
||||
# Make sure parameter appears with both indices!
|
||||
assert len(self._sym_theta_i)==len(self._sym_theta_j)
|
||||
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
|
||||
|
||||
# Extract names of shared parameters (those without a subscript)
|
||||
self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
|
||||
|
||||
self.num_split_params = len(self._sym_theta_i)
|
||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
|
||||
# Add split parameters to the model.
|
||||
for theta in self._split_theta_names:
|
||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
|
||||
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
|
||||
self.add_parameter(getattr(self, theta))
|
||||
|
||||
|
||||
self.num_shared_params = len(self._sym_theta)
|
||||
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
|
||||
self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
|
||||
|
||||
else:
|
||||
self.num_split_params = 0
|
||||
self._split_theta_names = []
|
||||
self._sym_theta = thetas
|
||||
self.num_shared_params = len(self._sym_theta)
|
||||
|
||||
# Add parameters to the model.
|
||||
for theta in self._sym_theta:
|
||||
val = 1.0
|
||||
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
|
||||
if param is not None:
|
||||
if param.has_key(theta.name):
|
||||
val = param[theta.name]
|
||||
setattr(self, theta.name, Param(theta.name, val, None))
|
||||
self.add_parameters(getattr(self, theta.name))
|
||||
|
||||
# Differentiate with respect to parameters.
|
||||
derivative_arguments = self._sym_x + self._sym_theta
|
||||
if self.output_dim > 1:
|
||||
derivative_arguments += self._sym_theta_i
|
||||
|
||||
self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
|
||||
self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
|
||||
|
||||
# This gives the parameters for the arg list.
|
||||
self.arg_list = self._sym_x + self._sym_z + self._sym_theta
|
||||
self.diag_arg_list = self._sym_x + self._sym_theta
|
||||
if self.output_dim > 1:
|
||||
self.arg_list += self._sym_theta_i + self._sym_theta_j
|
||||
self.diag_arg_list += self._sym_theta_i
|
||||
|
||||
# Check if there are additional linear operators on the covariance.
|
||||
self._sym_operators = operators
|
||||
# TODO: Deal with linear operators
|
||||
#if self._sym_operators:
|
||||
# for operator in self._sym_operators:
|
||||
|
||||
# psi_stats aren't yet implemented.
|
||||
if False:
|
||||
self.compute_psi_stats()
|
||||
|
||||
# generate the code for the covariance functions
|
||||
self._gen_code()
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sym_k+other._sym_k)
|
||||
|
||||
def _gen_code(self):
|
||||
#fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
|
||||
self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules)
|
||||
self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()}
|
||||
self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules)
|
||||
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()}
|
||||
|
||||
def K(self,X,X2=None):
|
||||
self._K_computations(X, X2)
|
||||
return self._K_function(**self._arguments)
|
||||
|
||||
|
||||
def Kdiag(self,X):
|
||||
self._K_computations(X)
|
||||
return self._Kdiag_function(**self._diag_arguments)
|
||||
|
||||
def _param_grad_helper(self,partial,X,Z,target):
|
||||
pass
|
||||
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||
self._K_computations(X, X2)
|
||||
gradients_X = np.zeros_like(X)
|
||||
for i, x in enumerate(self._sym_x):
|
||||
gf = self._K_derivatives_code[x.name]
|
||||
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
|
||||
if X2 is None:
|
||||
gradients_X *= 2
|
||||
return gradients_X
|
||||
|
||||
def gradients_X_diag(self, dL_dK, X):
|
||||
self._K_computations(X)
|
||||
dX = np.zeros_like(X)
|
||||
for i, x in enumerate(self._sym_x):
|
||||
gf = self._Kdiag_derivatives_code[x.name]
|
||||
dX[:, i] = gf(**self._diag_arguments)*dL_dK
|
||||
return dX
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
# Need to extract parameters to local variables first
|
||||
self._K_computations(X, X2)
|
||||
for theta in self._sym_theta:
|
||||
parameter = getattr(self, theta.name)
|
||||
gf = self._K_derivatives_code[theta.name]
|
||||
gradient = (gf(**self._arguments)*dL_dK).sum()
|
||||
if X2 is not None:
|
||||
gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
|
||||
setattr(parameter, 'gradient', gradient)
|
||||
if self.output_dim>1:
|
||||
for theta in self._sym_theta_i:
|
||||
parameter = getattr(self, theta.name[:-2])
|
||||
gf = self._K_derivatives_code[theta.name]
|
||||
A = gf(**self._arguments)*dL_dK
|
||||
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
|
||||
for i in np.arange(self.output_dim)])
|
||||
if X2 is None:
|
||||
gradient *= 2
|
||||
else:
|
||||
A = gf(**self._reverse_arguments)*dL_dK.T
|
||||
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
|
||||
for i in np.arange(self.output_dim)])
|
||||
setattr(parameter, 'gradient', gradient)
|
||||
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self._K_computations(X)
|
||||
for theta in self._sym_theta:
|
||||
parameter = getattr(self, theta.name)
|
||||
gf = self._Kdiag_derivatives_code[theta.name]
|
||||
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
|
||||
if self.output_dim>1:
|
||||
for theta in self._sym_theta_i:
|
||||
parameter = getattr(self, theta.name[:-2])
|
||||
gf = self._Kdiag_derivatives_code[theta.name]
|
||||
a = gf(**self._diag_arguments)*dL_dKdiag
|
||||
setattr(parameter, 'gradient',
|
||||
np.asarray([a[np.where(self._output_ind==i)].sum()
|
||||
for i in np.arange(self.output_dim)]))
|
||||
|
||||
def _K_computations(self, X, X2=None):
|
||||
"""Set up argument lists for the derivatives."""
|
||||
# Could check if this needs doing or not, there could
|
||||
# definitely be some computational savings by checking for
|
||||
# parameter updates here.
|
||||
self._arguments = {}
|
||||
self._diag_arguments = {}
|
||||
for i, x in enumerate(self._sym_x):
|
||||
self._arguments[x.name] = X[:, i][:, None]
|
||||
self._diag_arguments[x.name] = X[:, i][:, None]
|
||||
if self.output_dim > 1:
|
||||
self._output_ind = np.asarray(X[:, -1], dtype='int')
|
||||
for i, theta in enumerate(self._sym_theta_i):
|
||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
|
||||
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||
for theta in self._sym_theta:
|
||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
|
||||
self._diag_arguments[theta.name] = self._arguments[theta.name]
|
||||
|
||||
if X2 is not None:
|
||||
for i, z in enumerate(self._sym_z):
|
||||
self._arguments[z.name] = X2[:, i][None, :]
|
||||
if self.output_dim > 1:
|
||||
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
|
||||
for i, theta in enumerate(self._sym_theta_j):
|
||||
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
|
||||
else:
|
||||
for z in self._sym_z:
|
||||
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
|
||||
if self.output_dim > 1:
|
||||
self._output_ind2 = self._output_ind
|
||||
for theta in self._sym_theta_j:
|
||||
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
|
||||
if X2 is not None:
|
||||
# These arguments are needed in gradients when X2 is not equal to X.
|
||||
self._reverse_arguments = self._arguments
|
||||
for x, z in zip(self._sym_x, self._sym_z):
|
||||
self._reverse_arguments[x.name] = self._arguments[z.name].T
|
||||
self._reverse_arguments[z.name] = self._arguments[x.name].T
|
||||
if self.output_dim > 1:
|
||||
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
|
||||
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
|
||||
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
|
||||
|
||||
if False:
|
||||
class Symcombine(CombinationKernel):
|
||||
"""
|
||||
Combine list of given sympy covariances together with the provided operations.
|
||||
"""
|
||||
def __init__(self, subkerns, operations, name='sympy_combine'):
|
||||
super(Symcombine, self).__init__(subkerns, name)
|
||||
for subkern, operation in zip(subkerns, operations):
|
||||
self._sym_k += self._k_double_operate(subkern._sym_k, operation)
|
||||
|
||||
#def _double_operate(self, k, operation):
|
||||
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def K(self, X, X2=None, which_parts=None):
|
||||
"""
|
||||
Combine covariances with a linear operator.
|
||||
"""
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
# if only one part is given
|
||||
which_parts = [which_parts]
|
||||
return reduce(np.add, (p.K(X, X2) for p in which_parts))
|
||||
|
||||
@Cache_this(limit=2, force_kwargs=['which_parts'])
|
||||
def Kdiag(self, X, which_parts=None):
|
||||
assert X.shape[1] == self.input_dim
|
||||
if which_parts is None:
|
||||
which_parts = self.parts
|
||||
elif not isinstance(which_parts, (list, tuple)):
|
||||
# if only one part is given
|
||||
which_parts = [which_parts]
|
||||
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
|
||||
|
||||
def update_gradients_diag(self, dL_dK, X):
|
||||
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
"""Compute the gradient of the objective function with respect to X.
|
||||
|
||||
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
|
||||
:type dL_dK: np.ndarray (num_samples x num_inducing)
|
||||
:param X: Observed data inputs
|
||||
:type X: np.ndarray (num_samples x input_dim)
|
||||
:param X2: Observed data inputs (optional, defaults to X)
|
||||
:type X2: np.ndarray (num_inducing x input_dim)"""
|
||||
|
||||
target = np.zeros(X.shape)
|
||||
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def gradients_X_diag(self, dL_dKdiag, X):
|
||||
target = np.zeros(X.shape)
|
||||
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
|
||||
return target
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
|
||||
#return psi2
|
||||
# compute the "cross" terms
|
||||
from static import White, Bias
|
||||
from rbf import RBF
|
||||
#from rbf_inv import RBFInv
|
||||
from linear import Linear
|
||||
#ffrom fixed import Fixed
|
||||
|
||||
for p1, p2 in itertools.combinations(self.parts, 2):
|
||||
# i1, i2 = p1.active_dims, p2.active_dims
|
||||
# white doesn;t combine with anything
|
||||
if isinstance(p1, White) or isinstance(p2, White):
|
||||
pass
|
||||
# rbf X bias
|
||||
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
|
||||
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
|
||||
tmp = p2.psi1(Z, variational_posterior)
|
||||
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
|
||||
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
|
||||
tmp = p1.psi1(Z, variational_posterior)
|
||||
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
|
||||
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
|
||||
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
|
||||
tmp1 = p1.psi1(Z, variational_posterior)
|
||||
tmp2 = p2.psi1(Z, variational_posterior)
|
||||
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
|
||||
else:
|
||||
raise NotImplementedError, "psi2 cannot be computed for this kernel"
|
||||
return psi2
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target = np.zeros(Z.shape)
|
||||
for p1 in self.parts:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self.parts:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
return target
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
from static import White, Bias
|
||||
target_mu = np.zeros(variational_posterior.shape)
|
||||
target_S = np.zeros(variational_posterior.shape)
|
||||
for p1 in self._parameters_:
|
||||
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
|
||||
eff_dL_dpsi1 = dL_dpsi1.copy()
|
||||
for p2 in self._parameters_:
|
||||
if p2 is p1:
|
||||
continue
|
||||
if isinstance(p2, White):
|
||||
continue
|
||||
elif isinstance(p2, Bias):
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
|
||||
else:
|
||||
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
|
||||
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
|
||||
target_mu += a
|
||||
target_S += b
|
||||
return target_mu, target_S
|
||||
|
||||
def _getstate(self):
|
||||
"""
|
||||
Get the current state of the class,
|
||||
here just all the indices, rest can get recomputed
|
||||
"""
|
||||
return super(Add, self)._getstate()
|
||||
|
||||
def _setstate(self, state):
|
||||
super(Add, self)._setstate(state)
|
||||
|
||||
def add(self, other, name='sum'):
|
||||
if isinstance(other, Add):
|
||||
other_params = other._parameters_.copy()
|
||||
for p in other_params:
|
||||
other.remove_parameter(p)
|
||||
self.add_parameters(*other_params)
|
||||
else: self.add_parameter(other)
|
||||
return self
|
||||
|
|
@ -1,564 +0,0 @@
|
|||
# Check Matthew Rocklin's blog post.
|
||||
try:
|
||||
import sympy as sp
|
||||
sympy_available=True
|
||||
from sympy.utilities.autowrap import ufuncify
|
||||
except ImportError:
|
||||
sympy_available=False
|
||||
exit()
|
||||
|
||||
from sympy.core.cache import clear_cache
|
||||
from sympy.utilities.codegen import codegen
|
||||
|
||||
try:
|
||||
from scipy import weave
|
||||
weave_available = True
|
||||
except ImportError:
|
||||
weave_available = False
|
||||
|
||||
import os
|
||||
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||
import sys
|
||||
import numpy as np
|
||||
import re
|
||||
import tempfile
|
||||
import pdb
|
||||
import ast
|
||||
|
||||
from kernpart import Kernpart
|
||||
from ...core.parameterization import Param
|
||||
from ...core.parameterization.transformations import Logexp
|
||||
# TODO have this set up in a set up file!
|
||||
user_code_storage = tempfile.gettempdir()
|
||||
|
||||
class spkern(Kern):
|
||||
"""
|
||||
A kernel object, where all the hard work in done by sympy.
|
||||
|
||||
:param k: the covariance function
|
||||
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||
|
||||
To construct a new sympy kernel, you'll need to define:
|
||||
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||
- that's it! we'll extract the variables from the function k.
|
||||
|
||||
Note:
|
||||
- to handle multiple inputs, call them x_1, z_1, etc
|
||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||
"""
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
|
||||
|
||||
if name is None:
|
||||
name='sympykern'
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
super(spkern, self).__init__(input_dim, name)
|
||||
|
||||
self._sp_k = k
|
||||
|
||||
# pull the variable names out of the symbolic covariance function.
|
||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||
|
||||
# Check that variable names make sense.
|
||||
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
||||
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
||||
assert len(self._sp_x)==len(self._sp_z)
|
||||
x_dim=len(self._sp_x)
|
||||
|
||||
# If it is a multi-output covariance, add an input for indexing the outputs.
|
||||
self._real_input_dim = x_dim
|
||||
# Check input dim is number of xs + 1 if output_dim is >1
|
||||
assert self.input_dim == x_dim + int(output_dim > 1)
|
||||
self.output_dim = output_dim
|
||||
|
||||
# extract parameter names from the covariance
|
||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||
|
||||
|
||||
# Look for parameters with index (subscripts), they are associated with different outputs.
|
||||
if self.output_dim>1:
|
||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||
|
||||
# Make sure parameter appears with both indices!
|
||||
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
||||
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
||||
|
||||
# Extract names of shared parameters (those without a subscript)
|
||||
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
||||
|
||||
self.num_split_params = len(self._sp_theta_i)
|
||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
||||
for theta in self._split_theta_names:
|
||||
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
|
||||
self.add_parameters(getattr(self, theta))
|
||||
|
||||
#setattr(self, theta, np.ones(self.output_dim))
|
||||
|
||||
self.num_shared_params = len(self._sp_theta)
|
||||
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
||||
|
||||
else:
|
||||
self.num_split_params = 0
|
||||
self._split_theta_names = []
|
||||
self._sp_theta = thetas
|
||||
self.num_shared_params = len(self._sp_theta)
|
||||
#self.num_params = self.num_shared_params
|
||||
|
||||
# Add parameters to the model.
|
||||
for theta in self._sp_theta:
|
||||
val = 1.0
|
||||
if param is not None:
|
||||
if param.has_key(theta):
|
||||
val = param[theta]
|
||||
#setattr(self, theta.name, val)
|
||||
setattr(self, theta.name, Param(theta.name, val, None))
|
||||
self.add_parameters(getattr(self, theta.name))
|
||||
#deal with param
|
||||
#self._set_params(self._get_params())
|
||||
|
||||
# Differentiate with respect to parameters.
|
||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
||||
if self.output_dim > 1:
|
||||
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
||||
|
||||
# differentiate with respect to input variables.
|
||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
||||
|
||||
# psi_stats aren't yet implemented.
|
||||
if False:
|
||||
self.compute_psi_stats()
|
||||
|
||||
self._code = {}
|
||||
|
||||
# generate the code for the covariance functions
|
||||
self._gen_code()
|
||||
|
||||
if weave_available:
|
||||
if False:
|
||||
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
||||
else:
|
||||
extra_compile_args = []
|
||||
|
||||
self.weave_kwargs = {
|
||||
'support_code': None, #self._function_code,
|
||||
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
|
||||
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
|
||||
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
|
||||
'extra_compile_args':extra_compile_args,
|
||||
'extra_link_args':['-lgomp'],
|
||||
'verbose':True}
|
||||
self.parameters_changed() # initializes caches
|
||||
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sp_k+other._sp_k)
|
||||
|
||||
def _gen_code(self):
|
||||
|
||||
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
||||
code_list = [('k',self._sp_k)]
|
||||
# gradients with respect to covariance input
|
||||
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
||||
# gradient with respect to parameters
|
||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
||||
# gradient with respect to multiple output parameters
|
||||
if self.output_dim > 1:
|
||||
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
||||
# generate c functions from sympy objects
|
||||
if weave_available:
|
||||
code_type = "C"
|
||||
else:
|
||||
code_type = "PYTHON"
|
||||
# Need to add the sympy_helpers header in here.
|
||||
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
||||
codegen(code_list,
|
||||
code_type,
|
||||
self.name,
|
||||
argument_sequence=argument_sequence)
|
||||
|
||||
|
||||
# Use weave to compute the underlying functions.
|
||||
if weave_available:
|
||||
# put the header file where we can find it
|
||||
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
|
||||
f.write(self._function_header)
|
||||
f.close()
|
||||
|
||||
|
||||
if weave_available:
|
||||
# Substitute any known derivatives which sympy doesn't compute
|
||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||
# put the cpp file in user code storage (defaults to temp file location)
|
||||
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
|
||||
else:
|
||||
# put the python file in user code storage
|
||||
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
|
||||
f.write(self._function_code)
|
||||
f.close()
|
||||
|
||||
if weave_available:
|
||||
# arg_list will store the arguments required for the C code.
|
||||
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
||||
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
||||
|
||||
# for multiple outputs reverse argument list is also required
|
||||
if self.output_dim>1:
|
||||
reverse_input_arg_list = list(input_arg_list)
|
||||
reverse_input_arg_list.reverse()
|
||||
|
||||
# This gives the parameters for the arg list.
|
||||
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
||||
arg_list = input_arg_list + param_arg_list
|
||||
|
||||
precompute_list=[]
|
||||
if self.output_dim > 1:
|
||||
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
|
||||
# For multiple outputs, also need the split parameters.
|
||||
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
||||
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
||||
arg_list += split_param_arg_list
|
||||
reverse_arg_list += split_param_reverse_arg_list
|
||||
# Extract the right output indices from the inputs.
|
||||
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
||||
precompute_list += c_define_output_indices
|
||||
reverse_arg_string = ", ".join(reverse_arg_list)
|
||||
arg_string = ", ".join(arg_list)
|
||||
precompute_string = "\n".join(precompute_list)
|
||||
|
||||
# Now we use the arguments in code that computes the separate parts.
|
||||
|
||||
# Any precomputations will be done here eventually.
|
||||
self._precompute = \
|
||||
"""
|
||||
// Precompute code would go here. It will be called when parameters are updated.
|
||||
"""
|
||||
|
||||
# Here's the code to do the looping for K
|
||||
self._code['K'] =\
|
||||
"""
|
||||
// _K_code
|
||||
// Code for computing the covariance function.
|
||||
int i;
|
||||
int j;
|
||||
int n = target_array->dimensions[0];
|
||||
int num_inducing = target_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<n;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
//target[i*num_inducing+j] =
|
||||
TARGET2(i, j) += k(%s);
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
|
||||
# adding a string representation of the function in the
|
||||
# comment forces recompile when needed
|
||||
self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
|
||||
|
||||
|
||||
# Code to compute diagonal of covariance.
|
||||
diag_arg_string = re.sub('Z','X',arg_string)
|
||||
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
|
||||
diag_arg_string = re.sub('j','i',diag_arg_string)
|
||||
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
|
||||
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
|
||||
diag_precompute_string = re.sub('j','i',diag_precompute_string)
|
||||
# Code to do the looping for Kdiag
|
||||
self._code['Kdiag'] =\
|
||||
"""
|
||||
// _code['Kdiag']
|
||||
// Code for computing diagonal of covariance function.
|
||||
int i;
|
||||
int n = target_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for
|
||||
for (i=0;i<n;i++){
|
||||
%s
|
||||
//target[i] =
|
||||
TARGET1(i)=k(%s);
|
||||
}
|
||||
%s
|
||||
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
# Code to compute gradients
|
||||
if self.output_dim>1:
|
||||
for i, theta in enumerate(self._sp_theta_i):
|
||||
grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
|
||||
grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
|
||||
grad_func_list = c_define_output_indices+grad_func_list
|
||||
|
||||
grad_func_string = '\n'.join(grad_func_list)
|
||||
self._code['dK_d' + theta.name] =\
|
||||
"""
|
||||
int i;
|
||||
int j;
|
||||
int n = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<n;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
||||
self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
|
||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
|
||||
self._code['dKdiag_d' + theta.name] =\
|
||||
"""
|
||||
// _dKdiag_dtheta_code
|
||||
// Code for computing gradient of diagonal with respect to parameters.
|
||||
int i;
|
||||
int n = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (i=0;i<n;i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
for i, theta in enumerate(self._sp_theta):
|
||||
grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
|
||||
grad_func_string = '\n'.join(grad_func_list)
|
||||
|
||||
self._code['dK_d' + theta.name] =\
|
||||
"""
|
||||
// _dK_dtheta_code
|
||||
// Code for computing gradient of covariance with respect to parameters.
|
||||
int i;
|
||||
int j;
|
||||
int n = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<n;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
||||
self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
|
||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
|
||||
self._code['dKdiag_d' + theta.name] =\
|
||||
"""
|
||||
// _dKdiag_dtheta_code
|
||||
// Code for computing gradient of diagonal with respect to parameters.
|
||||
int i;
|
||||
int n = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (i=0;i<n;i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
|
||||
|
||||
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
|
||||
gradX_func_list = []
|
||||
if self.output_dim>1:
|
||||
gradX_func_list += c_define_output_indices
|
||||
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
|
||||
gradX_func_string = "\n".join(gradX_func_list)
|
||||
|
||||
self._code['dK_dX'] = \
|
||||
"""
|
||||
// _dK_dX_code
|
||||
// Code for computing gradient of covariance with respect to inputs.
|
||||
int i;
|
||||
int j;
|
||||
int n = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<n; i++){
|
||||
for (j=0; j<num_inducing; j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
|
||||
|
||||
|
||||
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
|
||||
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
|
||||
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
|
||||
diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
|
||||
|
||||
# Code for gradients of Kdiag wrt X
|
||||
self._code['dKdiag_dX'] = \
|
||||
"""
|
||||
// _dKdiag_dX_code
|
||||
// Code for computing gradient of diagonal with respect to inputs.
|
||||
int n = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (int i=0;i<n; i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
|
||||
# string representation forces recompile when needed Get rid
|
||||
# of Zs in argument for diagonal. TODO: Why wasn't
|
||||
# diag_func_string called here? Need to check that.
|
||||
|
||||
|
||||
|
||||
|
||||
#TODO: insert multiple functions here via string manipulation
|
||||
#TODO: similar functions for psi_stats
|
||||
#TODO: similar functions when cython available.
|
||||
#TODO: similar functions when only python available.
|
||||
|
||||
def _get_arg_names(self, target=None, Z=None, partial=None):
|
||||
arg_names = ['X']
|
||||
if target is not None:
|
||||
arg_names += ['target']
|
||||
for shared_params in self._sp_theta:
|
||||
arg_names += [shared_params.name]
|
||||
if Z is not None:
|
||||
arg_names += ['Z']
|
||||
if partial is not None:
|
||||
arg_names += ['partial']
|
||||
if self.output_dim>1:
|
||||
arg_names += self._split_theta_names
|
||||
arg_names += ['output_dim']
|
||||
return arg_names
|
||||
|
||||
def _generate_inline(self, code, X, target=None, Z=None, partial=None):
|
||||
output_dim = self.output_dim
|
||||
# Need to extract parameters to local variables first
|
||||
for shared_params in self._sp_theta:
|
||||
locals()[shared_params.name] = getattr(self, shared_params.name)
|
||||
|
||||
for split_params in self._split_theta_names:
|
||||
locals()[split_params] = np.asarray(getattr(self, split_params))
|
||||
arg_names = self._get_arg_names(target, Z, partial)
|
||||
|
||||
if weave_available:
|
||||
return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
|
||||
else:
|
||||
raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
|
||||
|
||||
def K(self,X,Z,target):
|
||||
if Z is None:
|
||||
self._generate_inline(self._code['K_X'], X, target)
|
||||
else:
|
||||
self._generate_inline(self._code['K'], X, target, Z)
|
||||
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
self._generate_inline(self._code['Kdiag'], X, target)
|
||||
|
||||
def _param_grad_helper(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
|
||||
else:
|
||||
self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
|
||||
|
||||
def dKdiag_dtheta(self,partial,X,target):
|
||||
self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial).namelocals()[shared_params.name] = getattr(self, shared_params.name)
|
||||
|
||||
def gradients_X(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
|
||||
else:
|
||||
self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
|
||||
|
||||
def dKdiag_dX(self,partial,X,target):
|
||||
self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
|
||||
|
||||
def compute_psi_stats(self):
|
||||
#define some normal distributions
|
||||
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
|
||||
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
|
||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
||||
|
||||
#do some integration!
|
||||
#self._sp_psi0 = ??
|
||||
self._sp_psi1 = self._sp_k
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi1 *= normals[i]
|
||||
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi1 = self._sp_psi1.simplify()
|
||||
|
||||
#and here's psi2 (eek!)
|
||||
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
|
||||
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi2 *= normals[i]
|
||||
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi2 = self._sp_psi2.simplify()
|
||||
|
||||
def parameters_changed(self):
|
||||
# Reset the caches
|
||||
self._cache, self._cache2 = np.empty(shape=(2, 1))
|
||||
self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
|
||||
|
||||
def update_gradients_full(self, dL_dK, X):
|
||||
# Need to extract parameters to local variables first
|
||||
self._K_computations(X, None)
|
||||
for shared_params in self._sp_theta:
|
||||
parameter = getattr(self, shared_params.name)
|
||||
code = self._code['dK_d' + shared_params.name]
|
||||
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
|
||||
|
||||
for split_params in self._split_theta_names:
|
||||
parameter = getattr(self, split_params.name)
|
||||
code = self._code['dK_d' + split_params.name]
|
||||
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
|
||||
|
||||
|
||||
# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
|
||||
# #contributions from Kdiag
|
||||
# self.variance.gradient = np.sum(dL_dKdiag)
|
||||
|
||||
# #from Knm
|
||||
# self._K_computations(X, Z)
|
||||
# self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
|
||||
# if self.ARD:
|
||||
# self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
|
||||
|
||||
# else:
|
||||
# self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
|
||||
|
||||
# #from Kmm
|
||||
# self._K_computations(Z, None)
|
||||
# self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
|
||||
# if self.ARD:
|
||||
# self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
|
||||
# else:
|
||||
# self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
|
||||
|
||||
|
||||
#---------------------------------------#
|
||||
# Precomputations #
|
||||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self, X, Z):
|
||||
if Z is None:
|
||||
self._generate_inline(self._precompute, X)
|
||||
else:
|
||||
self._generate_inline(self._precompute, X, Z=Z)
|
||||
|
|
@ -1,134 +0,0 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
four_over_tau = 2./np.pi
|
||||
|
||||
class POLY(Kernpart):
|
||||
"""
|
||||
|
||||
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
|
||||
|
||||
.. math::
|
||||
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
|
||||
|
||||
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
|
||||
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
|
||||
(degree). Only gradients of the first three are provided for
|
||||
kernel optimisation, it is assumed that polynomial degree would
|
||||
be set by hand.
|
||||
|
||||
The kernel is not recommended as it is badly behaved when the
|
||||
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
|
||||
there is an automatic relevance determination version of this
|
||||
kernel provided (NOTE YET IMPLEMENTED!).
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance :math:`\sigma^2`
|
||||
:type variance: float
|
||||
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
|
||||
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
|
||||
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
|
||||
:param degree: the degree of the polynomial.
|
||||
:type degree: int
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: Kernpart object
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
|
||||
self.input_dim = input_dim
|
||||
self.ARD = ARD
|
||||
if not ARD:
|
||||
self.num_params=3
|
||||
if weight_variance is not None:
|
||||
weight_variance = np.asarray(weight_variance)
|
||||
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
|
||||
else:
|
||||
weight_variance = 1.*np.ones(1)
|
||||
else:
|
||||
self.num_params = self.input_dim + 2
|
||||
if weight_variance is not None:
|
||||
weight_variance = np.asarray(weight_variance)
|
||||
assert weight_variance.size == self.input_dim, "bad number of weight variances"
|
||||
else:
|
||||
weight_variance = np.ones(self.input_dim)
|
||||
raise NotImplementedError
|
||||
self.degree=degree
|
||||
self.name='poly_deg' + str(self.degree)
|
||||
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
|
||||
|
||||
def _set_params(self, x):
|
||||
assert x.size == (self.num_params)
|
||||
self.variance = x[0]
|
||||
self.weight_variance = x[1:-1]
|
||||
self.weight_std = np.sqrt(self.weight_variance)
|
||||
self.bias_variance = x[-1]
|
||||
|
||||
def _get_param_names(self):
|
||||
if self.num_params == 3:
|
||||
return ['variance', 'weight_variance', 'bias_variance']
|
||||
else:
|
||||
return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
|
||||
|
||||
def K(self, X, X2, target):
|
||||
"""Return covariance between X and X2."""
|
||||
self._K_computations(X, X2)
|
||||
target += self.variance*self._K_dvar
|
||||
|
||||
def Kdiag(self, X, target):
|
||||
"""Compute the diagonal of the covariance matrix for X."""
|
||||
self._K_diag_computations(X)
|
||||
target+= self.variance*self._K_diag_dvar
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance with respect to the parameters."""
|
||||
self._K_computations(X, X2)
|
||||
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
|
||||
base_cov_grad = base*dL_dK
|
||||
|
||||
|
||||
|
||||
target[0] += np.sum(self._K_dvar*dL_dK)
|
||||
target[1] += (self._K_inner_prod*base_cov_grad).sum()
|
||||
target[2] += base_cov_grad.sum()
|
||||
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
"""Derivative of the covariance matrix with respect to X"""
|
||||
self._K_computations(X, X2)
|
||||
arg = self._K_poly_arg
|
||||
if X2 is None:
|
||||
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
else:
|
||||
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
"""Gradient of diagonal of covariance with respect to X"""
|
||||
self._K_diag_computations(X)
|
||||
arg = self._K_diag_poly_arg
|
||||
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
|
||||
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
if self.ARD:
|
||||
pass
|
||||
else:
|
||||
if X2 is None:
|
||||
self._K_inner_prod = np.dot(X,X.T)
|
||||
else:
|
||||
self._K_inner_prod = np.dot(X,X2.T)
|
||||
self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
|
||||
self._K_dvar = self._K_poly_arg**self.degree
|
||||
|
||||
def _K_diag_computations(self, X):
|
||||
if self.ARD:
|
||||
pass
|
||||
else:
|
||||
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
|
||||
self._K_diag_dvar = self._K_diag_poly_arg**self.degree
|
||||
|
|
@ -1,70 +0,0 @@
|
|||
# Check Matthew Rocklin's blog post.
|
||||
import sympy as sym
|
||||
import numpy as np
|
||||
from kern import Kern
|
||||
from ...core.symbolic import Symbolic_core
|
||||
|
||||
|
||||
class Symbolic(Kern, Symbolic_core):
|
||||
"""
|
||||
"""
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
|
||||
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
|
||||
Kern.__init__(self, input_dim, active_dims, name=name)
|
||||
kdiag = k
|
||||
self.cacheable = ['X', 'Z']
|
||||
Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
|
||||
self.output_dim = output_dim
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sym_k+other._sym_k)
|
||||
|
||||
def _set_expressions(self, expressions):
|
||||
"""This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
|
||||
Symbolic_core._set_expressions(self, expressions)
|
||||
Symbolic_core._set_variables(self, self.cacheable)
|
||||
# Substitute z with x to obtain kdiag.
|
||||
for x, z in zip(self.variables['X'], self.variables['Z']):
|
||||
expressions['kdiag'] = expressions['kdiag'].subs(z, x)
|
||||
Symbolic_core._set_expressions(self, expressions)
|
||||
|
||||
|
||||
def K(self,X,X2=None):
|
||||
if X2 is None:
|
||||
return self.eval_function('k', X=X, Z=X)
|
||||
else:
|
||||
return self.eval_function('k', X=X, Z=X2)
|
||||
|
||||
|
||||
def Kdiag(self,X):
|
||||
d = self.eval_function('kdiag', X=X)
|
||||
if not d.shape[0] == X.shape[0]:
|
||||
d = np.tile(d, (X.shape[0], 1))
|
||||
return d
|
||||
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2=None):
|
||||
#if self._X is None or X.base is not self._X.base or X2 is not None:
|
||||
g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
|
||||
if X2 is None:
|
||||
g *= 2
|
||||
return g
|
||||
|
||||
def gradients_X_diag(self, dL_dK, X):
|
||||
return self.eval_gradients_X('kdiag', dL_dK, X=X)
|
||||
|
||||
def update_gradients_full(self, dL_dK, X, X2=None):
|
||||
# Need to extract parameters to local variables first
|
||||
if X2 is None:
|
||||
# need to double this inside ...
|
||||
self.eval_update_gradients('k', dL_dK, X=X)
|
||||
else:
|
||||
self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
|
||||
|
||||
|
||||
def update_gradients_diag(self, dL_dKdiag, X):
|
||||
self.eval_update_gradients('kdiag', dL_dKdiag, X)
|
||||
|
||||
|
|
@ -1,17 +0,0 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
# Construct covariance functions from matlab saves.
|
||||
import numpy as np
|
||||
from kern import kern
|
||||
import parts
|
||||
import scipy.io
|
||||
|
||||
def read_matlab(mat_data)
|
||||
mat_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'frey_rawface.mat'))
|
||||
if mat_data['type']=='cmpnd':
|
||||
# cmpnd kernel
|
||||
types = []
|
||||
for i in range(mat_data['comp'][0][0]):
|
||||
types.append(mat_data['comp'][0][0][i]
|
||||
|
||||
|
|
@ -1,318 +0,0 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from scipy import weave
|
||||
from ...util.linalg import tdot
|
||||
from ...util.misc import fast_array_equal
|
||||
|
||||
class EQ(Kernpart_stationary):
|
||||
"""
|
||||
Exponentiated Quadratic covariance function als known as radial basis function, squared-exponential or Gaussian kernel.
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
||||
|
||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the vector of lengthscale of the kernel
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
.. Note: this object implements both the ARD and 'spherical' version of the function
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
|
||||
Kernpart_stationary.__init__(self, input_dim, lengthscale, ARD)
|
||||
self.name = 'rbf'
|
||||
self._set_params(np.hstack((variance, self.lengthscale.flatten())))
|
||||
|
||||
# a set of optional args to pass to weave
|
||||
self.weave_options = {'headers' : ['<omp.h>'],
|
||||
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
|
||||
'extra_link_args' : ['-lgomp']}
|
||||
|
||||
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.variance, self.lengthscale))
|
||||
|
||||
def _set_params(self, x):
|
||||
assert x.size == (self.num_params)
|
||||
self.variance = x[0]
|
||||
Kernpart_stationary._set_params(self, x[1:])
|
||||
|
||||
def _get_param_names(self):
|
||||
if self.num_params == 2:
|
||||
return ['variance', 'lengthscale']
|
||||
else:
|
||||
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
|
||||
|
||||
def K(self, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target += self.variance * self._K_dvar
|
||||
|
||||
def Kdiag(self, X, target):
|
||||
np.add(target, self.variance, target)
|
||||
|
||||
def dK_dtheta(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target[0] += np.sum(self._K_dvar * dL_dK)
|
||||
if self.ARD:
|
||||
dvardLdK = self._K_dvar * dL_dK
|
||||
var_len3 = self.variance / np.power(self.lengthscale, 3)
|
||||
if X2 is None:
|
||||
# save computation for the symmetrical case
|
||||
dvardLdK = dvardLdK + dvardLdK.T
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<i; j++){
|
||||
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q+1) += var_len3(q)*tmp;
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
||||
weave.inline(code, arg_names=['num_data',
|
||||
'num_inducing',
|
||||
'input_dim',
|
||||
'X', 'X2',
|
||||
'target',
|
||||
'dvardLdK',
|
||||
'var_len3'],
|
||||
type_converters=weave.converters.blitz,
|
||||
**self.weave_options)
|
||||
else:
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<num_inducing; j++){
|
||||
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q+1) += var_len3(q)*tmp;
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
||||
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
||||
weave.inline(code, arg_names=['num_data',
|
||||
'num_inducing',
|
||||
'input_dim',
|
||||
'X', 'X2',
|
||||
'target',
|
||||
'dvardLdK',
|
||||
'var_len3'],
|
||||
type_converters=weave.converters.blitz,
|
||||
**self.weave_options)
|
||||
else:
|
||||
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
|
||||
|
||||
|
||||
def dK_dX(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
|
||||
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
|
||||
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
pass
|
||||
|
||||
|
||||
#---------------------------------------#
|
||||
# PSI statistics #
|
||||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, mu, S, target):
|
||||
target += self.variance
|
||||
|
||||
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
|
||||
target[0] += np.sum(dL_dpsi0)
|
||||
|
||||
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
|
||||
pass
|
||||
|
||||
def psi1(self, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
target += self._psi1
|
||||
|
||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
|
||||
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
|
||||
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||
if not self.ARD:
|
||||
target[1] += dpsi1_dlength.sum()
|
||||
else:
|
||||
target[1:] += dpsi1_dlength.sum(0).sum(0)
|
||||
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
denominator = (self.lengthscale2 * (self._psi1_denom))
|
||||
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
|
||||
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
|
||||
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
|
||||
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
|
||||
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
|
||||
|
||||
def psi2(self, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
target += self._psi2
|
||||
|
||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
|
||||
"""Shape N,num_inducing,num_inducing,Ntheta"""
|
||||
self._psi_computations(Z, mu, S)
|
||||
d_var = 2.*self._psi2 / self.variance
|
||||
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
|
||||
|
||||
target[0] += np.sum(dL_dpsi2 * d_var)
|
||||
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
|
||||
if not self.ARD:
|
||||
target[1] += dpsi2_dlength.sum()
|
||||
else:
|
||||
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
|
||||
|
||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
|
||||
self._psi_computations(Z, mu, S)
|
||||
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
|
||||
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
|
||||
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
|
||||
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
|
||||
|
||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
|
||||
"""Think N,num_inducing,num_inducing,input_dim """
|
||||
self._psi_computations(Z, mu, S)
|
||||
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
|
||||
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
|
||||
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
|
||||
|
||||
#---------------------------------------#
|
||||
# Precomputations #
|
||||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
params = self._get_params()
|
||||
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , params)):
|
||||
self._X = X.copy()
|
||||
self._params = params.copy()
|
||||
if X2 is None:
|
||||
self._X2 = None
|
||||
X = X / self.lengthscale
|
||||
Xsquare = np.sum(np.square(X), 1)
|
||||
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
|
||||
else:
|
||||
self._X2 = X2.copy()
|
||||
X = X / self.lengthscale
|
||||
X2 = X2 / self.lengthscale
|
||||
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
|
||||
self._K_dvar = np.exp(-0.5 * self._K_dist2)
|
||||
|
||||
def _psi_computations(self, Z, mu, S):
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
if not fast_array_equal(Z, self._Z):
|
||||
# Z has changed, compute Z specific stuff
|
||||
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
|
||||
|
||||
if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
|
||||
# something's changed. recompute EVERYTHING
|
||||
|
||||
# psi1
|
||||
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
|
||||
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
|
||||
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
|
||||
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
|
||||
self._psi1 = self.variance * np.exp(self._psi1_exponent)
|
||||
|
||||
# psi2
|
||||
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
|
||||
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
|
||||
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
|
||||
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
||||
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
|
||||
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
|
||||
|
||||
# store matrices for caching
|
||||
self._Z, self._mu, self._S = Z, mu, S
|
||||
|
||||
def weave_psi2(self, mu, Zhat):
|
||||
N, input_dim = mu.shape
|
||||
num_inducing = Zhat.shape[0]
|
||||
|
||||
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
|
||||
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
|
||||
psi2 = np.empty((N, num_inducing, num_inducing))
|
||||
|
||||
psi2_Zdist_sq = self._psi2_Zdist_sq
|
||||
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
|
||||
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
|
||||
variance_sq = float(np.square(self.variance))
|
||||
if self.ARD:
|
||||
lengthscale2 = self.lengthscale2
|
||||
else:
|
||||
lengthscale2 = np.ones(input_dim) * self.lengthscale2
|
||||
code = """
|
||||
double tmp;
|
||||
|
||||
#pragma omp parallel for private(tmp)
|
||||
for (int n=0; n<N; n++){
|
||||
for (int m=0; m<num_inducing; m++){
|
||||
for (int mm=0; mm<(m+1); mm++){
|
||||
for (int q=0; q<input_dim; q++){
|
||||
//compute mudist
|
||||
tmp = mu(n,q) - Zhat(m,mm,q);
|
||||
mudist(n,m,mm,q) = tmp;
|
||||
mudist(n,mm,m,q) = tmp;
|
||||
|
||||
//now mudist_sq
|
||||
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
|
||||
mudist_sq(n,m,mm,q) = tmp;
|
||||
mudist_sq(n,mm,m,q) = tmp;
|
||||
|
||||
//now psi2_exponent
|
||||
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
|
||||
psi2_exponent(n,mm,m) += tmp;
|
||||
if (m !=mm){
|
||||
psi2_exponent(n,m,mm) += tmp;
|
||||
}
|
||||
//psi2 would be computed like this, but np is faster
|
||||
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
|
||||
//psi2(n,m,mm) = tmp;
|
||||
//psi2(n,mm,m) = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
"""
|
||||
|
||||
support_code = """
|
||||
#include <omp.h>
|
||||
#include <math.h>
|
||||
"""
|
||||
weave.inline(code, support_code=support_code, libraries=['gomp'],
|
||||
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
|
||||
type_converters=weave.converters.blitz, **self.weave_options)
|
||||
|
||||
return mudist, mudist_sq, psi2_exponent, psi2
|
||||
|
|
@ -1,428 +0,0 @@
|
|||
# Copyright (c) 2012, James Hensman and Ricardo Andrade
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot, pdinv
|
||||
from GPy.util import ln_diff_erfs
|
||||
import pdb
|
||||
from scipy import weave
|
||||
|
||||
class Ode1(Kernpart):
|
||||
"""
|
||||
Covariance function for first order differential equation driven by an exponetiated quadratic covariance.
|
||||
|
||||
This kernel has the form
|
||||
.. math::
|
||||
|
||||
:param num_outputs: number of outputs driven by latent function.
|
||||
:type num_outputs: int
|
||||
|
||||
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
|
||||
"""
|
||||
def __init__(self,num_outputs, W=None, rank=1, delay=None, kappa=None):
|
||||
self.rank = rank
|
||||
self.input_dim = 1
|
||||
self.name = 'ode1'
|
||||
self.num_outputs = num_outputs
|
||||
self.num_params = self.num_outputs*(1. + self.rank) + 1
|
||||
if kappa is not None:
|
||||
self.num_params+=num_outputs
|
||||
if delay is not None:
|
||||
self.num_params+=num_outputs
|
||||
self.rank = rank
|
||||
if W is None:
|
||||
self.W = 0.5*np.random.randn(self.num_outputs,self.rank)/np.sqrt(self.rank)
|
||||
else:
|
||||
assert W.shape==(self.num_outputs,self.rank)
|
||||
self.W = W
|
||||
|
||||
if kappa is not None:
|
||||
assert kappa.shape==(self.num_outputs,)
|
||||
self.kappa = kappa
|
||||
|
||||
if delay is not None:
|
||||
assert delay.shape==(self.num_outputs,)
|
||||
self.delay = delay
|
||||
self._set_params(self._get_params())
|
||||
|
||||
def _get_params(self):
|
||||
param_list = [self.W.flatten()]
|
||||
if self.kappa is not None:
|
||||
param_list.append(self.kappa)
|
||||
param_list.append(self.decay)
|
||||
if self.delay is not None:
|
||||
param_list.append(self.delay)
|
||||
param_list.append(self.length_scale)
|
||||
return np.hstack(param_list)
|
||||
|
||||
def _set_params(self,x):
|
||||
assert x.size == self.num_params
|
||||
end = self.num_outputs*self.rank
|
||||
self.W = x[:end].reshape(self.num_outputs,self.rank)
|
||||
start = end
|
||||
self.B = np.dot(self.W,self.W.T)
|
||||
if self.kappa is not None:
|
||||
end+=self.num_outputs
|
||||
self.kappa = x[start:end]
|
||||
self.B += np.diag(self.kappa)
|
||||
start=end
|
||||
end+=num_outputs
|
||||
self.decay = x[start:end]
|
||||
start=end
|
||||
if self.delay is not None:
|
||||
end+=num_outputs
|
||||
self.delay = x[start:end]
|
||||
start=end
|
||||
end+=1
|
||||
self.length_scale = x[start]
|
||||
self.sigma = np.sqrt(2)*self.length_scale
|
||||
|
||||
|
||||
def _get_param_names(self):
|
||||
param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.num_outputs)],[])
|
||||
if self.kappa is not None:
|
||||
param_names += ['kappa_%i'%i for i in range(self.num_outputs)]
|
||||
param_names += ['decay_%i'%i for i in range(self.num_outputs)]
|
||||
if self.delay is not None:
|
||||
param_names += ['delay_%i'%i for i in range(self.num_outputs)]
|
||||
param_names+= ['length_scale']
|
||||
return param_names
|
||||
|
||||
def K(self,X,X2,target):
|
||||
|
||||
if X.shape[1] > 2:
|
||||
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
|
||||
|
||||
self._K_computations()
|
||||
target += self._scales*self._dK_dvar
|
||||
|
||||
if self.gaussian_initial:
|
||||
# Add covariance associated with initial condition.
|
||||
t1_mat = self._t[self._rorder, None]
|
||||
t2_mat = self._t2[None, self._rorder2]
|
||||
target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
|
||||
|
||||
|
||||
|
||||
def Kdiag(self,index,target):
|
||||
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
|
||||
pass
|
||||
|
||||
def dK_dtheta(self,dL_dK,index,index2,target):
|
||||
pass
|
||||
|
||||
def dKdiag_dtheta(self,dL_dKdiag,index,target):
|
||||
pass
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
pass
|
||||
|
||||
def _extract_t_indices(X, X2=None):
|
||||
"""Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
|
||||
|
||||
# TODO: some fast checking here to see if this needs recomputing?
|
||||
self._t = X[:, 0]
|
||||
if X.shape[1]==1:
|
||||
# No index passed, assume single output of ode model.
|
||||
self._index = np.ones_like(X[:, 1],dtype=np.int)
|
||||
self._index = np.asarray(X[:, 1],dtype=np.int)
|
||||
# Sort indices so that outputs are in blocks for computational
|
||||
# convenience.
|
||||
self._order = self._index.argsort()
|
||||
self._index = self._index[self._order]
|
||||
self._t = self._t[self._order]
|
||||
self._rorder = self._order.argsort() # rorder is for reversing the order
|
||||
|
||||
if X2 is None:
|
||||
self._t2 = None
|
||||
self._index2 = None
|
||||
self._rorder2 = self._rorder
|
||||
else:
|
||||
if X2.shape[1] > 2:
|
||||
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
|
||||
self._t2 = X2[:, 0]
|
||||
if X.shape[1]==1:
|
||||
# No index passed, assume single output of ode model.
|
||||
self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
|
||||
self._index2 = np.asarray(X2[:, 1],dtype=np.int)
|
||||
self._order2 = self._index2.argsort()
|
||||
slef._index2 = self._index2[self._order2]
|
||||
self._t2 = self._t2[self._order2]
|
||||
self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
|
||||
|
||||
def _K_computations(X, X2):
|
||||
"""Perform main body of computations for the ode1 covariance function."""
|
||||
# First extract times and indices.
|
||||
self._extract_t_indices(X, X2)
|
||||
|
||||
self._K_compute_eq()
|
||||
self._K_compute_eq_x_ode()
|
||||
if X2 is None:
|
||||
self._K_ode_eq = self._K_ode_eq.T
|
||||
else:
|
||||
self._K_compute_eq_x_ode(transpose=True)
|
||||
self._K_compute_ode()
|
||||
|
||||
# Reorder values of blocks for placing back into _K_dvar.
|
||||
self._K_dvar[self._rorder, :] = np.vstack((
|
||||
np.hstack((self._K_eq, self._Keq_ode))
|
||||
np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
|
||||
|
||||
|
||||
def _K_compute_eq():
|
||||
"""Compute covariance for latent covariance."""
|
||||
t_eq = self._t[self._index==0]
|
||||
if t_eq.shape[0]==0:
|
||||
self._K_eq = np.zeros((0, 0))
|
||||
return
|
||||
|
||||
if self._t2 is None:
|
||||
self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
|
||||
else:
|
||||
t2_eq = self._t2[self._index2==0]
|
||||
if t2_eq.shape[0]==0:
|
||||
self._K_eq_eq = np.zeros((0, 0))
|
||||
return
|
||||
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
|
||||
|
||||
self._K_eq = np.exp(-self._dist2/(2*self.length_scale*self.length_scale))
|
||||
if self.is_normalise:
|
||||
self._K_eq/=(np.sqrt(2*np.pi)*self.length_scale)
|
||||
|
||||
def _K_compute_ode_eq(transpose=False):
|
||||
"""Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
|
||||
|
||||
:param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
|
||||
:type transpose: bool"""
|
||||
|
||||
if transpose:
|
||||
t_ode = self._t2[self._index>0]
|
||||
index_ode = self._index2[self._index>0]-1
|
||||
if t_ode.shape[0]==0:
|
||||
self._K_ode = np.zeros((0, 0))
|
||||
return
|
||||
|
||||
if self._t2 is not None:
|
||||
t2_ode = self._t2[self._index2>0]
|
||||
index2_ode = self._index2[self._index2>0]-1
|
||||
if t2_eq.shape[0]==0:
|
||||
self._K_ode = np.zeros((0, 0))
|
||||
return
|
||||
else:
|
||||
|
||||
# Matrix giving scales of each output
|
||||
self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
|
||||
code="""
|
||||
for(int i=0;i<N; i++){
|
||||
for(int j=0; j<N2; j++){
|
||||
scale_mat[i+j*N] = self.W[index_sim[i]+index_eq[j]*num_outputs];
|
||||
}
|
||||
}
|
||||
"""
|
||||
scale_mat, B = self._scale, self._B
|
||||
N, N2, num_outputs = index_ode.size, index_eq.size, self.num_outputs
|
||||
weave.inline(code,['index_ode', 'index_eq',
|
||||
'scale_mat', 'B',
|
||||
'N', 'N2', 'num_outputs'])
|
||||
else:
|
||||
self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
|
||||
code = """
|
||||
for(int i=0; i<N; i++){
|
||||
for(int j=0; j<N2; j++){
|
||||
scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
|
||||
}
|
||||
}
|
||||
"""
|
||||
scale_mat, B = self._scale, self._B
|
||||
N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
|
||||
weave.inline(code, ['index_ode', 'index2_ode',
|
||||
'scale_mat', 'B',
|
||||
'N', 'N2', 'num_outputs'])
|
||||
if transpose:
|
||||
t_ode = t2 - self.delay
|
||||
t_eq_mat = t1[None, :]
|
||||
else:
|
||||
t_ode = t1 - self.delay
|
||||
t_eq_mat = t2[None, :]
|
||||
t_ode_mat = t_ode[:, None]
|
||||
diff_t = (t_ode_mat - t_eq_mat)
|
||||
sigma = sqrt(2/self.inverseWidth)
|
||||
|
||||
invSigmaDiffT = 1/sigma*diff_t
|
||||
halfSigmaD_i = 0.5*sigma*self.decay
|
||||
|
||||
if self.isStationary == false
|
||||
[ln_part, signs] = ln_diff_erfs(halfSigmaD_i + t2Mat/sigma, halfSigmaD_i - invSigmaDiffT)
|
||||
else
|
||||
[ln_part, signs] = ln_diff_erfs(inf, halfSigmaD_i - invSigmaDiffT)
|
||||
end
|
||||
sK = signs .* exp(halfSigmaD_i*halfSigmaD_i - self.decay*diff_t + ln_part)
|
||||
|
||||
sK *= 0.5
|
||||
|
||||
if not self.is_normalised:
|
||||
sK *= sqrt(pi)*self.sigma
|
||||
|
||||
|
||||
if transpose:
|
||||
self._K_eq_ode =
|
||||
else:
|
||||
self._K_ode_eq = sK
|
||||
return K
|
||||
|
||||
def _K_compute_ode():
|
||||
# Compute covariances between outputs of the ODE models.
|
||||
|
||||
t_ode = self._t[self._index>0]
|
||||
index_ode = self._index[self._index>0]-1
|
||||
if t_ode.shape[0]==0:
|
||||
self._K_ode = np.zeros((0, 0))
|
||||
return
|
||||
|
||||
if self._t2 is not None:
|
||||
t2_ode = self._t2[self._index2>0]
|
||||
index2_ode = self._index2[self._index2>0]-1
|
||||
if t2_eq.shape[0]==0:
|
||||
self._K_ode = np.zeros((0, 0))
|
||||
return
|
||||
|
||||
if self._index2 is None:
|
||||
# Matrix giving scales of each output
|
||||
self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
|
||||
code="""
|
||||
for(int i=0;i<N; i++){
|
||||
scale_mat[i+i*N] = B[index_ode[i]+num_outputs*(index_ode[i])];
|
||||
for(int j=0; j<i; j++){
|
||||
scale_mat[j+i*N] = B[index_ode[i]+num_outputs*index_ode[j]];
|
||||
scale_mat[i+j*N] = scale_mat[j+i*N];
|
||||
}
|
||||
}
|
||||
"""
|
||||
scale_mat, B = self._scale, self._B
|
||||
N, num_outputs = index_ode.size, self.num_outputs
|
||||
weave.inline(code,['index_ode',
|
||||
'scale_mat', 'B',
|
||||
'N', 'num_outputs'])
|
||||
else:
|
||||
self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
|
||||
code = """
|
||||
for(int i=0; i<N; i++){
|
||||
for(int j=0; j<N2; j++){
|
||||
scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
|
||||
}
|
||||
}
|
||||
"""
|
||||
scale_mat, B = self._scale, self._B
|
||||
N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
|
||||
weave.inline(code, ['index_ode', 'index2_ode',
|
||||
'scale_mat', 'B',
|
||||
'N', 'N2', 'num_outputs'])
|
||||
|
||||
# When index is identical
|
||||
if self.is_stationary:
|
||||
h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode)
|
||||
else:
|
||||
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode)
|
||||
|
||||
if t2 is None:
|
||||
self._K_ode = 0.5 * (h + h.T)
|
||||
else:
|
||||
if self.is_stationary:
|
||||
h2 = self._compute_H_stat(t2, index2, t, index)
|
||||
else:
|
||||
h2 = self._compute_H(t2, index2, t, index)
|
||||
self._K_ode += 0.5 * (h + h2.T)
|
||||
|
||||
if not self.is_normalized:
|
||||
self._K_ode *= np.sqrt(np.pi)*sigma
|
||||
|
||||
def _compute_H(t, index, t2, index2, update_derivatives=False):
|
||||
"""Helper function for computing part of the ode1 covariance function.
|
||||
|
||||
:param t: first time input.
|
||||
:type t: array
|
||||
:param index: Indices of first output.
|
||||
:type index: array of int
|
||||
:param t2: second time input.
|
||||
:type t2: array
|
||||
:param index2: Indices of second output.
|
||||
:type index2: array of int
|
||||
:param update_derivatives: whether to update derivatives (default is False)
|
||||
:return h : result of this subcomponent of the kernel for the given values.
|
||||
:rtype: ndarray
|
||||
"""
|
||||
|
||||
if t.shape[1] > 1 or t2.shape[1] > 1:
|
||||
raise error('Input can only have one column')
|
||||
|
||||
# Vector of decays and delays associated with each output.
|
||||
Decay = np.zeros(t.shape[0])
|
||||
Delay = np.zeros(t.shape[0])
|
||||
Decay2 = np.zeros(t2.shape[0])
|
||||
Delay2 = np.zeros(t2.shape[0])
|
||||
code="""
|
||||
for(int i=0;i<N; i++){
|
||||
Delay[i] = decays[index[i]];
|
||||
Decay[i] = delays[index[i]];
|
||||
}
|
||||
for(int i=0; i<N2; i++){
|
||||
Delay2[i] = decays[index2[i]];
|
||||
Decay2[i] = delays[index2[i]];
|
||||
}
|
||||
"""
|
||||
delays, decays = self.delays, self.decays
|
||||
N, N2 = index.size, index2.size
|
||||
weave.inline(code,['index_ode',
|
||||
'Delay', 'Decay',
|
||||
'Delay2', 'Decay2',
|
||||
'delays', 'decays',
|
||||
'N', 'N2'])
|
||||
|
||||
t_mat = t[:, None]-Delay[:, None]
|
||||
t2_mat = t2[None, :]-Delay2[None, :]
|
||||
diff_t = (t_mat - t2_mat)
|
||||
inv_sigma_diff_t = 1./self.sigma*diff_t
|
||||
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
|
||||
|
||||
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
|
||||
half_sigma_decay_i - inv_sigma_diff_t)
|
||||
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
|
||||
half_sigma_decay_i - t_mat/self.sigma)
|
||||
|
||||
h = sign1*np.exp(half_sigma_decay_i
|
||||
*half_sigma_decay_i
|
||||
-Decay[:, None]*diff_t+ln_part_1
|
||||
-np.log(Decay[:, None] + Decay2[None, :]))
|
||||
h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
|
||||
-Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
|
||||
-np.log(Decay[:, None] + Decay2[None, :]))
|
||||
|
||||
|
||||
# if update_derivatives:
|
||||
# sigma2 = self.sigma*self.sigma
|
||||
# # Update ith decay gradient
|
||||
# dh_ddecay += (0.5*self.decay[i]*sigma2*(self.decay[i] + decay[j])-1)*h
|
||||
# + (-diff_t*sign1*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*diff_t+ln_part_1)
|
||||
# +t_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*t_mat - decay[j]*t2_mat+ln_part_2)) ...
|
||||
# +self.sigma/sqrt(pi)*(-np.exp(-diff_t*diff_t/sigma2)
|
||||
# +np.exp(-t2_mat*t2_mat/sigma2-self.decay[i]*t_mat)
|
||||
# +np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) ...
|
||||
# -np.exp(-(self.decay[i]*t_mat + decay[j]*t2_mat)))
|
||||
# self._dh_ddecay[i] += real(dh_ddecay/(self.decay[i]+decay[j]))
|
||||
|
||||
# # Update jth decay gradient
|
||||
# dh_ddecay = t2_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-(self.decay[i]*t_mat + decay[j]*t2_mat)+ln_part_2)-h
|
||||
# self._dh_ddecay[j] += real(dh_ddecay/(self.decay[i] + decay[j]))
|
||||
|
||||
# # Update sigma gradient
|
||||
# self._dh_dsigma += 0.5*self.decay[i]*self.decay[i]*self.sigma*h + 2/(np.sqrt(np.pi)*(self.decay[i]+decay[j]))*((-diff_t/sigma2-self.decay[i]/
|
||||
# 2)*np.exp(-diff_t*
|
||||
# diff_t/sigma2)
|
||||
# + (-t2_mat/sigma2+self.decay[i]/2)
|
||||
# *np.exp(-t2_mat*t2_mat/sigma2
|
||||
# -self.decay[i]*t_mat)
|
||||
# - (-t_mat/sigma2-self.decay[i]/2)
|
||||
# *np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat)
|
||||
# - self.decay[i]/2*np.exp(-(self.decay[i]*t_mat+decay[j]*t2_mat)))
|
||||
|
||||
|
|
@ -1,461 +0,0 @@
|
|||
import numpy as np
|
||||
import sympy as sp
|
||||
from sympy.utilities.codegen import codegen
|
||||
from sympy.core.cache import clear_cache
|
||||
from scipy import weave
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
|
||||
import tempfile
|
||||
import pdb
|
||||
import ast
|
||||
from kernpart import Kernpart
|
||||
from ...util.config import config
|
||||
|
||||
class spkern(Kernpart):
|
||||
"""
|
||||
A kernel object, where all the hard work in done by sympy.
|
||||
|
||||
:param k: the covariance function
|
||||
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
|
||||
|
||||
To construct a new sympy kernel, you'll need to define:
|
||||
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
|
||||
- that's it! we'll extract the variables from the function k.
|
||||
|
||||
Note:
|
||||
- to handle multiple inputs, call them x_1, z_1, etc
|
||||
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
|
||||
"""
|
||||
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
|
||||
if name is None:
|
||||
self.name='sympykern'
|
||||
else:
|
||||
self.name = name
|
||||
if k is None:
|
||||
raise ValueError, "You must provide an argument for the covariance function."
|
||||
self._sp_k = k
|
||||
sp_vars = [e for e in k.atoms() if e.is_Symbol]
|
||||
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
|
||||
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
|
||||
# Check that variable names make sense.
|
||||
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
|
||||
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
|
||||
assert len(self._sp_x)==len(self._sp_z)
|
||||
self.input_dim = len(self._sp_x)
|
||||
self._real_input_dim = self.input_dim
|
||||
if output_dim > 1:
|
||||
self.input_dim += 1
|
||||
assert self.input_dim == input_dim
|
||||
self.output_dim = output_dim
|
||||
# extract parameter names
|
||||
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
|
||||
|
||||
|
||||
# Look for parameters with index.
|
||||
if self.output_dim>1:
|
||||
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
|
||||
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
|
||||
# Make sure parameter appears with both indices!
|
||||
assert len(self._sp_theta_i)==len(self._sp_theta_j)
|
||||
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
|
||||
|
||||
# Extract names of shared parameters
|
||||
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
|
||||
|
||||
self.num_split_params = len(self._sp_theta_i)
|
||||
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
|
||||
for theta in self._split_theta_names:
|
||||
setattr(self, theta, np.ones(self.output_dim))
|
||||
|
||||
self.num_shared_params = len(self._sp_theta)
|
||||
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
|
||||
|
||||
else:
|
||||
self.num_split_params = 0
|
||||
self._split_theta_names = []
|
||||
self._sp_theta = thetas
|
||||
self.num_shared_params = len(self._sp_theta)
|
||||
self.num_params = self.num_shared_params
|
||||
|
||||
for theta in self._sp_theta:
|
||||
val = 1.0
|
||||
if param is not None:
|
||||
if param.has_key(theta):
|
||||
val = param[theta]
|
||||
setattr(self, theta.name, val)
|
||||
#deal with param
|
||||
self._set_params(self._get_params())
|
||||
|
||||
#Differentiate!
|
||||
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
|
||||
if self.output_dim > 1:
|
||||
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
|
||||
|
||||
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
|
||||
|
||||
if False:
|
||||
self.compute_psi_stats()
|
||||
|
||||
self._gen_code()
|
||||
|
||||
if False:
|
||||
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
|
||||
else:
|
||||
extra_compile_args = []
|
||||
|
||||
self.weave_kwargs = {
|
||||
'support_code':self._function_code,
|
||||
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
|
||||
'headers':['"sympy_helpers.h"'],
|
||||
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
|
||||
'extra_compile_args':extra_compile_args,
|
||||
'extra_link_args':[],
|
||||
'verbose':True}
|
||||
if config.getboolean('parallel', 'openmp'): self.weave_kwargs.append('-lgomp')
|
||||
|
||||
def __add__(self,other):
|
||||
return spkern(self._sp_k+other._sp_k)
|
||||
|
||||
def _gen_code(self):
|
||||
"""Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
|
||||
#TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
|
||||
|
||||
#generate c functions from sympy objects
|
||||
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
|
||||
code_list = [('k',self._sp_k)]
|
||||
# gradients with respect to covariance input
|
||||
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
|
||||
# gradient with respect to parameters
|
||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
|
||||
# gradient with respect to multiple output parameters
|
||||
if self.output_dim > 1:
|
||||
argument_sequence += self._sp_theta_i + self._sp_theta_j
|
||||
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
|
||||
(foo_c,self._function_code), (foo_h,self._function_header) = \
|
||||
codegen(code_list, "C",'kernel_code',argument_sequence=argument_sequence)
|
||||
#put the header file where we can find it
|
||||
f = file(os.path.join(tempfile.gettempdir(),'kernel_code.h'),'w')
|
||||
f.write(self._function_header)
|
||||
f.close()
|
||||
|
||||
# Substitute any known derivatives which sympy doesn't compute
|
||||
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
|
||||
|
||||
|
||||
############################################################
|
||||
# This is the basic argument construction for the C code. #
|
||||
############################################################
|
||||
|
||||
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
|
||||
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
|
||||
|
||||
# for multiple outputs need to also provide these arguments reversed.
|
||||
if self.output_dim>1:
|
||||
reverse_arg_list = list(arg_list)
|
||||
reverse_arg_list.reverse()
|
||||
|
||||
# Add in any 'shared' parameters to the list.
|
||||
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
|
||||
arg_list += param_arg_list
|
||||
|
||||
precompute_list=[]
|
||||
if self.output_dim > 1:
|
||||
reverse_arg_list+=list(param_arg_list)
|
||||
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
|
||||
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
|
||||
arg_list += split_param_arg_list
|
||||
reverse_arg_list += split_param_reverse_arg_list
|
||||
# Extract the right output indices from the inputs.
|
||||
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
|
||||
precompute_list += c_define_output_indices
|
||||
reverse_arg_string = ", ".join(reverse_arg_list)
|
||||
arg_string = ", ".join(arg_list)
|
||||
precompute_string = "\n".join(precompute_list)
|
||||
|
||||
# Code to compute argments string needed when only X is provided.
|
||||
X_arg_string = re.sub('Z','X',arg_string)
|
||||
# Code to compute argument string when only diagonal is required.
|
||||
diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
|
||||
diag_arg_string = re.sub('j','i',diag_arg_string)
|
||||
if precompute_string == '':
|
||||
# if it's not multioutput, the precompute strings are set to zero
|
||||
diag_precompute_string = ''
|
||||
diag_precompute_replace = ''
|
||||
else:
|
||||
# for multioutput we need to extract the index of the output form the input.
|
||||
diag_precompute_string = precompute_list[0]
|
||||
diag_precompute_replace = precompute_list[1]
|
||||
|
||||
|
||||
# Here's the code to do the looping for K
|
||||
self._K_code =\
|
||||
"""
|
||||
// _K_code
|
||||
// Code for computing the covariance function.
|
||||
int i;
|
||||
int j;
|
||||
int N = target_array->dimensions[0];
|
||||
int num_inducing = target_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
//target[i*num_inducing+j] =
|
||||
TARGET2(i, j) += k(%s);
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
self._K_code_X = """
|
||||
// _K_code_X
|
||||
// Code for computing the covariance function.
|
||||
int i;
|
||||
int j;
|
||||
int N = target_array->dimensions[0];
|
||||
int num_inducing = target_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N;i++){
|
||||
%s // int ii=(int)X2(i, 1);
|
||||
TARGET2(i, i) += k(%s);
|
||||
for (j=0;j<i;j++){
|
||||
%s //int jj=(int)X2(j, 1);
|
||||
double kval = k(%s); //double kval = k(X2(i, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii));
|
||||
TARGET2(i, j) += kval;
|
||||
TARGET2(j, i) += kval;
|
||||
}
|
||||
}
|
||||
/*%s*/
|
||||
"""%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', diag_precompute_replace), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
|
||||
|
||||
# Code to do the looping for Kdiag
|
||||
self._Kdiag_code =\
|
||||
"""
|
||||
// _Kdiag_code
|
||||
// Code for computing diagonal of covariance function.
|
||||
int i;
|
||||
int N = target_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for
|
||||
for (i=0;i<N;i++){
|
||||
%s
|
||||
//target[i] =
|
||||
TARGET1(i)=k(%s);
|
||||
}
|
||||
%s
|
||||
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
# Code to compute gradients
|
||||
grad_func_list = []
|
||||
if self.output_dim>1:
|
||||
grad_func_list += c_define_output_indices
|
||||
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
||||
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
|
||||
grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
|
||||
grad_func_string = '\n'.join(grad_func_list)
|
||||
|
||||
self._dK_dtheta_code =\
|
||||
"""
|
||||
// _dK_dtheta_code
|
||||
// Code for computing gradient of covariance with respect to parameters.
|
||||
int i;
|
||||
int j;
|
||||
int N = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N;i++){
|
||||
for (j=0;j<num_inducing;j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
|
||||
|
||||
|
||||
# Code to compute gradients for Kdiag TODO: needs clean up
|
||||
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
|
||||
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
|
||||
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
|
||||
self._dKdiag_dtheta_code =\
|
||||
"""
|
||||
// _dKdiag_dtheta_code
|
||||
// Code for computing gradient of diagonal with respect to parameters.
|
||||
int i;
|
||||
int N = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (i=0;i<N;i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
|
||||
gradX_func_list = []
|
||||
if self.output_dim>1:
|
||||
gradX_func_list += c_define_output_indices
|
||||
gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
|
||||
gradX_func_string = "\n".join(gradX_func_list)
|
||||
|
||||
self._dK_dX_code = \
|
||||
"""
|
||||
// _dK_dX_code
|
||||
// Code for computing gradient of covariance with respect to inputs.
|
||||
int i;
|
||||
int j;
|
||||
int N = partial_array->dimensions[0];
|
||||
int num_inducing = partial_array->dimensions[1];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
//#pragma omp parallel for private(j)
|
||||
for (i=0;i<N; i++){
|
||||
for (j=0; j<num_inducing; j++){
|
||||
%s
|
||||
}
|
||||
}
|
||||
%s
|
||||
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
|
||||
|
||||
|
||||
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
|
||||
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
|
||||
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
|
||||
diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
|
||||
|
||||
# Code for gradients of Kdiag wrt X
|
||||
self._dKdiag_dX_code= \
|
||||
"""
|
||||
// _dKdiag_dX_code
|
||||
// Code for computing gradient of diagonal with respect to inputs.
|
||||
int N = partial_array->dimensions[0];
|
||||
int input_dim = X_array->dimensions[1];
|
||||
for (int i=0;i<N; i++){
|
||||
%s
|
||||
}
|
||||
%s
|
||||
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
|
||||
# string representation forces recompile when needed Get rid
|
||||
# of Zs in argument for diagonal. TODO: Why wasn't
|
||||
# diag_func_string called here? Need to check that.
|
||||
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
|
||||
|
||||
# Code to use when only X is provided.
|
||||
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
|
||||
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= PARTIAL2(', '+= 2*PARTIAL2(')
|
||||
self._dK_dtheta_code_X = self._dK_dtheta_code_X.replace('Z2(', 'X2(')
|
||||
self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
|
||||
|
||||
|
||||
#TODO: insert multiple functions here via string manipulation
|
||||
#TODO: similar functions for psi_stats
|
||||
def _get_arg_names(self, Z=None, partial=None):
|
||||
arg_names = ['target','X']
|
||||
for shared_params in self._sp_theta:
|
||||
arg_names += [shared_params.name]
|
||||
if Z is not None:
|
||||
arg_names += ['Z']
|
||||
if partial is not None:
|
||||
arg_names += ['partial']
|
||||
if self.output_dim>1:
|
||||
arg_names += self._split_theta_names
|
||||
arg_names += ['output_dim']
|
||||
return arg_names
|
||||
|
||||
def _weave_inline(self, code, X, target, Z=None, partial=None):
|
||||
output_dim = self.output_dim
|
||||
for shared_params in self._sp_theta:
|
||||
locals()[shared_params.name] = getattr(self, shared_params.name)
|
||||
|
||||
# Need to extract parameters first
|
||||
for split_params in self._split_theta_names:
|
||||
locals()[split_params] = getattr(self, split_params)
|
||||
arg_names = self._get_arg_names(Z, partial)
|
||||
weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
|
||||
|
||||
def K(self,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._K_code_X, X, target)
|
||||
else:
|
||||
self._weave_inline(self._K_code, X, target, Z)
|
||||
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
self._weave_inline(self._Kdiag_code, X, target)
|
||||
|
||||
def dK_dtheta(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
|
||||
else:
|
||||
self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
|
||||
|
||||
def dKdiag_dtheta(self,partial,X,target):
|
||||
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
|
||||
|
||||
def dK_dX(self,partial,X,Z,target):
|
||||
if Z is None:
|
||||
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
|
||||
else:
|
||||
self._weave_inline(self._dK_dX_code, X, target, Z, partial)
|
||||
|
||||
def dKdiag_dX(self,partial,X,target):
|
||||
self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
|
||||
|
||||
def compute_psi_stats(self):
|
||||
#define some normal distributions
|
||||
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
|
||||
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
|
||||
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
|
||||
|
||||
#do some integration!
|
||||
#self._sp_psi0 = ??
|
||||
self._sp_psi1 = self._sp_k
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi1 *= normals[i]
|
||||
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi1 = self._sp_psi1.simplify()
|
||||
|
||||
#and here's psi2 (eek!)
|
||||
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
|
||||
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
|
||||
for i in range(self.input_dim):
|
||||
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
|
||||
sys.stdout.flush()
|
||||
self._sp_psi2 *= normals[i]
|
||||
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
|
||||
clear_cache()
|
||||
self._sp_psi2 = self._sp_psi2.simplify()
|
||||
|
||||
|
||||
def _set_params(self,param):
|
||||
assert param.size == (self.num_params)
|
||||
for i, shared_params in enumerate(self._sp_theta):
|
||||
setattr(self, shared_params.name, param[i])
|
||||
|
||||
if self.output_dim>1:
|
||||
for i, split_params in enumerate(self._split_theta_names):
|
||||
start = self.num_shared_params + i*self.output_dim
|
||||
end = self.num_shared_params + (i+1)*self.output_dim
|
||||
setattr(self, split_params, param[start:end])
|
||||
|
||||
|
||||
def _get_params(self):
|
||||
params = np.zeros(0)
|
||||
for shared_params in self._sp_theta:
|
||||
params = np.hstack((params, getattr(self, shared_params.name)))
|
||||
if self.output_dim>1:
|
||||
for split_params in self._split_theta_names:
|
||||
params = np.hstack((params, getattr(self, split_params).flatten()))
|
||||
return params
|
||||
|
||||
def _get_param_names(self):
|
||||
if self.output_dim>1:
|
||||
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
|
||||
else:
|
||||
return [x.name for x in self._sp_theta]
|
||||
|
|
@ -1,179 +0,0 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
import hashlib
|
||||
|
||||
class rbf(kernpart):
|
||||
"""
|
||||
Rational quadratic kernel.
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \left(\frac{(\ell^{-2} + r^2)}{2*\alpha}\right)^{-\alpha} \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}}
|
||||
|
||||
where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param D: the number of input dimensions
|
||||
:type D: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the lengthscale of the kernel (l)
|
||||
:type lengthscale: float
|
||||
:param smoothness: the smoothness parameter of the kernel
|
||||
:type lengthscale: float
|
||||
|
||||
.. Note: for rational quadratic with different lengthscale on each dimension, see ratquad_ARD
|
||||
"""
|
||||
|
||||
def __init__(self,D,variance=1.,lengthscale=1.,smoothness)=1.:
|
||||
self.D = D
|
||||
self.Nparam = 3
|
||||
self.name = 'ratquad'
|
||||
self._set_params(np.hstack((variance,lengthscale,smoothness)))
|
||||
|
||||
#initialize cache
|
||||
self._Z, self._mu, self._S = np.empty(shape=(3,1))
|
||||
self._X, self._X2, self._params = np.empty(shape=(3,1))
|
||||
|
||||
def _get_params(self):
|
||||
return np.hstack((self.variance,self.lengthscale,self.smoothness))
|
||||
|
||||
def _set_params(self,x):
|
||||
self.variance, self.lengthscale, self.smoothness = x
|
||||
self.lengthscale2 = np.square(self.lengthscale)
|
||||
#reset cached results
|
||||
self._X, self._X2, self._params = np.empty(shape=(3,1))
|
||||
self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
|
||||
|
||||
def _get_param_names(self):
|
||||
return ['variance','lengthscale','smoothness']
|
||||
|
||||
def K(self,X,X2,target):
|
||||
if X2 is None:
|
||||
X2 = X
|
||||
self._K_computations(X,X2)
|
||||
np.add(self.variance*self._K_dvar, target,target)
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
np.add(target,self.variance,target)
|
||||
|
||||
def dK_dtheta(self,partial,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
target[0] += np.sum(self._K_dvar*partial)
|
||||
target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
|
||||
target[2] +=
|
||||
def dKdiag_dtheta(self,partial,X,target):
|
||||
#NB: derivative of diagonal elements wrt lengthscale is 0
|
||||
target[0] += np.sum(partial)
|
||||
|
||||
def dK_dX(self,partial,X,X2,target):
|
||||
self._K_computations(X,X2)
|
||||
if X2 is None:
|
||||
_K_dist = 2*X[:, None, :] - X[None, :, :]
|
||||
else:
|
||||
_K_dist = X[:,None,:]-X2[None,:,:]
|
||||
dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
|
||||
target += np.sum(dK_dX*partial.T[:,:,None],0)
|
||||
|
||||
def dKdiag_dX(self,partial,X,target):
|
||||
pass
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
if not (np.all(X==self._X) and np.all(X2==self._X2)):
|
||||
self._X = X
|
||||
self._X2 = X2
|
||||
if X2 is None: X2 = X
|
||||
XXT = np.dot(X,X2.T)
|
||||
if X is X2:
|
||||
self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
|
||||
else:
|
||||
self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,np.newaxis] + np.sum(np.square(X2),1)[np.newaxis,:])/self.lengthscale2
|
||||
self._K_dvar = (0.5./self.smoothness*(1./self.lengthscale2 + self._K_dist2))**(-self.smoothness)
|
||||
|
||||
def psi0(self,Z,mu,S,target):
|
||||
target += self.variance
|
||||
|
||||
def dpsi0_dtheta(self,partial,Z,mu,S,target):
|
||||
target[0] += 1.
|
||||
|
||||
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
|
||||
pass
|
||||
|
||||
def psi1(self,Z,mu,S,target):
|
||||
self._psi_computations(Z,mu,S)
|
||||
target += self._psi1
|
||||
|
||||
def dpsi1_dtheta(self,partial,Z,mu,S,target):
|
||||
self._psi_computations(Z,mu,S)
|
||||
denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
|
||||
d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
|
||||
target[0] += np.sum(partial*self._psi1/self.variance)
|
||||
target[1] += np.sum(d_length*partial[:,:,None])
|
||||
|
||||
def dpsi1_dZ(self,partial,Z,mu,S,target):
|
||||
self._psi_computations(Z,mu,S)
|
||||
target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
|
||||
|
||||
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
|
||||
self._psi_computations(Z,mu,S)
|
||||
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
|
||||
target_mu += np.sum(partial*tmp*self._psi1_dist,1)
|
||||
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1)
|
||||
|
||||
def psi2(self,Z,mu,S,target):
|
||||
self._psi_computations(Z,mu,S)
|
||||
target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
|
||||
|
||||
def dpsi2_dtheta(self,partial,Z,mu,S,target):
|
||||
"""Shape N,M,M,Ntheta"""
|
||||
self._psi_computations(Z,mu,S)
|
||||
d_var = np.sum(2.*self._psi2/self.variance,0)
|
||||
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
|
||||
d_length = d_length.sum(0)
|
||||
target[0] += np.sum(partial*d_var)
|
||||
target[1] += np.sum(d_length*partial)
|
||||
|
||||
def dpsi2_dZ(self,partial,Z,mu,S,target):
|
||||
"""Returns shape N,M,M,Q"""
|
||||
self._psi_computations(Z,mu,S)
|
||||
dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
|
||||
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
|
||||
|
||||
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
|
||||
"""Think N,M,M,Q """
|
||||
self._psi_computations(Z,mu,S)
|
||||
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
|
||||
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
|
||||
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
|
||||
|
||||
def _psi_computations(self,Z,mu,S):
|
||||
#here are the "statistics" for psi1 and psi2
|
||||
if not np.all(Z==self._Z):
|
||||
#Z has changed, compute Z specific stuff
|
||||
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
|
||||
self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
|
||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscale2 # M,M,Q
|
||||
self._Z = Z
|
||||
|
||||
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
|
||||
#something's changed. recompute EVERYTHING
|
||||
|
||||
#TODO: make more efficient for large Q (using NDL's dot product trick)
|
||||
#psi1
|
||||
self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
|
||||
self._psi1_dist = Z[None,:,:]-mu[:,None,:]
|
||||
self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom
|
||||
self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
|
||||
self._psi1 = self.variance*np.exp(self._psi1_exponent)
|
||||
|
||||
#psi2
|
||||
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
|
||||
self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
|
||||
self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
||||
self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
|
||||
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
|
||||
|
||||
self._Z, self._mu, self._S = Z, mu,S
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue