Revert "Minor edits to reading Lee data in datasets.py"

This reverts commit 730e229238.
This commit is contained in:
James Hensman 2014-06-08 21:02:42 +01:00
parent 1dfe7ed0a8
commit 0812a0e15c
57 changed files with 5 additions and 5969 deletions

View file

@ -1,5 +0,0 @@
Combination covariances, with a particular flag (partition?) allow interogation of the individual covariance contribution. So if we have k_3 = k_1 + k_2 because f_3 = f_1 + f_2 then we allow a multi output covariance over f_1, f_2 and f_3 jointly. Then users can make observations of f_3 and f_1 and build a posterior over f_2 using multiple output encoding. When this flag is set an extra input is added to deal with it. We allow for a new 'combination covariance' of the form scale and add so that
f_3 = f_1*a + f_2
Where a can also be negative.

View file

@ -1,52 +0,0 @@
try:
import sympy as sym
sympy_available=True
except ImportError:
sympy_available=False
import numpy as np
from GPy.util.symbolic import differfln
from symbolic import Symbolic
from sympy import Function, S, oo, I, cos, sin, asin, log, erf, pi, exp, sqrt, sign, gamma, polygamma
class Ode1_eq_lfm(Symbolic):
"""
A symbolic covariance based on a first order differential equation being driven by a latent force that is an exponentiated quadratic.
"""
def __init__(self, output_dim=1, param=None, name='Ode1_eq_lfm'):
input_dim = 2
x_0, z_0, decay_i, decay_j, lengthscale = sym.symbols('x_0, z_0, decay_i, decay_j, lengthscale', positive=True)
scale_i, scale_j = sym.symbols('scale_i, scale_j')
# note that covariance only valid for positive time.
class sim_h(Function):
nargs = 5
@classmethod
def eval(cls, t, tprime, d_i, d_j, l):
half_l_di = 0.5*l*d_i
arg_1 = half_l_di + tprime/l
arg_2 = half_l_di - (t-tprime)/l
ln_part_1 = differfln(arg_1, arg_2)
arg_1 = half_l_di
arg_2 = half_l_di - t/l
ln_part_2 = differfln(half_l_di, half_l_di - t/l)
return (exp(half_l_di*half_l_di
- d_i*(t-tprime)
+ ln_part_1
- log(d_i + d_j))
- exp(half_l_di*half_l_di
- d_i*t - d_j*tprime
+ ln_part_2
- log(d_i + d_j)))
f = scale_i*scale_j*(sim_h(x_0, z_0, decay_i, decay_j, lengthscale)
+ sim_h(z_0, x_0, decay_j, decay_i, lengthscale))
# extra input dim is to signify the output dimension.
super(Ode1_eq_lfm, self).__init__(input_dim, k=f, output_dim=output_dim, name=name)
self.lengthscale.constrain_positive()
self.decay.constrain_positive()

View file

@ -1,445 +0,0 @@
# Check Matthew Rocklin's blog post.
try:
import sympy as sym
sympy_available=True
from sympy.utilities.lambdify import lambdify
from GPy.util.symbolic import stabilise
except ImportError:
sympy_available=False
import numpy as np
from kern import Kern
from scipy.special import gammaln, gamma, erf, erfc, erfcx, polygamma
from GPy.util.functions import normcdf, normcdfln, logistic, logisticln, differfln
from ...core.parameterization import Param
class Symbolic(Kern):
"""
A kernel object, where all the hard work is done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', param=None, active_dims=None, operators=None, func_modules=[]):
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
self.func_modules = func_modules
self.func_modules += [{'gamma':gamma,
'gammaln':gammaln,
'erf':erf, 'erfc':erfc,
'erfcx':erfcx,
'polygamma':polygamma,
'differfln':differfln,
'normcdf':normcdf,
'normcdfln':normcdfln,
'logistic':logistic,
'logisticln':logisticln},
'numpy']
super(Symbolic, self).__init__(input_dim, active_dims, name)
self._sym_k = k
# pull the variable names out of the symbolic covariance function.
sym_vars = [e for e in k.atoms() if e.is_Symbol]
self._sym_x= sorted([e for e in sym_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sym_z= sorted([e for e in sym_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sym_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sym_z)])
assert len(self._sym_x)==len(self._sym_z)
x_dim=len(self._sym_x)
self._sym_kdiag = k
for x, z in zip(self._sym_x, self._sym_z):
self._sym_kdiag = self._sym_kdiag.subs(z, x)
# If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1
assert self.input_dim == x_dim + int(output_dim > 1)
self.output_dim = output_dim
# extract parameter names from the covariance
thetas = sorted([e for e in sym_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1:
self._sym_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sym_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sym_theta_i)==len(self._sym_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j)])
# Extract names of shared parameters (those without a subscript)
self._sym_theta = [theta for theta in thetas if theta not in self._sym_theta_i and theta not in self._sym_theta_j]
self.num_split_params = len(self._sym_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sym_theta_i]
# Add split parameters to the model.
for theta in self._split_theta_names:
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted?
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
self.add_parameter(getattr(self, theta))
self.num_shared_params = len(self._sym_theta)
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._sym_kdiag = self._sym_kdiag.subs(theta_j, theta_i)
else:
self.num_split_params = 0
self._split_theta_names = []
self._sym_theta = thetas
self.num_shared_params = len(self._sym_theta)
# Add parameters to the model.
for theta in self._sym_theta:
val = 1.0
# TODO: what if user has passed a parameter vector, how should that be stored and interpreted? This is the old way before params class.
if param is not None:
if param.has_key(theta.name):
val = param[theta.name]
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
# Differentiate with respect to parameters.
derivative_arguments = self._sym_x + self._sym_theta
if self.output_dim > 1:
derivative_arguments += self._sym_theta_i
self.derivatives = {theta.name : stabilise(sym.diff(self._sym_k,theta)) for theta in derivative_arguments}
self.diag_derivatives = {theta.name : stabilise(sym.diff(self._sym_kdiag,theta)) for theta in derivative_arguments}
# This gives the parameters for the arg list.
self.arg_list = self._sym_x + self._sym_z + self._sym_theta
self.diag_arg_list = self._sym_x + self._sym_theta
if self.output_dim > 1:
self.arg_list += self._sym_theta_i + self._sym_theta_j
self.diag_arg_list += self._sym_theta_i
# Check if there are additional linear operators on the covariance.
self._sym_operators = operators
# TODO: Deal with linear operators
#if self._sym_operators:
# for operator in self._sym_operators:
# psi_stats aren't yet implemented.
if False:
self.compute_psi_stats()
# generate the code for the covariance functions
self._gen_code()
def __add__(self,other):
return spkern(self._sym_k+other._sym_k)
def _gen_code(self):
#fn_theano = theano_function([self.arg_lists], [self._sym_k + self.derivatives], dims={x: 1}, dtypes={x_0: 'float64', z_0: 'float64'})
self._K_function = lambdify(self.arg_list, self._sym_k, self.func_modules)
self._K_derivatives_code = {key: lambdify(self.arg_list, self.derivatives[key], self.func_modules) for key in self.derivatives.keys()}
self._Kdiag_function = lambdify(self.diag_arg_list, self._sym_kdiag, self.func_modules)
self._Kdiag_derivatives_code = {key: lambdify(self.diag_arg_list, self.diag_derivatives[key], self.func_modules) for key in self.diag_derivatives.keys()}
def K(self,X,X2=None):
self._K_computations(X, X2)
return self._K_function(**self._arguments)
def Kdiag(self,X):
self._K_computations(X)
return self._Kdiag_function(**self._diag_arguments)
def _param_grad_helper(self,partial,X,Z,target):
pass
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
self._K_computations(X, X2)
gradients_X = np.zeros_like(X)
for i, x in enumerate(self._sym_x):
gf = self._K_derivatives_code[x.name]
gradients_X[:, i] = (gf(**self._arguments)*dL_dK).sum(1)
if X2 is None:
gradients_X *= 2
return gradients_X
def gradients_X_diag(self, dL_dK, X):
self._K_computations(X)
dX = np.zeros_like(X)
for i, x in enumerate(self._sym_x):
gf = self._Kdiag_derivatives_code[x.name]
dX[:, i] = gf(**self._diag_arguments)*dL_dK
return dX
def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first
self._K_computations(X, X2)
for theta in self._sym_theta:
parameter = getattr(self, theta.name)
gf = self._K_derivatives_code[theta.name]
gradient = (gf(**self._arguments)*dL_dK).sum()
if X2 is not None:
gradient += (gf(**self._reverse_arguments)*dL_dK).sum()
setattr(parameter, 'gradient', gradient)
if self.output_dim>1:
for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = self._K_derivatives_code[theta.name]
A = gf(**self._arguments)*dL_dK
gradient = np.asarray([A[np.where(self._output_ind==i)].T.sum()
for i in np.arange(self.output_dim)])
if X2 is None:
gradient *= 2
else:
A = gf(**self._reverse_arguments)*dL_dK.T
gradient += np.asarray([A[np.where(self._output_ind2==i)].T.sum()
for i in np.arange(self.output_dim)])
setattr(parameter, 'gradient', gradient)
def update_gradients_diag(self, dL_dKdiag, X):
self._K_computations(X)
for theta in self._sym_theta:
parameter = getattr(self, theta.name)
gf = self._Kdiag_derivatives_code[theta.name]
setattr(parameter, 'gradient', (gf(**self._diag_arguments)*dL_dKdiag).sum())
if self.output_dim>1:
for theta in self._sym_theta_i:
parameter = getattr(self, theta.name[:-2])
gf = self._Kdiag_derivatives_code[theta.name]
a = gf(**self._diag_arguments)*dL_dKdiag
setattr(parameter, 'gradient',
np.asarray([a[np.where(self._output_ind==i)].sum()
for i in np.arange(self.output_dim)]))
def _K_computations(self, X, X2=None):
"""Set up argument lists for the derivatives."""
# Could check if this needs doing or not, there could
# definitely be some computational savings by checking for
# parameter updates here.
self._arguments = {}
self._diag_arguments = {}
for i, x in enumerate(self._sym_x):
self._arguments[x.name] = X[:, i][:, None]
self._diag_arguments[x.name] = X[:, i][:, None]
if self.output_dim > 1:
self._output_ind = np.asarray(X[:, -1], dtype='int')
for i, theta in enumerate(self._sym_theta_i):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind])[:, None]
self._diag_arguments[theta.name] = self._arguments[theta.name]
for theta in self._sym_theta:
self._arguments[theta.name] = np.asarray(getattr(self, theta.name))
self._diag_arguments[theta.name] = self._arguments[theta.name]
if X2 is not None:
for i, z in enumerate(self._sym_z):
self._arguments[z.name] = X2[:, i][None, :]
if self.output_dim > 1:
self._output_ind2 = np.asarray(X2[:, -1], dtype='int')
for i, theta in enumerate(self._sym_theta_j):
self._arguments[theta.name] = np.asarray(getattr(self, theta.name[:-2])[self._output_ind2])[None, :]
else:
for z in self._sym_z:
self._arguments[z.name] = self._arguments['x_'+z.name[2:]].T
if self.output_dim > 1:
self._output_ind2 = self._output_ind
for theta in self._sym_theta_j:
self._arguments[theta.name] = self._arguments[theta.name[:-2] + '_i'].T
if X2 is not None:
# These arguments are needed in gradients when X2 is not equal to X.
self._reverse_arguments = self._arguments
for x, z in zip(self._sym_x, self._sym_z):
self._reverse_arguments[x.name] = self._arguments[z.name].T
self._reverse_arguments[z.name] = self._arguments[x.name].T
if self.output_dim > 1:
for theta_i, theta_j in zip(self._sym_theta_i, self._sym_theta_j):
self._reverse_arguments[theta_i.name] = self._arguments[theta_j.name].T
self._reverse_arguments[theta_j.name] = self._arguments[theta_i.name].T
if False:
class Symcombine(CombinationKernel):
"""
Combine list of given sympy covariances together with the provided operations.
"""
def __init__(self, subkerns, operations, name='sympy_combine'):
super(Symcombine, self).__init__(subkerns, name)
for subkern, operation in zip(subkerns, operations):
self._sym_k += self._k_double_operate(subkern._sym_k, operation)
#def _double_operate(self, k, operation):
@Cache_this(limit=2, force_kwargs=['which_parts'])
def K(self, X, X2=None, which_parts=None):
"""
Combine covariances with a linear operator.
"""
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.K(X, X2) for p in which_parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def Kdiag(self, X, which_parts=None):
assert X.shape[1] == self.input_dim
if which_parts is None:
which_parts = self.parts
elif not isinstance(which_parts, (list, tuple)):
# if only one part is given
which_parts = [which_parts]
return reduce(np.add, (p.Kdiag(X) for p in which_parts))
def update_gradients_full(self, dL_dK, X, X2=None):
[p.update_gradients_full(dL_dK, X, X2) for p in self.parts]
def update_gradients_diag(self, dL_dK, X):
[p.update_gradients_diag(dL_dK, X) for p in self.parts]
def gradients_X(self, dL_dK, X, X2=None):
"""Compute the gradient of the objective function with respect to X.
:param dL_dK: An array of gradients of the objective function with respect to the covariance function.
:type dL_dK: np.ndarray (num_samples x num_inducing)
:param X: Observed data inputs
:type X: np.ndarray (num_samples x input_dim)
:param X2: Observed data inputs (optional, defaults to X)
:type X2: np.ndarray (num_inducing x input_dim)"""
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X(dL_dK, X, X2)) for p in self.parts]
return target
def gradients_X_diag(self, dL_dKdiag, X):
target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target
def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2
# compute the "cross" terms
from static import White, Bias
from rbf import RBF
#from rbf_inv import RBFInv
from linear import Linear
#ffrom fixed import Fixed
for p1, p2 in itertools.combinations(self.parts, 2):
# i1, i2 = p1.active_dims, p2.active_dims
# white doesn;t combine with anything
if isinstance(p1, White) or isinstance(p2, White):
pass
# rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else:
raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
for p1 in self.parts:
#compute the effective dL_dpsi1. Extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:# np.setdiff1d(p1.active_dims, ar2, assume_unique): # TODO: Careful, not correct for overlapping active_dims
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
p1.update_gradients_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target = np.zeros(Z.shape)
for p1 in self.parts:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self.parts:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
target += p1.gradients_Z_expectations(eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
return target
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
from static import White, Bias
target_mu = np.zeros(variational_posterior.shape)
target_S = np.zeros(variational_posterior.shape)
for p1 in self._parameters_:
#compute the effective dL_dpsi1. extra terms appear becaue of the cross terms in psi2!
eff_dL_dpsi1 = dL_dpsi1.copy()
for p2 in self._parameters_:
if p2 is p1:
continue
if isinstance(p2, White):
continue
elif isinstance(p2, Bias):
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.variance * 2.
else:
eff_dL_dpsi1 += dL_dpsi2.sum(1) * p2.psi1(Z, variational_posterior) * 2.
a, b = p1.gradients_qX_expectations(dL_dpsi0, eff_dL_dpsi1, dL_dpsi2, Z, variational_posterior)
target_mu += a
target_S += b
return target_mu, target_S
def _getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return super(Add, self)._getstate()
def _setstate(self, state):
super(Add, self)._setstate(state)
def add(self, other, name='sum'):
if isinstance(other, Add):
other_params = other._parameters_.copy()
for p in other_params:
other.remove_parameter(p)
self.add_parameters(*other_params)
else: self.add_parameter(other)
return self

View file

@ -1,564 +0,0 @@
# Check Matthew Rocklin's blog post.
try:
import sympy as sp
sympy_available=True
from sympy.utilities.autowrap import ufuncify
except ImportError:
sympy_available=False
exit()
from sympy.core.cache import clear_cache
from sympy.utilities.codegen import codegen
try:
from scipy import weave
weave_available = True
except ImportError:
weave_available = False
import os
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import sys
import numpy as np
import re
import tempfile
import pdb
import ast
from kernpart import Kernpart
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
# TODO have this set up in a set up file!
user_code_storage = tempfile.gettempdir()
class spkern(Kern):
"""
A kernel object, where all the hard work in done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
if name is None:
name='sympykern'
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
super(spkern, self).__init__(input_dim, name)
self._sp_k = k
# pull the variable names out of the symbolic covariance function.
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
x_dim=len(self._sp_x)
# If it is a multi-output covariance, add an input for indexing the outputs.
self._real_input_dim = x_dim
# Check input dim is number of xs + 1 if output_dim is >1
assert self.input_dim == x_dim + int(output_dim > 1)
self.output_dim = output_dim
# extract parameter names from the covariance
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index (subscripts), they are associated with different outputs.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
# Extract names of shared parameters (those without a subscript)
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
for theta in self._split_theta_names:
setattr(self, theta, Param(theta, np.ones(self.output_dim), None))
self.add_parameters(getattr(self, theta))
#setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta)
#self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else:
self.num_split_params = 0
self._split_theta_names = []
self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta)
#self.num_params = self.num_shared_params
# Add parameters to the model.
for theta in self._sp_theta:
val = 1.0
if param is not None:
if param.has_key(theta):
val = param[theta]
#setattr(self, theta.name, val)
setattr(self, theta.name, Param(theta.name, val, None))
self.add_parameters(getattr(self, theta.name))
#deal with param
#self._set_params(self._get_params())
# Differentiate with respect to parameters.
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
# differentiate with respect to input variables.
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
# psi_stats aren't yet implemented.
if False:
self.compute_psi_stats()
self._code = {}
# generate the code for the covariance functions
self._gen_code()
if weave_available:
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code': None, #self._function_code,
'include_dirs':[user_code_storage, os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"', '"'+self.name+'.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp"), os.path.join(user_code_storage, self.name+'.cpp')],
'extra_compile_args':extra_compile_args,
'extra_link_args':['-lgomp'],
'verbose':True}
self.parameters_changed() # initializes caches
def __add__(self,other):
return spkern(self._sp_k+other._sp_k)
def _gen_code(self):
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
code_list = [('k',self._sp_k)]
# gradients with respect to covariance input
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
# gradient with respect to multiple output parameters
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
# generate c functions from sympy objects
if weave_available:
code_type = "C"
else:
code_type = "PYTHON"
# Need to add the sympy_helpers header in here.
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list,
code_type,
self.name,
argument_sequence=argument_sequence)
# Use weave to compute the underlying functions.
if weave_available:
# put the header file where we can find it
f = file(os.path.join(user_code_storage, self.name + '.h'),'w')
f.write(self._function_header)
f.close()
if weave_available:
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
# put the cpp file in user code storage (defaults to temp file location)
f = file(os.path.join(user_code_storage, self.name + '.cpp'),'w')
else:
# put the python file in user code storage
f = file(os.path.join(user_code_storage, self.name + '.py'),'w')
f.write(self._function_code)
f.close()
if weave_available:
# arg_list will store the arguments required for the C code.
input_arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
# for multiple outputs reverse argument list is also required
if self.output_dim>1:
reverse_input_arg_list = list(input_arg_list)
reverse_input_arg_list.reverse()
# This gives the parameters for the arg list.
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list = input_arg_list + param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list= reverse_input_arg_list + list(param_arg_list)
# For multiple outputs, also need the split parameters.
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
# Now we use the arguments in code that computes the separate parts.
# Any precomputations will be done here eventually.
self._precompute = \
"""
// Precompute code would go here. It will be called when parameters are updated.
"""
# Here's the code to do the looping for K
self._code['K'] =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int n = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/")
# adding a string representation of the function in the
# comment forces recompile when needed
self._code['K_X'] = self._code['K'].replace('Z2(', 'X2(')
# Code to compute diagonal of covariance.
diag_arg_string = re.sub('Z','X',arg_string)
diag_arg_string = re.sub('int jj','//int jj',diag_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
diag_precompute_string = re.sub('int jj','//int jj',precompute_string)
diag_precompute_string = re.sub('Z','X',diag_precompute_string)
diag_precompute_string = re.sub('j','i',diag_precompute_string)
# Code to do the looping for Kdiag
self._code['Kdiag'] =\
"""
// _code['Kdiag']
// Code for computing diagonal of covariance function.
int i;
int n = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<n;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
if self.output_dim>1:
for i, theta in enumerate(self._sp_theta_i):
grad_func_list = [' '*26 + 'TARGET1(ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, arg_string)]
grad_func_list += [' '*26 + 'TARGET1(jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(theta.name, reverse_arg_string)]
grad_func_list = c_define_output_indices+grad_func_list
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' +theta.name + '_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
for i, theta in enumerate(self._sp_theta):
grad_func_list = [' '*26 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string)]
grad_func_string = '\n'.join(grad_func_list)
self._code['dK_d' + theta.name] =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
self._code['dK_d' + theta.name +'_X'] = self._code['dK_d' + theta.name].replace('Z2(', 'X2(')
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL(i)',diag_grad_func_string)
self._code['dKdiag_d' + theta.name] =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<n;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += partial[i*num_inducing+j]*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._code['dK_dX'] = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int n = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<n; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
self._code['dK_dX_X'] = self._code['dK_dX'].replace('Z2(', 'X2(')
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub('PARTIAL2\(i\, i\)','2*PARTIAL(i)',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._code['dKdiag_dX'] = \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int n = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<n; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
#TODO: similar functions when cython available.
#TODO: similar functions when only python available.
def _get_arg_names(self, target=None, Z=None, partial=None):
arg_names = ['X']
if target is not None:
arg_names += ['target']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _generate_inline(self, code, X, target=None, Z=None, partial=None):
output_dim = self.output_dim
# Need to extract parameters to local variables first
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
for split_params in self._split_theta_names:
locals()[split_params] = np.asarray(getattr(self, split_params))
arg_names = self._get_arg_names(target, Z, partial)
if weave_available:
return weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
else:
raise RuntimeError('Weave not available and other variants of sympy covariance not yet implemented')
def K(self,X,Z,target):
if Z is None:
self._generate_inline(self._code['K_X'], X, target)
else:
self._generate_inline(self._code['K'], X, target, Z)
def Kdiag(self,X,target):
self._generate_inline(self._code['Kdiag'], X, target)
def _param_grad_helper(self,partial,X,Z,target):
if Z is None:
self._generate_inline(self._code['dK_dtheta_X'], X, target, Z, partial)
else:
self._generate_inline(self._code['dK_dtheta'], X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._generate_inline(self._code['dKdiag_dtheta'], X, target, Z=None, partial=partial).namelocals()[shared_params.name] = getattr(self, shared_params.name)
def gradients_X(self,partial,X,Z,target):
if Z is None:
self._generate_inline(self._code['dK_dX_X'], X, target, Z, partial)
else:
self._generate_inline(self._code['dK_dX'], X, target, Z, partial)
def dKdiag_dX(self,partial,X,target):
self._generate_inline(self._code['dKdiag_dX'], X, target, Z, partial)
def compute_psi_stats(self):
#define some normal distributions
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
def parameters_changed(self):
# Reset the caches
self._cache, self._cache2 = np.empty(shape=(2, 1))
self._cache3, self._cache4, self._cache5 = np.empty(shape=(3, 1))
def update_gradients_full(self, dL_dK, X):
# Need to extract parameters to local variables first
self._K_computations(X, None)
for shared_params in self._sp_theta:
parameter = getattr(self, shared_params.name)
code = self._code['dK_d' + shared_params.name]
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
for split_params in self._split_theta_names:
parameter = getattr(self, split_params.name)
code = self._code['dK_d' + split_params.name]
setattr(parameter, 'gradient', self._generate_inline(code, X, target=None, Z=None, partial=dL_dK))
# def update_gradients_sparse(self, dL_dKmm, dL_dKnm, dL_dKdiag, X, Z):
# #contributions from Kdiag
# self.variance.gradient = np.sum(dL_dKdiag)
# #from Knm
# self._K_computations(X, Z)
# self.variance.gradient += np.sum(dL_dKnm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient = self._dL_dlengthscales_via_K(dL_dKnm, X, Z)
# else:
# self.lengthscale.gradient = (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKnm)
# #from Kmm
# self._K_computations(Z, None)
# self.variance.gradient += np.sum(dL_dKmm * self._K_dvar)
# if self.ARD:
# self.lengthscale.gradient += self._dL_dlengthscales_via_K(dL_dKmm, Z, None)
# else:
# self.lengthscale.gradient += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dKmm)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, Z):
if Z is None:
self._generate_inline(self._precompute, X)
else:
self._generate_inline(self._precompute, X, Z=Z)

View file

@ -1,134 +0,0 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
four_over_tau = 2./np.pi
class POLY(Kernpart):
"""
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
(degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would
be set by hand.
The kernel is not recommended as it is badly behaved when the
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this
kernel provided (NOTE YET IMPLEMENTED!).
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params=3
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
else:
weight_variance = 1.*np.ones(1)
else:
self.num_params = self.input_dim + 2
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == self.input_dim, "bad number of weight variances"
else:
weight_variance = np.ones(self.input_dim)
raise NotImplementedError
self.degree=degree
self.name='poly_deg' + str(self.degree)
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
def _get_params(self):
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weight_variance = x[1:-1]
self.weight_std = np.sqrt(self.weight_variance)
self.bias_variance = x[-1]
def _get_param_names(self):
if self.num_params == 3:
return ['variance', 'weight_variance', 'bias_variance']
else:
return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def dK_dtheta(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
base_cov_grad = base*dL_dK
target[0] += np.sum(self._K_dvar*dL_dK)
target[1] += (self._K_inner_prod*base_cov_grad).sum()
target[2] += base_cov_grad.sum()
def dK_dX(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):
if self.ARD:
pass
else:
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
self._K_dvar = self._K_poly_arg**self.degree
def _K_diag_computations(self, X):
if self.ARD:
pass
else:
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_dvar = self._K_diag_poly_arg**self.degree

View file

@ -1,70 +0,0 @@
# Check Matthew Rocklin's blog post.
import sympy as sym
import numpy as np
from kern import Kern
from ...core.symbolic import Symbolic_core
class Symbolic(Kern, Symbolic_core):
"""
"""
def __init__(self, input_dim, k=None, output_dim=1, name='symbolic', parameters=None, active_dims=None, operators=None, func_modules=[]):
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
Kern.__init__(self, input_dim, active_dims, name=name)
kdiag = k
self.cacheable = ['X', 'Z']
Symbolic_core.__init__(self, {'k':k,'kdiag':kdiag}, cacheable=self.cacheable, derivatives = ['X', 'theta'], parameters=parameters, func_modules=func_modules)
self.output_dim = output_dim
def __add__(self,other):
return spkern(self._sym_k+other._sym_k)
def _set_expressions(self, expressions):
"""This method is overwritten because we need to modify kdiag by substituting z for x. We do this by calling the parent expression method to extract variables from expressions, then subsitute the z variables that are present with x."""
Symbolic_core._set_expressions(self, expressions)
Symbolic_core._set_variables(self, self.cacheable)
# Substitute z with x to obtain kdiag.
for x, z in zip(self.variables['X'], self.variables['Z']):
expressions['kdiag'] = expressions['kdiag'].subs(z, x)
Symbolic_core._set_expressions(self, expressions)
def K(self,X,X2=None):
if X2 is None:
return self.eval_function('k', X=X, Z=X)
else:
return self.eval_function('k', X=X, Z=X2)
def Kdiag(self,X):
d = self.eval_function('kdiag', X=X)
if not d.shape[0] == X.shape[0]:
d = np.tile(d, (X.shape[0], 1))
return d
def gradients_X(self, dL_dK, X, X2=None):
#if self._X is None or X.base is not self._X.base or X2 is not None:
g = self.eval_gradients_X('k', dL_dK, X=X, Z=X2)
if X2 is None:
g *= 2
return g
def gradients_X_diag(self, dL_dK, X):
return self.eval_gradients_X('kdiag', dL_dK, X=X)
def update_gradients_full(self, dL_dK, X, X2=None):
# Need to extract parameters to local variables first
if X2 is None:
# need to double this inside ...
self.eval_update_gradients('k', dL_dK, X=X)
else:
self.eval_update_gradients('k', dL_dK, X=X, Z=X2)
def update_gradients_diag(self, dL_dKdiag, X):
self.eval_update_gradients('kdiag', dL_dKdiag, X)

View file

@ -1,17 +0,0 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
# Construct covariance functions from matlab saves.
import numpy as np
from kern import kern
import parts
import scipy.io
def read_matlab(mat_data)
mat_data = scipy.io.loadmat(os.path.join(data_path, data_set, 'frey_rawface.mat'))
if mat_data['type']=='cmpnd':
# cmpnd kernel
types = []
for i in range(mat_data['comp'][0][0]):
types.append(mat_data['comp'][0][0][i]

View file

@ -1,318 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from scipy import weave
from ...util.linalg import tdot
from ...util.misc import fast_array_equal
class EQ(Kernpart_stationary):
"""
Exponentiated Quadratic covariance function als known as radial basis function, squared-exponential or Gaussian kernel.
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False):
Kernpart_stationary.__init__(self, input_dim, lengthscale, ARD)
self.name = 'rbf'
self._set_params(np.hstack((variance, self.lengthscale.flatten())))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
def _get_params(self):
return np.hstack((self.variance, self.lengthscale))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
Kernpart_stationary._set_params(self, x[1:])
def _get_param_names(self):
if self.num_params == 2:
return ['variance', 'lengthscale']
else:
return ['variance'] + ['lengthscale_%i' % i for i in range(self.lengthscale.size)]
def K(self, X, X2, target):
self._K_computations(X, X2)
target += self.variance * self._K_dvar
def Kdiag(self, X, target):
np.add(target, self.variance, target)
def dK_dtheta(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
weave.inline(code, arg_names=['num_data',
'num_inducing',
'input_dim',
'X', 'X2',
'target',
'dvardLdK',
'var_len3'],
type_converters=weave.converters.blitz,
**self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp;
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data',
'num_inducing',
'input_dim',
'X', 'X2',
'target',
'dvardLdK',
'var_len3'],
type_converters=weave.converters.blitz,
**self.weave_options)
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK)
def dK_dX(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
dK_dX = (-self.variance / self.lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, mu, S, target):
target += self.variance
def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target[0] += np.sum(dL_dpsi0)
def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass
def psi1(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi1
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
d_length = self._psi1[:,:,None] * ((self._psi1_dist_sq - 1.)/(self.lengthscale*self._psi1_denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
target[1] += dpsi1_dlength.sum()
else:
target[1:] += dpsi1_dlength.sum(0).sum(0)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
denominator = (self.lengthscale2 * (self._psi1_denom))
dpsi1_dZ = -self._psi1[:, :, None] * ((self._psi1_dist / denominator))
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = self._psi1[:, :, None] / self.lengthscale2 / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def psi2(self, Z, mu, S, target):
self._psi_computations(Z, mu, S)
target += self._psi2
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance
d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
target[0] += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
target[1] += dpsi2_dlength.sum()
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist / self.lengthscale2 # num_inducing, num_inducing, input_dim
term2 = self._psi2_mudist / self._psi2_denom / self.lengthscale2 # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = self._psi2[:, :, :, None] / self.lengthscale2 / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
params = self._get_params()
if not (fast_array_equal(X, self._X) and fast_array_equal(X2, self._X2) and fast_array_equal(self._params , params)):
self._X = X.copy()
self._params = params.copy()
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
if not fast_array_equal(Z, self._Z):
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist / self.lengthscale) # M,M,Q
if not fast_array_equal(Z, self._Z) or not fast_array_equal(mu, self._mu) or not fast_array_equal(S, self._S):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] / self.lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = np.square(self._psi1_dist) / self.lengthscale2 / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] / self.lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
lengthscale2 = self.lengthscale2
else:
lengthscale2 = np.ones(input_dim) * self.lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -1,428 +0,0 @@
# Copyright (c) 2012, James Hensman and Ricardo Andrade
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util import ln_diff_erfs
import pdb
from scipy import weave
class Ode1(Kernpart):
"""
Covariance function for first order differential equation driven by an exponetiated quadratic covariance.
This kernel has the form
.. math::
:param num_outputs: number of outputs driven by latent function.
:type num_outputs: int
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,num_outputs, W=None, rank=1, delay=None, kappa=None):
self.rank = rank
self.input_dim = 1
self.name = 'ode1'
self.num_outputs = num_outputs
self.num_params = self.num_outputs*(1. + self.rank) + 1
if kappa is not None:
self.num_params+=num_outputs
if delay is not None:
self.num_params+=num_outputs
self.rank = rank
if W is None:
self.W = 0.5*np.random.randn(self.num_outputs,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.num_outputs,self.rank)
self.W = W
if kappa is not None:
assert kappa.shape==(self.num_outputs,)
self.kappa = kappa
if delay is not None:
assert delay.shape==(self.num_outputs,)
self.delay = delay
self._set_params(self._get_params())
def _get_params(self):
param_list = [self.W.flatten()]
if self.kappa is not None:
param_list.append(self.kappa)
param_list.append(self.decay)
if self.delay is not None:
param_list.append(self.delay)
param_list.append(self.length_scale)
return np.hstack(param_list)
def _set_params(self,x):
assert x.size == self.num_params
end = self.num_outputs*self.rank
self.W = x[:end].reshape(self.num_outputs,self.rank)
start = end
self.B = np.dot(self.W,self.W.T)
if self.kappa is not None:
end+=self.num_outputs
self.kappa = x[start:end]
self.B += np.diag(self.kappa)
start=end
end+=num_outputs
self.decay = x[start:end]
start=end
if self.delay is not None:
end+=num_outputs
self.delay = x[start:end]
start=end
end+=1
self.length_scale = x[start]
self.sigma = np.sqrt(2)*self.length_scale
def _get_param_names(self):
param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.num_outputs)],[])
if self.kappa is not None:
param_names += ['kappa_%i'%i for i in range(self.num_outputs)]
param_names += ['decay_%i'%i for i in range(self.num_outputs)]
if self.delay is not None:
param_names += ['delay_%i'%i for i in range(self.num_outputs)]
param_names+= ['length_scale']
return param_names
def K(self,X,X2,target):
if X.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
self._K_computations()
target += self._scales*self._dK_dvar
if self.gaussian_initial:
# Add covariance associated with initial condition.
t1_mat = self._t[self._rorder, None]
t2_mat = self._t2[None, self._rorder2]
target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
def Kdiag(self,index,target):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
def dK_dtheta(self,dL_dK,index,index2,target):
pass
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
def dK_dX(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(X, X2=None):
"""Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
# TODO: some fast checking here to see if this needs recomputing?
self._t = X[:, 0]
if X.shape[1]==1:
# No index passed, assume single output of ode model.
self._index = np.ones_like(X[:, 1],dtype=np.int)
self._index = np.asarray(X[:, 1],dtype=np.int)
# Sort indices so that outputs are in blocks for computational
# convenience.
self._order = self._index.argsort()
self._index = self._index[self._order]
self._t = self._t[self._order]
self._rorder = self._order.argsort() # rorder is for reversing the order
if X2 is None:
self._t2 = None
self._index2 = None
self._rorder2 = self._rorder
else:
if X2.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
self._t2 = X2[:, 0]
if X.shape[1]==1:
# No index passed, assume single output of ode model.
self._index2 = np.ones_like(X2[:, 1],dtype=np.int)
self._index2 = np.asarray(X2[:, 1],dtype=np.int)
self._order2 = self._index2.argsort()
slef._index2 = self._index2[self._order2]
self._t2 = self._t2[self._order2]
self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
def _K_computations(X, X2):
"""Perform main body of computations for the ode1 covariance function."""
# First extract times and indices.
self._extract_t_indices(X, X2)
self._K_compute_eq()
self._K_compute_eq_x_ode()
if X2 is None:
self._K_ode_eq = self._K_ode_eq.T
else:
self._K_compute_eq_x_ode(transpose=True)
self._K_compute_ode()
# Reorder values of blocks for placing back into _K_dvar.
self._K_dvar[self._rorder, :] = np.vstack((
np.hstack((self._K_eq, self._Keq_ode))
np.hstack((self._K_ode_eq, self.K_ode))))[:, self._rorder2]
def _K_compute_eq():
"""Compute covariance for latent covariance."""
t_eq = self._t[self._index==0]
if t_eq.shape[0]==0:
self._K_eq = np.zeros((0, 0))
return
if self._t2 is None:
self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
else:
t2_eq = self._t2[self._index2==0]
if t2_eq.shape[0]==0:
self._K_eq_eq = np.zeros((0, 0))
return
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
self._K_eq = np.exp(-self._dist2/(2*self.length_scale*self.length_scale))
if self.is_normalise:
self._K_eq/=(np.sqrt(2*np.pi)*self.length_scale)
def _K_compute_ode_eq(transpose=False):
"""Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
:param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
:type transpose: bool"""
if transpose:
t_ode = self._t2[self._index>0]
index_ode = self._index2[self._index>0]-1
if t_ode.shape[0]==0:
self._K_ode = np.zeros((0, 0))
return
if self._t2 is not None:
t2_ode = self._t2[self._index2>0]
index2_ode = self._index2[self._index2>0]-1
if t2_eq.shape[0]==0:
self._K_ode = np.zeros((0, 0))
return
else:
# Matrix giving scales of each output
self._scale = np.zeros((t_ode.shape[0], t_eq.shape[0]))
code="""
for(int i=0;i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = self.W[index_sim[i]+index_eq[j]*num_outputs];
}
}
"""
scale_mat, B = self._scale, self._B
N, N2, num_outputs = index_ode.size, index_eq.size, self.num_outputs
weave.inline(code,['index_ode', 'index_eq',
'scale_mat', 'B',
'N', 'N2', 'num_outputs'])
else:
self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
code = """
for(int i=0; i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
}
}
"""
scale_mat, B = self._scale, self._B
N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
weave.inline(code, ['index_ode', 'index2_ode',
'scale_mat', 'B',
'N', 'N2', 'num_outputs'])
if transpose:
t_ode = t2 - self.delay
t_eq_mat = t1[None, :]
else:
t_ode = t1 - self.delay
t_eq_mat = t2[None, :]
t_ode_mat = t_ode[:, None]
diff_t = (t_ode_mat - t_eq_mat)
sigma = sqrt(2/self.inverseWidth)
invSigmaDiffT = 1/sigma*diff_t
halfSigmaD_i = 0.5*sigma*self.decay
if self.isStationary == false
[ln_part, signs] = ln_diff_erfs(halfSigmaD_i + t2Mat/sigma, halfSigmaD_i - invSigmaDiffT)
else
[ln_part, signs] = ln_diff_erfs(inf, halfSigmaD_i - invSigmaDiffT)
end
sK = signs .* exp(halfSigmaD_i*halfSigmaD_i - self.decay*diff_t + ln_part)
sK *= 0.5
if not self.is_normalised:
sK *= sqrt(pi)*self.sigma
if transpose:
self._K_eq_ode =
else:
self._K_ode_eq = sK
return K
def _K_compute_ode():
# Compute covariances between outputs of the ODE models.
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if t_ode.shape[0]==0:
self._K_ode = np.zeros((0, 0))
return
if self._t2 is not None:
t2_ode = self._t2[self._index2>0]
index2_ode = self._index2[self._index2>0]-1
if t2_eq.shape[0]==0:
self._K_ode = np.zeros((0, 0))
return
if self._index2 is None:
# Matrix giving scales of each output
self._scale = np.zeros((t_ode.shape[0], t_ode.shape[0]))
code="""
for(int i=0;i<N; i++){
scale_mat[i+i*N] = B[index_ode[i]+num_outputs*(index_ode[i])];
for(int j=0; j<i; j++){
scale_mat[j+i*N] = B[index_ode[i]+num_outputs*index_ode[j]];
scale_mat[i+j*N] = scale_mat[j+i*N];
}
}
"""
scale_mat, B = self._scale, self._B
N, num_outputs = index_ode.size, self.num_outputs
weave.inline(code,['index_ode',
'scale_mat', 'B',
'N', 'num_outputs'])
else:
self._scale = np.zeros((t_ode.shape[0], t2_ode.shape[0]))
code = """
for(int i=0; i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = B[index_ode[i]+num_outputs*index2_ode[j]]
}
}
"""
scale_mat, B = self._scale, self._B
N, N2, num_outputs = index_ode.size, index2.size, self.num_outputs
weave.inline(code, ['index_ode', 'index2_ode',
'scale_mat', 'B',
'N', 'N2', 'num_outputs'])
# When index is identical
if self.is_stationary:
h = self._compute_H_stat(t_ode, index_ode, t2_ode, index2_ode)
else:
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode)
if t2 is None:
self._K_ode = 0.5 * (h + h.T)
else:
if self.is_stationary:
h2 = self._compute_H_stat(t2, index2, t, index)
else:
h2 = self._compute_H(t2, index2, t, index)
self._K_ode += 0.5 * (h + h2.T)
if not self.is_normalized:
self._K_ode *= np.sqrt(np.pi)*sigma
def _compute_H(t, index, t2, index2, update_derivatives=False):
"""Helper function for computing part of the ode1 covariance function.
:param t: first time input.
:type t: array
:param index: Indices of first output.
:type index: array of int
:param t2: second time input.
:type t2: array
:param index2: Indices of second output.
:type index2: array of int
:param update_derivatives: whether to update derivatives (default is False)
:return h : result of this subcomponent of the kernel for the given values.
:rtype: ndarray
"""
if t.shape[1] > 1 or t2.shape[1] > 1:
raise error('Input can only have one column')
# Vector of decays and delays associated with each output.
Decay = np.zeros(t.shape[0])
Delay = np.zeros(t.shape[0])
Decay2 = np.zeros(t2.shape[0])
Delay2 = np.zeros(t2.shape[0])
code="""
for(int i=0;i<N; i++){
Delay[i] = decays[index[i]];
Decay[i] = delays[index[i]];
}
for(int i=0; i<N2; i++){
Delay2[i] = decays[index2[i]];
Decay2[i] = delays[index2[i]];
}
"""
delays, decays = self.delays, self.decays
N, N2 = index.size, index2.size
weave.inline(code,['index_ode',
'Delay', 'Decay',
'Delay2', 'Decay2',
'delays', 'decays',
'N', 'N2'])
t_mat = t[:, None]-Delay[:, None]
t2_mat = t2[None, :]-Delay2[None, :]
diff_t = (t_mat - t2_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
half_sigma_decay_i - inv_sigma_diff_t)
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
half_sigma_decay_i - t_mat/self.sigma)
h = sign1*np.exp(half_sigma_decay_i
*half_sigma_decay_i
-Decay[:, None]*diff_t+ln_part_1
-np.log(Decay[:, None] + Decay2[None, :]))
h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
-Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
-np.log(Decay[:, None] + Decay2[None, :]))
# if update_derivatives:
# sigma2 = self.sigma*self.sigma
# # Update ith decay gradient
# dh_ddecay += (0.5*self.decay[i]*sigma2*(self.decay[i] + decay[j])-1)*h
# + (-diff_t*sign1*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*diff_t+ln_part_1)
# +t_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-self.decay[i]*t_mat - decay[j]*t2_mat+ln_part_2)) ...
# +self.sigma/sqrt(pi)*(-np.exp(-diff_t*diff_t/sigma2)
# +np.exp(-t2_mat*t2_mat/sigma2-self.decay[i]*t_mat)
# +np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat) ...
# -np.exp(-(self.decay[i]*t_mat + decay[j]*t2_mat)))
# self._dh_ddecay[i] += real(dh_ddecay/(self.decay[i]+decay[j]))
# # Update jth decay gradient
# dh_ddecay = t2_mat*sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i-(self.decay[i]*t_mat + decay[j]*t2_mat)+ln_part_2)-h
# self._dh_ddecay[j] += real(dh_ddecay/(self.decay[i] + decay[j]))
# # Update sigma gradient
# self._dh_dsigma += 0.5*self.decay[i]*self.decay[i]*self.sigma*h + 2/(np.sqrt(np.pi)*(self.decay[i]+decay[j]))*((-diff_t/sigma2-self.decay[i]/
# 2)*np.exp(-diff_t*
# diff_t/sigma2)
# + (-t2_mat/sigma2+self.decay[i]/2)
# *np.exp(-t2_mat*t2_mat/sigma2
# -self.decay[i]*t_mat)
# - (-t_mat/sigma2-self.decay[i]/2)
# *np.exp(-t_mat*t_mat/sigma2-decay[j]*t2_mat)
# - self.decay[i]/2*np.exp(-(self.decay[i]*t_mat+decay[j]*t2_mat)))

View file

@ -1,461 +0,0 @@
import numpy as np
import sympy as sp
from sympy.utilities.codegen import codegen
from sympy.core.cache import clear_cache
from scipy import weave
import re
import os
import sys
current_dir = os.path.dirname(os.path.abspath(os.path.dirname(__file__)))
import tempfile
import pdb
import ast
from kernpart import Kernpart
from ...util.config import config
class spkern(Kernpart):
"""
A kernel object, where all the hard work in done by sympy.
:param k: the covariance function
:type k: a positive definite sympy function of x_0, z_0, x_1, z_1, x_2, z_2...
To construct a new sympy kernel, you'll need to define:
- a kernel function using a sympy object. Ensure that the kernel is of the form k(x,z).
- that's it! we'll extract the variables from the function k.
Note:
- to handle multiple inputs, call them x_1, z_1, etc
- to handle multpile correlated outputs, you'll need to add parameters with an index, such as lengthscale_i and lengthscale_j.
"""
def __init__(self, input_dim, k=None, output_dim=1, name=None, param=None):
if name is None:
self.name='sympykern'
else:
self.name = name
if k is None:
raise ValueError, "You must provide an argument for the covariance function."
self._sp_k = k
sp_vars = [e for e in k.atoms() if e.is_Symbol]
self._sp_x= sorted([e for e in sp_vars if e.name[0:2]=='x_'],key=lambda x:int(x.name[2:]))
self._sp_z= sorted([e for e in sp_vars if e.name[0:2]=='z_'],key=lambda z:int(z.name[2:]))
# Check that variable names make sense.
assert all([x.name=='x_%i'%i for i,x in enumerate(self._sp_x)])
assert all([z.name=='z_%i'%i for i,z in enumerate(self._sp_z)])
assert len(self._sp_x)==len(self._sp_z)
self.input_dim = len(self._sp_x)
self._real_input_dim = self.input_dim
if output_dim > 1:
self.input_dim += 1
assert self.input_dim == input_dim
self.output_dim = output_dim
# extract parameter names
thetas = sorted([e for e in sp_vars if not (e.name[0:2]=='x_' or e.name[0:2]=='z_')],key=lambda e:e.name)
# Look for parameters with index.
if self.output_dim>1:
self._sp_theta_i = sorted([e for e in thetas if (e.name[-2:]=='_i')], key=lambda e:e.name)
self._sp_theta_j = sorted([e for e in thetas if (e.name[-2:]=='_j')], key=lambda e:e.name)
# Make sure parameter appears with both indices!
assert len(self._sp_theta_i)==len(self._sp_theta_j)
assert all([theta_i.name[:-2]==theta_j.name[:-2] for theta_i, theta_j in zip(self._sp_theta_i, self._sp_theta_j)])
# Extract names of shared parameters
self._sp_theta = [theta for theta in thetas if theta not in self._sp_theta_i and theta not in self._sp_theta_j]
self.num_split_params = len(self._sp_theta_i)
self._split_theta_names = ["%s"%theta.name[:-2] for theta in self._sp_theta_i]
for theta in self._split_theta_names:
setattr(self, theta, np.ones(self.output_dim))
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params+self.num_split_params*self.output_dim
else:
self.num_split_params = 0
self._split_theta_names = []
self._sp_theta = thetas
self.num_shared_params = len(self._sp_theta)
self.num_params = self.num_shared_params
for theta in self._sp_theta:
val = 1.0
if param is not None:
if param.has_key(theta):
val = param[theta]
setattr(self, theta.name, val)
#deal with param
self._set_params(self._get_params())
#Differentiate!
self._sp_dk_dtheta = [sp.diff(k,theta).simplify() for theta in self._sp_theta]
if self.output_dim > 1:
self._sp_dk_dtheta_i = [sp.diff(k,theta).simplify() for theta in self._sp_theta_i]
self._sp_dk_dx = [sp.diff(k,xi).simplify() for xi in self._sp_x]
if False:
self.compute_psi_stats()
self._gen_code()
if False:
extra_compile_args = ['-ftree-vectorize', '-mssse3', '-ftree-vectorizer-verbose=5']
else:
extra_compile_args = []
self.weave_kwargs = {
'support_code':self._function_code,
'include_dirs':[tempfile.gettempdir(), os.path.join(current_dir,'parts/')],
'headers':['"sympy_helpers.h"'],
'sources':[os.path.join(current_dir,"parts/sympy_helpers.cpp")],
'extra_compile_args':extra_compile_args,
'extra_link_args':[],
'verbose':True}
if config.getboolean('parallel', 'openmp'): self.weave_kwargs.append('-lgomp')
def __add__(self,other):
return spkern(self._sp_k+other._sp_k)
def _gen_code(self):
"""Generates the C functions necessary for computing the covariance function using the sympy objects as input."""
#TODO: maybe generate one C function only to save compile time? Also easier to take that as a basis and hand craft other covariances??
#generate c functions from sympy objects
argument_sequence = self._sp_x+self._sp_z+self._sp_theta
code_list = [('k',self._sp_k)]
# gradients with respect to covariance input
code_list += [('dk_d%s'%x.name,dx) for x,dx in zip(self._sp_x,self._sp_dk_dx)]
# gradient with respect to parameters
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta,self._sp_dk_dtheta)]
# gradient with respect to multiple output parameters
if self.output_dim > 1:
argument_sequence += self._sp_theta_i + self._sp_theta_j
code_list += [('dk_d%s'%theta.name,dtheta) for theta,dtheta in zip(self._sp_theta_i,self._sp_dk_dtheta_i)]
(foo_c,self._function_code), (foo_h,self._function_header) = \
codegen(code_list, "C",'kernel_code',argument_sequence=argument_sequence)
#put the header file where we can find it
f = file(os.path.join(tempfile.gettempdir(),'kernel_code.h'),'w')
f.write(self._function_header)
f.close()
# Substitute any known derivatives which sympy doesn't compute
self._function_code = re.sub('DiracDelta\(.+?,.+?\)','0.0',self._function_code)
############################################################
# This is the basic argument construction for the C code. #
############################################################
arg_list = (["X2(i, %s)"%x.name[2:] for x in self._sp_x]
+ ["Z2(j, %s)"%z.name[2:] for z in self._sp_z])
# for multiple outputs need to also provide these arguments reversed.
if self.output_dim>1:
reverse_arg_list = list(arg_list)
reverse_arg_list.reverse()
# Add in any 'shared' parameters to the list.
param_arg_list = [shared_params.name for shared_params in self._sp_theta]
arg_list += param_arg_list
precompute_list=[]
if self.output_dim > 1:
reverse_arg_list+=list(param_arg_list)
split_param_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['ii', 'jj'] for theta in self._sp_theta_i]
split_param_reverse_arg_list = ["%s1(%s)"%(theta.name[:-2].upper(),index) for index in ['jj', 'ii'] for theta in self._sp_theta_i]
arg_list += split_param_arg_list
reverse_arg_list += split_param_reverse_arg_list
# Extract the right output indices from the inputs.
c_define_output_indices = [' '*16 + "int %s=(int)%s(%s, %i);"%(index, var, index2, self.input_dim-1) for index, var, index2 in zip(['ii', 'jj'], ['X2', 'Z2'], ['i', 'j'])]
precompute_list += c_define_output_indices
reverse_arg_string = ", ".join(reverse_arg_list)
arg_string = ", ".join(arg_list)
precompute_string = "\n".join(precompute_list)
# Code to compute argments string needed when only X is provided.
X_arg_string = re.sub('Z','X',arg_string)
# Code to compute argument string when only diagonal is required.
diag_arg_string = re.sub('int jj','//int jj',X_arg_string)
diag_arg_string = re.sub('j','i',diag_arg_string)
if precompute_string == '':
# if it's not multioutput, the precompute strings are set to zero
diag_precompute_string = ''
diag_precompute_replace = ''
else:
# for multioutput we need to extract the index of the output form the input.
diag_precompute_string = precompute_list[0]
diag_precompute_replace = precompute_list[1]
# Here's the code to do the looping for K
self._K_code =\
"""
// _K_code
// Code for computing the covariance function.
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
//target[i*num_inducing+j] =
TARGET2(i, j) += k(%s);
}
}
%s
"""%(precompute_string,arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
self._K_code_X = """
// _K_code_X
// Code for computing the covariance function.
int i;
int j;
int N = target_array->dimensions[0];
int num_inducing = target_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
%s // int ii=(int)X2(i, 1);
TARGET2(i, i) += k(%s);
for (j=0;j<i;j++){
%s //int jj=(int)X2(j, 1);
double kval = k(%s); //double kval = k(X2(i, 0), shared_lengthscale, LENGTHSCALE1(ii), SCALE1(ii));
TARGET2(i, j) += kval;
TARGET2(j, i) += kval;
}
}
/*%s*/
"""%(diag_precompute_string, diag_arg_string, re.sub('Z2', 'X2', diag_precompute_replace), X_arg_string,str(self._sp_k)) #adding a string representation forces recompile when needed
# Code to do the looping for Kdiag
self._Kdiag_code =\
"""
// _Kdiag_code
// Code for computing diagonal of covariance function.
int i;
int N = target_array->dimensions[0];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for
for (i=0;i<N;i++){
%s
//target[i] =
TARGET1(i)=k(%s);
}
%s
"""%(diag_precompute_string,diag_arg_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code to compute gradients
grad_func_list = []
if self.output_dim>1:
grad_func_list += c_define_output_indices
grad_func_list += [' '*16 + 'TARGET1(%i+ii) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += [' '*16 + 'TARGET1(%i+jj) += PARTIAL2(i, j)*dk_d%s(%s);'%(self.num_shared_params+i*self.output_dim, theta.name, reverse_arg_string) for i, theta in enumerate(self._sp_theta_i)]
grad_func_list += ([' '*16 + 'TARGET1(%i) += PARTIAL2(i, j)*dk_d%s(%s);'%(i,theta.name,arg_string) for i,theta in enumerate(self._sp_theta)])
grad_func_string = '\n'.join(grad_func_list)
self._dK_dtheta_code =\
"""
// _dK_dtheta_code
// Code for computing gradient of covariance with respect to parameters.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N;i++){
for (j=0;j<num_inducing;j++){
%s
}
}
%s
"""%(grad_func_string,"/*"+str(self._sp_k)+"*/") # adding a string representation forces recompile when needed
# Code to compute gradients for Kdiag TODO: needs clean up
diag_grad_func_string = re.sub('Z','X',grad_func_string,count=0)
diag_grad_func_string = re.sub('int jj','//int jj',diag_grad_func_string)
diag_grad_func_string = re.sub('j','i',diag_grad_func_string)
diag_grad_func_string = re.sub('PARTIAL2\(i, i\)','PARTIAL1(i)',diag_grad_func_string)
self._dKdiag_dtheta_code =\
"""
// _dKdiag_dtheta_code
// Code for computing gradient of diagonal with respect to parameters.
int i;
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (i=0;i<N;i++){
%s
}
%s
"""%(diag_grad_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
# Code for gradients wrt X, TODO: may need to deal with special case where one input is actually an output.
gradX_func_list = []
if self.output_dim>1:
gradX_func_list += c_define_output_indices
gradX_func_list += ["TARGET2(i, %i) += PARTIAL2(i, j)*dk_dx_%i(%s);"%(q,q,arg_string) for q in range(self._real_input_dim)]
gradX_func_string = "\n".join(gradX_func_list)
self._dK_dX_code = \
"""
// _dK_dX_code
// Code for computing gradient of covariance with respect to inputs.
int i;
int j;
int N = partial_array->dimensions[0];
int num_inducing = partial_array->dimensions[1];
int input_dim = X_array->dimensions[1];
//#pragma omp parallel for private(j)
for (i=0;i<N; i++){
for (j=0; j<num_inducing; j++){
%s
}
}
%s
"""%(gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a string representation forces recompile when needed
diag_gradX_func_string = re.sub('Z','X',gradX_func_string,count=0)
diag_gradX_func_string = re.sub('int jj','//int jj',diag_gradX_func_string)
diag_gradX_func_string = re.sub('j','i',diag_gradX_func_string)
diag_gradX_func_string = re.sub('PARTIAL2\(i, i\)','2*PARTIAL1(i)',diag_gradX_func_string)
# Code for gradients of Kdiag wrt X
self._dKdiag_dX_code= \
"""
// _dKdiag_dX_code
// Code for computing gradient of diagonal with respect to inputs.
int N = partial_array->dimensions[0];
int input_dim = X_array->dimensions[1];
for (int i=0;i<N; i++){
%s
}
%s
"""%(diag_gradX_func_string,"/*"+str(self._sp_k)+"*/") #adding a
# string representation forces recompile when needed Get rid
# of Zs in argument for diagonal. TODO: Why wasn't
# diag_func_string called here? Need to check that.
#self._dKdiag_dX_code = self._dKdiag_dX_code.replace('Z[j', 'X[i')
# Code to use when only X is provided.
self._dK_dtheta_code_X = self._dK_dtheta_code.replace('Z[', 'X[')
self._dK_dX_code_X = self._dK_dX_code.replace('Z[', 'X[').replace('+= PARTIAL2(', '+= 2*PARTIAL2(')
self._dK_dtheta_code_X = self._dK_dtheta_code_X.replace('Z2(', 'X2(')
self._dK_dX_code_X = self._dK_dX_code_X.replace('Z2(', 'X2(')
#TODO: insert multiple functions here via string manipulation
#TODO: similar functions for psi_stats
def _get_arg_names(self, Z=None, partial=None):
arg_names = ['target','X']
for shared_params in self._sp_theta:
arg_names += [shared_params.name]
if Z is not None:
arg_names += ['Z']
if partial is not None:
arg_names += ['partial']
if self.output_dim>1:
arg_names += self._split_theta_names
arg_names += ['output_dim']
return arg_names
def _weave_inline(self, code, X, target, Z=None, partial=None):
output_dim = self.output_dim
for shared_params in self._sp_theta:
locals()[shared_params.name] = getattr(self, shared_params.name)
# Need to extract parameters first
for split_params in self._split_theta_names:
locals()[split_params] = getattr(self, split_params)
arg_names = self._get_arg_names(Z, partial)
weave.inline(code=code, arg_names=arg_names,**self.weave_kwargs)
def K(self,X,Z,target):
if Z is None:
self._weave_inline(self._K_code_X, X, target)
else:
self._weave_inline(self._K_code, X, target, Z)
def Kdiag(self,X,target):
self._weave_inline(self._Kdiag_code, X, target)
def dK_dtheta(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dtheta_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dtheta_code, X, target, Z, partial)
def dKdiag_dtheta(self,partial,X,target):
self._weave_inline(self._dKdiag_dtheta_code, X, target, Z=None, partial=partial)
def dK_dX(self,partial,X,Z,target):
if Z is None:
self._weave_inline(self._dK_dX_code_X, X, target, Z, partial)
else:
self._weave_inline(self._dK_dX_code, X, target, Z, partial)
def dKdiag_dX(self,partial,X,target):
self._weave_inline(self._dKdiag_dX_code, X, target, Z=None, partial=partial)
def compute_psi_stats(self):
#define some normal distributions
mus = [sp.var('mu_%i'%i,real=True) for i in range(self.input_dim)]
Ss = [sp.var('S_%i'%i,positive=True) for i in range(self.input_dim)]
normals = [(2*sp.pi*Si)**(-0.5)*sp.exp(-0.5*(xi-mui)**2/Si) for xi, mui, Si in zip(self._sp_x, mus, Ss)]
#do some integration!
#self._sp_psi0 = ??
self._sp_psi1 = self._sp_k
for i in range(self.input_dim):
print 'perfoming integrals %i of %i'%(i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi1 *= normals[i]
self._sp_psi1 = sp.integrate(self._sp_psi1,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi1 = self._sp_psi1.simplify()
#and here's psi2 (eek!)
zprime = [sp.Symbol('zp%i'%i) for i in range(self.input_dim)]
self._sp_psi2 = self._sp_k.copy()*self._sp_k.copy().subs(zip(self._sp_z,zprime))
for i in range(self.input_dim):
print 'perfoming integrals %i of %i'%(self.input_dim+i+1,2*self.input_dim)
sys.stdout.flush()
self._sp_psi2 *= normals[i]
self._sp_psi2 = sp.integrate(self._sp_psi2,(self._sp_x[i],-sp.oo,sp.oo))
clear_cache()
self._sp_psi2 = self._sp_psi2.simplify()
def _set_params(self,param):
assert param.size == (self.num_params)
for i, shared_params in enumerate(self._sp_theta):
setattr(self, shared_params.name, param[i])
if self.output_dim>1:
for i, split_params in enumerate(self._split_theta_names):
start = self.num_shared_params + i*self.output_dim
end = self.num_shared_params + (i+1)*self.output_dim
setattr(self, split_params, param[start:end])
def _get_params(self):
params = np.zeros(0)
for shared_params in self._sp_theta:
params = np.hstack((params, getattr(self, shared_params.name)))
if self.output_dim>1:
for split_params in self._split_theta_names:
params = np.hstack((params, getattr(self, split_params).flatten()))
return params
def _get_param_names(self):
if self.output_dim>1:
return [x.name for x in self._sp_theta] + [x.name[:-2] + str(i) for x in self._sp_theta_i for i in range(self.output_dim)]
else:
return [x.name for x in self._sp_theta]

View file

@ -1,179 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import kernpart
import numpy as np
import hashlib
class rbf(kernpart):
"""
Rational quadratic kernel.
.. math::
k(r) = \sigma^2 \left(\frac{(\ell^{-2} + r^2)}{2*\alpha}\right)^{-\alpha} \qquad \qquad \\text{ where } r = \sqrt{\frac{\sum_{i=1}^d (x_i-x^\prime_i)^2}{\ell^2}}
where \ell is the lengthscale, \alpha the smoothness, \sigma^2 the variance and d the dimensionality of the input.
:param D: the number of input dimensions
:type D: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the lengthscale of the kernel (l)
:type lengthscale: float
:param smoothness: the smoothness parameter of the kernel
:type lengthscale: float
.. Note: for rational quadratic with different lengthscale on each dimension, see ratquad_ARD
"""
def __init__(self,D,variance=1.,lengthscale=1.,smoothness)=1.:
self.D = D
self.Nparam = 3
self.name = 'ratquad'
self._set_params(np.hstack((variance,lengthscale,smoothness)))
#initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3,1))
self._X, self._X2, self._params = np.empty(shape=(3,1))
def _get_params(self):
return np.hstack((self.variance,self.lengthscale,self.smoothness))
def _set_params(self,x):
self.variance, self.lengthscale, self.smoothness = x
self.lengthscale2 = np.square(self.lengthscale)
#reset cached results
self._X, self._X2, self._params = np.empty(shape=(3,1))
self._Z, self._mu, self._S = np.empty(shape=(3,1)) # cached versions of Z,mu,S
def _get_param_names(self):
return ['variance','lengthscale','smoothness']
def K(self,X,X2,target):
if X2 is None:
X2 = X
self._K_computations(X,X2)
np.add(self.variance*self._K_dvar, target,target)
def Kdiag(self,X,target):
np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target):
self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*partial)
target[1] += np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial)
target[2] +=
def dKdiag_dtheta(self,partial,X,target):
#NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial)
def dK_dX(self,partial,X,X2,target):
self._K_computations(X,X2)
if X2 is None:
_K_dist = 2*X[:, None, :] - X[None, :, :]
else:
_K_dist = X[:,None,:]-X2[None,:,:]
dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target):
pass
def _K_computations(self,X,X2):
if not (np.all(X==self._X) and np.all(X2==self._X2)):
self._X = X
self._X2 = X2
if X2 is None: X2 = X
XXT = np.dot(X,X2.T)
if X is X2:
self._K_dist2 = (-2.*XXT + np.diag(XXT)[:,np.newaxis] + np.diag(XXT)[np.newaxis,:])/self.lengthscale2
else:
self._K_dist2 = (-2.*XXT + np.sum(np.square(X),1)[:,np.newaxis] + np.sum(np.square(X2),1)[np.newaxis,:])/self.lengthscale2
self._K_dvar = (0.5./self.smoothness*(1./self.lengthscale2 + self._K_dist2))**(-self.smoothness)
def psi0(self,Z,mu,S,target):
target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target):
target[0] += 1.
def dpsi0_dmuS(self,Z,mu,S,target_mu,target_S):
pass
def psi1(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += self._psi1
def dpsi1_dtheta(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S)
denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
target[0] += np.sum(partial*self._psi1/self.variance)
target[1] += np.sum(d_length*partial[:,:,None])
def dpsi1_dZ(self,partial,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += np.sum(partial[:,:,None]*-self._psi1[:,:,None]*self._psi1_dist/self.lengthscale2/self._psi1_denom,0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S):
self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
target_mu += np.sum(partial*tmp*self._psi1_dist,1)
target_S += np.sum(partial*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S)
target += self._psi2.sum(0) #TODO: psi2 should be NxMxM (for het. noise)
def dpsi2_dtheta(self,partial,Z,mu,S,target):
"""Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S)
d_var = np.sum(2.*self._psi2/self.variance,0)
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
d_length = d_length.sum(0)
target[0] += np.sum(partial*d_var)
target[1] += np.sum(d_length*partial)
def dpsi2_dZ(self,partial,Z,mu,S,target):
"""Returns shape N,M,M,Q"""
self._psi_computations(Z,mu,S)
dZ = self._psi2[:,:,:,None]/self.lengthscale2*(-0.5*self._psi2_Zdist + self._psi2_mudist/self._psi2_denom)
target += np.sum(partial[None,:,:,None]*dZ,0).sum(1)
def dpsi2_dmuS(self,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """
self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (partial*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
def _psi_computations(self,Z,mu,S):
#here are the "statistics" for psi1 and psi2
if not np.all(Z==self._Z):
#Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5*(Z[:,None,:] +Z[None,:,:]) # M,M,Q
self._psi2_Zdist = Z[:,None,:]-Z[None,:,:] # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist)/self.lengthscale2 # M,M,Q
self._Z = Z
if not (np.all(Z==self._Z) and np.all(mu==self._mu) and np.all(S==self._S)):
#something's changed. recompute EVERYTHING
#TODO: make more efficient for large Q (using NDL's dot product trick)
#psi1
self._psi1_denom = S[:,None,:]/self.lengthscale2 + 1.
self._psi1_dist = Z[None,:,:]-mu[:,None,:]
self._psi1_dist_sq = np.square(self._psi1_dist)/self.lengthscale2/self._psi1_denom
self._psi1_exponent = -0.5*np.sum(self._psi1_dist_sq+np.log(self._psi1_denom),-1)
self._psi1 = self.variance*np.exp(self._psi1_exponent)
#psi2
self._psi2_denom = 2.*S[:,None,None,:]/self.lengthscale2+1. # N,M,M,Q
self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
self._psi2_exponent = np.sum(-self._psi2_Zdist_sq/4. -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M
self._psi2 = np.square(self.variance)*np.exp(self._psi2_exponent) # N,M,M
self._Z, self._mu, self._S = Z, mu,S