tidying in kern

This commit is contained in:
James Hensman 2014-02-24 15:56:06 +00:00
parent 70ada7fa46
commit 4215f5fb28
14 changed files with 1 additions and 687 deletions

161
GPy/kern/_src/todo/ODE_1.py Normal file
View file

@ -0,0 +1,161 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class ODE_1(Kernpart):
"""
kernel resultiong from a first order ODE with OU driving GP
:param input_dim: the number of input dimension, has to be equal to one
:type input_dim: int
:param varianceU: variance of the driving GP
:type varianceU: float
:param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
:type lengthscaleU: float
:param varianceY: 'variance' of the transfer function
:type varianceY: float
:param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
:type lengthscaleY: float
:rtype: kernel object
"""
def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
assert input_dim==1, "Only defined for input_dim = 1"
self.input_dim = input_dim
self.num_params = 4
self.name = 'ODE_1'
if lengthscaleU is not None:
lengthscaleU = np.asarray(lengthscaleU)
assert lengthscaleU.size == 1, "lengthscaleU should be one dimensional"
else:
lengthscaleU = np.ones(1)
if lengthscaleY is not None:
lengthscaleY = np.asarray(lengthscaleY)
assert lengthscaleY.size == 1, "lengthscaleY should be one dimensional"
else:
lengthscaleY = np.ones(1)
#lengthscaleY = 0.5
self._set_params(np.hstack((varianceU, varianceY, lengthscaleU,lengthscaleY)))
def _get_params(self):
"""return the value of the parameters."""
return np.hstack((self.varianceU,self.varianceY, self.lengthscaleU,self.lengthscaleY))
def _set_params(self, x):
"""set the value of the parameters."""
assert x.size == self.num_params
self.varianceU = x[0]
self.varianceY = x[1]
self.lengthscaleU = x[2]
self.lengthscaleY = x[3]
def _get_param_names(self):
"""return parameter names."""
return ['varianceU','varianceY', 'lengthscaleU', 'lengthscaleY']
def K(self, X, X2, target):
"""Compute the covariance matrix between X and X2."""
if X2 is None: X2 = X
# i1 = X[:,1]
# i2 = X2[:,1]
# X = X[:,0].reshape(-1,1)
# X2 = X2[:,0].reshape(-1,1)
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix associated to X."""
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
k1 = (2*lu+ly)/(lu+ly)**2
k2 = (ly-2*lu + 2*lu-ly ) / (ly-lu)**2
k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
np.add(self.varianceU*self.varianceY*(k1+k2+k3), target, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X
dist = np.abs(X - X2.T)
ly=1/self.lengthscaleY
lu=np.sqrt(3)/self.lengthscaleU
#ly=self.lengthscaleY
#lu=self.lengthscaleU
dk1theta1 = np.exp(-ly*dist)*2*(-lu)/(lu+ly)**3
#c=np.sqrt(3)
#t1=c/lu
#t2=1/ly
#dk1theta1=np.exp(-dist*ly)*t2*( (2*c*t2+2*t1)/(c*t2+t1)**2 -2*(2*c*t2*t1+t1**2)/(c*t2+t1)**3 )
dk2theta1 = 1*(
np.exp(-lu*dist)*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2) + np.exp(-lu*dist)*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
+np.exp(-dist*lu)*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
+np.exp(-dist*ly)*2*(ly-lu)**(-2)
+np.exp(-dist*ly)*2*(2*lu-ly)*(ly-lu)**(-3)
)
dk3theta1 = np.exp(-dist*lu)*(lu+ly)**(-2)*((2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)
dk1theta2 = np.exp(-ly*dist) * ((lu+ly)**(-2)) * ( (-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly) )
dk2theta2 = 1*(
np.exp(-dist*lu)*(ly-lu)**(-2) * ( 1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1) )
+np.exp(-dist*ly)*(ly-lu)**(-2) * ( (-dist)*(2*lu-ly) -1+(2*lu-ly)*(-2)*(ly-lu)**(-1) )
)
dk3theta2 = np.exp(-dist*lu) * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 +dk3theta2)
k1 = np.exp(-ly*dist)*(2*lu+ly)/(lu+ly)**2
k2 = (np.exp(-lu*dist)*(ly-2*lu+lu*ly*dist-lu**2*dist) + np.exp(-ly*dist)*(2*lu-ly) ) / (ly-lu)**2
k3 = np.exp(-lu*dist) * ( (1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2 )
dkdvar = k1+k2+k3
target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)
# def dKdiag_dtheta(self, dL_dKdiag, X, target):
# """derivative of the diagonal of the covariance matrix with respect to the parameters."""
# # NB: derivative of diagonal elements wrt lengthscale is 0
# target[0] += np.sum(dL_dKdiag)
# def dK_dX(self, dL_dK, X, X2, target):
# """derivative of the covariance matrix with respect to X."""
# if X2 is None: X2 = X
# dist = np.sqrt(np.sum(np.square((X[:, None, :] - X2[None, :, :]) / self.lengthscale), -1))[:, :, None]
# ddist_dX = (X[:, None, :] - X2[None, :, :]) / self.lengthscale ** 2 / np.where(dist != 0., dist, np.inf)
# dK_dX = -np.transpose(self.variance * np.exp(-dist) * ddist_dX, (1, 0, 2))
# target += np.sum(dK_dX * dL_dK.T[:, :, None], 0)
# def dKdiag_dX(self, dL_dKdiag, X, target):
# pass

View file

@ -0,0 +1,556 @@
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from GPy.util.linalg import mdot, pdinv
from GPy.util.ln_diff_erfs import ln_diff_erfs
import pdb
from scipy import weave
class Eq_ode1(Kernpart):
"""
Covariance function for first order differential equation driven by an exponentiated quadratic covariance.
This outputs of this kernel have the form
.. math::
\frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)
where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes goverened by an exponentiated quadratic covariance.
:param output_dim: number of outputs driven by latent function.
:type output_dim: int
:param W: sensitivities of each output to the latent driving function.
:type W: ndarray (output_dim x rank).
:param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
:type rank: int
:param decay: decay rates for the first order system.
:type decay: array of length output_dim.
:param delay: delay between latent force and output response.
:type delay: array of length output_dim.
:param kappa: diagonal term that allows each latent output to have an independent component to the response.
:type kappa: array of length output_dim.
.. Note: see first order differential equation examples in GPy.examples.regression for some usage.
"""
def __init__(self,output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
self.rank = rank
self.input_dim = 1
self.name = 'eq_ode1'
self.output_dim = output_dim
self.lengthscale = lengthscale
self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
if kappa is not None:
self.num_params+=self.output_dim
if delay is not None:
assert delay.shape==(self.output_dim-1,)
self.num_params+=self.output_dim-1
self.rank = rank
if W is None:
self.W = 0.5*np.random.randn(self.output_dim,self.rank)/np.sqrt(self.rank)
else:
assert W.shape==(self.output_dim,self.rank)
self.W = W
if decay is None:
self.decay = np.ones(self.output_dim-1)
if kappa is not None:
assert kappa.shape==(self.output_dim,)
self.kappa = kappa
self.delay = delay
self.is_normalized = True
self.is_stationary = False
self.gaussian_initial = False
self._set_params(self._get_params())
def _get_params(self):
param_list = [self.W.flatten()]
if self.kappa is not None:
param_list.append(self.kappa)
param_list.append(self.decay)
if self.delay is not None:
param_list.append(self.delay)
param_list.append(self.lengthscale)
return np.hstack(param_list)
def _set_params(self,x):
assert x.size == self.num_params
end = self.output_dim*self.rank
self.W = x[:end].reshape(self.output_dim,self.rank)
start = end
self.B = np.dot(self.W,self.W.T)
if self.kappa is not None:
end+=self.output_dim
self.kappa = x[start:end]
self.B += np.diag(self.kappa)
start=end
end+=self.output_dim-1
self.decay = x[start:end]
start=end
if self.delay is not None:
end+=self.output_dim-1
self.delay = x[start:end]
start=end
end+=1
self.lengthscale = x[start]
self.sigma = np.sqrt(2)*self.lengthscale
def _get_param_names(self):
param_names = sum([['W%i_%i'%(i,j) for j in range(self.rank)] for i in range(self.output_dim)],[])
if self.kappa is not None:
param_names += ['kappa_%i'%i for i in range(self.output_dim)]
param_names += ['decay_%i'%i for i in range(1,self.output_dim)]
if self.delay is not None:
param_names += ['delay_%i'%i for i in 1+range(1,self.output_dim)]
param_names+= ['lengthscale']
return param_names
def K(self,X,X2,target):
if X.shape[1] > 2:
raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')
self._K_computations(X, X2)
target += self._scale*self._K_dvar
if self.gaussian_initial:
# Add covariance associated with initial condition.
t1_mat = self._t[self._rorder, None]
t2_mat = self._t2[None, self._rorder2]
target+=self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))
def Kdiag(self,index,target):
#target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
pass
def _param_grad_helper(self,dL_dK,X,X2,target):
# First extract times and indices.
self._extract_t_indices(X, X2, dL_dK=dL_dK)
self._dK_ode_dtheta(target)
def _dK_ode_dtheta(self, target):
"""Do all the computations for the ode parts of the covariance function."""
t_ode = self._t[self._index>0]
dL_dK_ode = self._dL_dK[self._index>0, :]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
return
t2_ode = t_ode
dL_dK_ode = dL_dK_ode[:, self._index>0]
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
dL_dK_ode = dL_dK_ode[:, self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
return
index2_ode = self._index2[self._index2>0]-1
h1 = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay = self._dh_ddelay
self._dK_dsigma = self._dh_dsigma
if self._t2 is None:
h2 = h1
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary, update_derivatives=True)
#self._dK_ddelay += self._dh_ddelay.T
self._dK_dsigma += self._dh_dsigma.T
# C1 = self.sensitivity
# C2 = self.sensitivity
# K = 0.5 * (h1 + h2.T)
# var2 = C1*C2
# if self.is_normalized:
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + sum(sum(dL_dK.*dh2_dD1.T)))*0.5*var2
# dk_dD2 = (sum(sum(dL_dK.*dh1_dD2)) + sum(sum(dL_dK.*dh2_dD2.T)))*0.5*var2
# dk_dsigma = 0.5 * var2 * sum(sum(dL_dK.*dK_dsigma))
# dk_dC1 = C2 * sum(sum(dL_dK.*K))
# dk_dC2 = C1 * sum(sum(dL_dK.*K))
# else:
# K = np.sqrt(np.pi) * K
# dk_dD1 = (sum(sum(dL_dK.*dh1_dD1)) + * sum(sum(dL_dK.*K))
# dk_dC2 = self.sigma * C1 * sum(sum(dL_dK.*K))
# dk_dSim1Variance = dk_dC1
# Last element is the length scale.
(dL_dK_ode[:, :, None]*self._dh_ddelay[:, None, :]).sum(2)
target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()
# # only pass the gradient with respect to the inverse width to one
# # of the gradient vectors ... otherwise it is counted twice.
# g1 = real([dk_dD1 dk_dinvWidth dk_dSim1Variance])
# g2 = real([dk_dD2 0 dk_dSim2Variance])
# return g1, g2"""
def dKdiag_dtheta(self,dL_dKdiag,index,target):
pass
def gradients_X(self,dL_dK,X,X2,target):
pass
def _extract_t_indices(self, X, X2=None, dL_dK=None):
"""Extract times and output indices from the input matrix X. Times are ordered according to their index for convenience of computation, this ordering is stored in self._order and self.order2. These orderings are then mapped back to the original ordering (in X) using self._rorder and self._rorder2. """
# TODO: some fast checking here to see if this needs recomputing?
self._t = X[:, 0]
if not X.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._index = np.asarray(X[:, 1],dtype=np.int)
# Sort indices so that outputs are in blocks for computational
# convenience.
self._order = self._index.argsort()
self._index = self._index[self._order]
self._t = self._t[self._order]
self._rorder = self._order.argsort() # rorder is for reversing the order
if X2 is None:
self._t2 = None
self._index2 = None
self._order2 = self._order
self._rorder2 = self._rorder
else:
if not X2.shape[1] == 2:
raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
self._t2 = X2[:, 0]
self._index2 = np.asarray(X2[:, 1],dtype=np.int)
self._order2 = self._index2.argsort()
self._index2 = self._index2[self._order2]
self._t2 = self._t2[self._order2]
self._rorder2 = self._order2.argsort() # rorder2 is for reversing order
if dL_dK is not None:
self._dL_dK = dL_dK[self._order, :]
self._dL_dK = self._dL_dK[:, self._order2]
def _K_computations(self, X, X2):
"""Perform main body of computations for the ode1 covariance function."""
# First extract times and indices.
self._extract_t_indices(X, X2)
self._K_compute_eq()
self._K_compute_ode_eq()
if X2 is None:
self._K_eq_ode = self._K_ode_eq.T
else:
self._K_compute_ode_eq(transpose=True)
self._K_compute_ode()
if X2 is None:
self._K_dvar = np.zeros((self._t.shape[0], self._t.shape[0]))
else:
self._K_dvar = np.zeros((self._t.shape[0], self._t2.shape[0]))
# Reorder values of blocks for placing back into _K_dvar.
self._K_dvar = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
np.hstack((self._K_ode_eq, self._K_ode))))
self._K_dvar = self._K_dvar[self._rorder, :]
self._K_dvar = self._K_dvar[:, self._rorder2]
if X2 is None:
# Matrix giving scales of each output
self._scale = np.zeros((self._t.size, self._t.size))
code="""
for(int i=0;i<N; i++){
scale_mat[i+i*N] = B[index[i]+output_dim*(index[i])];
for(int j=0; j<i; j++){
scale_mat[j+i*N] = B[index[i]+output_dim*index[j]];
scale_mat[i+j*N] = scale_mat[j+i*N];
}
}
"""
scale_mat, B, index = self._scale, self.B, self._index
N, output_dim = self._t.size, self.output_dim
weave.inline(code,['index',
'scale_mat', 'B',
'N', 'output_dim'])
else:
self._scale = np.zeros((self._t.size, self._t2.size))
code = """
for(int i=0; i<N; i++){
for(int j=0; j<N2; j++){
scale_mat[i+j*N] = B[index[i]+output_dim*index2[j]];
}
}
"""
scale_mat, B, index, index2 = self._scale, self.B, self._index, self._index2
N, N2, output_dim = self._t.size, self._t2.size, self.output_dim
weave.inline(code, ['index', 'index2',
'scale_mat', 'B',
'N', 'N2', 'output_dim'])
def _K_compute_eq(self):
"""Compute covariance for latent covariance."""
t_eq = self._t[self._index==0]
if self._t2 is None:
if t_eq.size==0:
self._K_eq = np.zeros((0, 0))
return
self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
else:
t2_eq = self._t2[self._index2==0]
if t_eq.size==0 or t2_eq.size==0:
self._K_eq = np.zeros((t_eq.size, t2_eq.size))
return
self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])
self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
if self.is_normalized:
self._K_eq/=(np.sqrt(2*np.pi)*self.lengthscale)
def _K_compute_ode_eq(self, transpose=False):
"""Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.
:param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
:type transpose: bool"""
if self._t2 is not None:
if transpose:
t_eq = self._t[self._index==0]
t_ode = self._t2[self._index2>0]
index_ode = self._index2[self._index2>0]-1
else:
t_eq = self._t2[self._index2==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
else:
t_eq = self._t[self._index==0]
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if t_ode.size==0 or t_eq.size==0:
if transpose:
self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
else:
self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
return
t_ode_mat = t_ode[:, None]
t_eq_mat = t_eq[None, :]
if self.delay is not None:
t_ode_mat -= self.delay[index_ode, None]
diff_t = (t_ode_mat - t_eq_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
decay_vals = self.decay[index_ode][:, None]
half_sigma_d_i = 0.5*self.sigma*decay_vals
if self.is_stationary:
ln_part, signs = ln_diff_erfs(inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
else:
ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)
sK *= 0.5
if not self.is_normalized:
sK *= np.sqrt(np.pi)*self.sigma
if transpose:
self._K_eq_ode = sK.T
else:
self._K_ode_eq = sK
def _K_compute_ode(self):
# Compute covariances between outputs of the ODE models.
t_ode = self._t[self._index>0]
index_ode = self._index[self._index>0]-1
if self._t2 is None:
if t_ode.size==0:
self._K_ode = np.zeros((0, 0))
return
t2_ode = t_ode
index2_ode = index_ode
else:
t2_ode = self._t2[self._index2>0]
if t_ode.size==0 or t2_ode.size==0:
self._K_ode = np.zeros((t_ode.size, t2_ode.size))
return
index2_ode = self._index2[self._index2>0]-1
# When index is identical
h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)
if self._t2 is None:
self._K_ode = 0.5 * (h + h.T)
else:
h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
self._K_ode = 0.5 * (h + h2.T)
if not self.is_normalized:
self._K_ode *= np.sqrt(np.pi)*self.sigma
def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
"""Helper function for computing H for the diagonal only.
:param t: time input.
:type t: array
:param index: first output indices
:type index: array of int.
:param index: second output indices
:type index: array of int.
:param update_derivatives: whether or not to update the derivative portions (default False).
:type update_derivatives: bool
:param stationary: whether to compute the stationary version of the covariance (default False).
:type stationary: bool"""
"""if delta_i~=delta_j:
[h, dh_dD_i, dh_dD_j, dh_dsigma] = np.diag(simComputeH(t, index, t, index, update_derivatives=True, stationary=self.is_stationary))
else:
Decay = self.decay[index]
if self.delay is not None:
t = t - self.delay[index]
t_squared = t*t
half_sigma_decay = 0.5*self.sigma*Decay
[ln_part_1, sign1] = ln_diff_erfs(half_sigma_decay + t/self.sigma,
half_sigma_decay)
[ln_part_2, sign2] = ln_diff_erfs(half_sigma_decay,
half_sigma_decay - t/self.sigma)
h = (sign1*np.exp(half_sigma_decay*half_sigma_decay
+ ln_part_1
- log(Decay + D_j))
- sign2*np.exp(half_sigma_decay*half_sigma_decay
- (Decay + D_j)*t
+ ln_part_2
- log(Decay + D_j)))
sigma2 = self.sigma*self.sigma
if update_derivatives:
dh_dD_i = ((0.5*Decay*sigma2*(Decay + D_j)-1)*h
+ t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay+D_j)*t + ln_part_2
)
+ self.sigma/np.sqrt(np.pi)*
(-1 + np.exp(-t_squared/sigma2-Decay*t)
+ np.exp(-t_squared/sigma2-D_j*t)
- np.exp(-(Decay + D_j)*t)))
dh_dD_i = (dh_dD_i/(Decay+D_j)).real
dh_dD_j = (t*sign2*np.exp(
half_sigma_decay*half_sigma_decay-(Decay + D_j)*t+ln_part_2
)
-h)
dh_dD_j = (dh_dD_j/(Decay + D_j)).real
dh_dsigma = 0.5*Decay*Decay*self.sigma*h \
+ 2/(np.sqrt(np.pi)*(Decay+D_j))\
*((-Decay/2) \
+ (-t/sigma2+Decay/2)*np.exp(-t_squared/sigma2 - Decay*t) \
- (-t/sigma2-Decay/2)*np.exp(-t_squared/sigma2 - D_j*t) \
- Decay/2*np.exp(-(Decay+D_j)*t))"""
pass
def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
"""Helper function for computing part of the ode1 covariance function.
:param t: first time input.
:type t: array
:param index: Indices of first output.
:type index: array of int
:param t2: second time input.
:type t2: array
:param index2: Indices of second output.
:type index2: array of int
:param update_derivatives: whether to update derivatives (default is False)
:return h : result of this subcomponent of the kernel for the given values.
:rtype: ndarray
"""
if stationary:
raise NotImplementedError, "Error, stationary version of this covariance not yet implemented."
# Vector of decays and delays associated with each output.
Decay = self.decay[index]
Decay2 = self.decay[index2]
t_mat = t[:, None]
t2_mat = t2[None, :]
if self.delay is not None:
Delay = self.delay[index]
Delay2 = self.delay[index2]
t_mat-=Delay[:, None]
t2_mat-=Delay2[None, :]
diff_t = (t_mat - t2_mat)
inv_sigma_diff_t = 1./self.sigma*diff_t
half_sigma_decay_i = 0.5*self.sigma*Decay[:, None]
ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
half_sigma_decay_i - inv_sigma_diff_t,
return_sign=True)
ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
half_sigma_decay_i - t_mat/self.sigma,
return_sign=True)
h = sign1*np.exp(half_sigma_decay_i
*half_sigma_decay_i
-Decay[:, None]*diff_t+ln_part_1
-np.log(Decay[:, None] + Decay2[None, :]))
h -= sign2*np.exp(half_sigma_decay_i*half_sigma_decay_i
-Decay[:, None]*t_mat-Decay2[None, :]*t2_mat+ln_part_2
-np.log(Decay[:, None] + Decay2[None, :]))
if update_derivatives:
sigma2 = self.sigma*self.sigma
# Update ith decay gradient
dh_ddecay = ((0.5*Decay[:, None]*sigma2*(Decay[:, None] + Decay2[None, :])-1)*h
+ (-diff_t*sign1*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*diff_t+ln_part_1
)
+t_mat*sign2*np.exp(
half_sigma_decay_i*half_sigma_decay_i-Decay[:, None]*t_mat
- Decay2*t2_mat+ln_part_2))
+self.sigma/np.sqrt(np.pi)*(
-np.exp(
-diff_t*diff_t/sigma2
)+np.exp(
-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat
)+np.exp(
-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat
)-np.exp(
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
)
))
self._dh_ddecay = (dh_ddecay/(Decay[:, None]+Decay2[None, :])).real
# Update jth decay gradient
dh_ddecay2 = (t2_mat*sign2
*np.exp(
half_sigma_decay_i*half_sigma_decay_i
-(Decay[:, None]*t_mat + Decay2[None, :]*t2_mat)
+ln_part_2
)
-h)
self._dh_ddecay2 = (dh_ddecay/(Decay[:, None] + Decay2[None, :])).real
# Update sigma gradient
self._dh_dsigma = (half_sigma_decay_i*Decay[:, None]*h
+ 2/(np.sqrt(np.pi)
*(Decay[:, None]+Decay2[None, :]))
*((-diff_t/sigma2-Decay[:, None]/2)
*np.exp(-diff_t*diff_t/sigma2)
+ (-t2_mat/sigma2+Decay[:, None]/2)
*np.exp(-t2_mat*t2_mat/sigma2-Decay[:, None]*t_mat)
- (-t_mat/sigma2-Decay[:, None]/2)
*np.exp(-t_mat*t_mat/sigma2-Decay2[None, :]*t2_mat)
- Decay[:, None]/2
*np.exp(-(Decay[:, None]*t_mat+Decay2[None, :]*t2_mat))))
return h

View file

@ -0,0 +1,74 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import pdinv,mdot
class FiniteDimensional(Kernpart):
def __init__(self, input_dim, F, G, variance=1., weights=None):
"""
Argumnents
----------
input_dim: int - the number of input dimensions
F: np.array of functions with shape (n,) - the n basis functions
G: np.array with shape (n,n) - the Gram matrix associated to F
weights : np.ndarray with shape (n,)
"""
self.input_dim = input_dim
self.F = F
self.G = G
self.G_1 ,L,Li,logdet = pdinv(G)
self.n = F.shape[0]
if weights is not None:
assert weights.shape==(self.n,)
else:
weights = np.ones(self.n)
self.num_params = self.n + 1
self.name = 'finite_dim'
self._set_params(np.hstack((variance,weights)))
def _get_params(self):
return np.hstack((self.variance,self.weights))
def _set_params(self,x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weights = x[1:]
def _get_param_names(self):
if self.n==1:
return ['variance','weight']
else:
return ['variance']+['weight_%i'%i for i in range(self.weights.size)]
def K(self,X,X2,target):
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
product = self.variance * mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(product,target,target)
def Kdiag(self,X,target):
product = np.diag(self.K(X, X))
np.add(target,product,target)
def _param_grad_helper(self,X,X2,target):
"""Return shape is NxMx(Ntheta)"""
if X2 is None: X2 = X
FX = np.column_stack([f(X) for f in self.F])
FX2 = np.column_stack([f(X2) for f in self.F])
DER = np.zeros((self.n,self.n,self.n))
for i in range(self.n):
DER[i,i,i] = np.sqrt(self.weights[i])
dw = self.variance * mdot(FX,DER,self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
dv = mdot(FX,np.diag(np.sqrt(self.weights)),self.G_1,np.diag(np.sqrt(self.weights)),FX2.T)
np.add(target[:,:,0],np.transpose(dv,(0,2,1)), target[:,:,0])
np.add(target[:,:,1:],np.transpose(dw,(0,2,1)), target[:,:,1:])
def dKdiag_dtheta(self,X,target):
np.add(target[:,0],1.,target[:,0])

View file

@ -0,0 +1,41 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Fixed(Kernpart):
def __init__(self, input_dim, K, variance=1.):
"""
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
self.input_dim = input_dim
self.fixed_K = K
self.num_params = 1
self.name = 'fixed'
self._set_params(np.array([variance]).flatten())
def _get_params(self):
return self.variance
def _set_params(self, x):
assert x.shape == (1,)
self.variance = x
def _get_param_names(self):
return ['variance']
def K(self, X, X2, target):
target += self.variance * self.fixed_K
def _param_grad_helper(self, partial, X, X2, target):
target += (partial * self.fixed_K).sum()
def gradients_X(self, partial, X, X2, target):
pass
def dKdiag_dX(self, partial, X, target):
pass

154
GPy/kern/_src/todo/gibbs.py Normal file
View file

@ -0,0 +1,154 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Gibbs(Kernpart):
"""
Gibbs non-stationary covariance function.
.. math::
r = sqrt((x_i - x_j)'*(x_i - x_j))
k(x_i, x_j) = \sigma^2*Z*exp(-r^2/(l(x)*l(x) + l(x')*l(x')))
Z = (2*l(x)*l(x')/(l(x)*l(x) + l(x')*l(x')^{q/2}
where :math:`l(x)` is a function giving the length scale as a function of space and :math:`q` is the dimensionality of the input space.
This is the non stationary kernel proposed by Mark Gibbs in his 1997
thesis. It is similar to an RBF but has a length scale that varies
with input location. This leads to an additional term in front of
the kernel.
The parameters are :math:`\sigma^2`, the process variance, and
the parameters of l(x) which is a function that can be
specified by the user, by default an multi-layer peceptron is
used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter \sigma^2_w), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
See Mark Gibbs's thesis for more details: Gibbs,
M. N. (1997). Bayesian Gaussian Processes for Regression and
Classification. PhD thesis, Department of Physics, University of
Cambridge. Or also see Page 93 of Gaussian Processes for Machine
Learning by Rasmussen and Williams. Although note that we do not
constrain the lengthscale to be positive by default. This allows
anticorrelation to occur. The positive constraint can be included
by the user manually.
"""
def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not ARD:
self.num_params=1+mapping.num_params
else:
raise NotImplementedError
self.mapping = mapping
self.name='gibbs'
self._set_params(np.hstack((variance, self.mapping._get_params())))
def _get_params(self):
return np.hstack((self.variance, self.mapping._get_params()))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.mapping._set_params(x[1:])
def _get_param_names(self):
return ['variance'] + self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
np.add(target, self.variance, target)
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
self._dK_computations(dL_dK)
if X2==None:
gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
else:
gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)
target+= np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
# First account for gradients arising from presence of X in exponent.
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_co
gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
# Now account for gradients arising from presence of X in lengthscale.
self._dK_computations(dL_dK)
if X2 is None:
target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
else:
target += self.mapping.df_dX(self._dL_dl[:, None], X)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
pass
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target[0] += np.sum(dL_dKdiag)
def _K_computations(self, X, X2=None):
"""Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._dK_dvar and self._K_dist2 are updated."""
self._lengthscales=self.mapping.f(X)
self._lengthscales2=np.square(self._lengthscales)
if X2==None:
self._lengthscales_two = self._lengthscales
self._lengthscales_two2 = self._lengthscales2
Xsquare = np.square(X).sum(1)
self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
else:
self._lengthscales_two = self.mapping.f(X2)
self._lengthscales_two2 = np.square(self._lengthscales_two)
self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
self._w2 = self._lengthscales2 + self._lengthscales_two2.T
prod_length = self._lengthscales*self._lengthscales_two.T
self._K_exponential = np.exp(-self._K_dist2/self._w2)
self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*np.exp(-self._K_dist2/self._w2)
def _dK_computations(self, dL_dK):
"""Pre-computations for the gradients of the covaraince function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.
:param dL_dK: the gradient of the objective with respect to the covariance function.
:type dL_dK: ndarray"""
self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
if self._lengthscales_two is self._lengthscales:
self._dL_dl_two = None
else:
self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4 ) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)

View file

@ -0,0 +1,104 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from IPython.core.debugger import Tracer; debug_here=Tracer()
from kernpart import Kernpart
import numpy as np
from ...util.linalg import tdot
from ...core.mapping import Mapping
import GPy
class Hetero(Kernpart):
"""
TODO: Need to constrain the function outputs
positive (still thinking of best way of doing this!!! Yes, intend to use
transformations, but what's the *best* way). Currently just squaring output.
Heteroschedastic noise which depends on input location. See, for example,
this paper by Goldberg et al.
.. math::
k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)
where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.
The parameters are the parameters of \sigma^2(x) which is a
function that can be specified by the user, by default an
multi-layer peceptron is used.
:param input_dim: the number of input dimensions
:type input_dim: int
:param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
:type mapping: GPy.core.Mapping
:rtype: Kernpart object
See this paper:
Goldberg, P. W. Williams, C. K. I. and Bishop,
C. M. (1998) Regression with Input-dependent Noise: a Gaussian
Process Treatment In Advances in Neural Information Processing
Systems, Volume 10, pp. 493-499. MIT Press
for a Gaussian process treatment of this problem.
"""
def __init__(self, input_dim, mapping=None, transform=None):
self.input_dim = input_dim
if not mapping:
mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
if not transform:
transform = GPy.core.transformations.logexp()
self.transform = transform
self.mapping = mapping
self.name='hetero'
self.num_params=self.mapping.num_params
self._set_params(self.mapping._get_params())
def _get_params(self):
return self.mapping._get_params()
def _set_params(self, x):
assert x.size == (self.num_params)
self.mapping._set_params(x)
def _get_param_names(self):
return self.mapping._get_param_names()
def K(self, X, X2, target):
"""Return covariance between X and X2."""
if (X2 is None) or (X2 is X):
target[np.diag_indices_from(target)] += self._Kdiag(X)
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
target+=self._Kdiag(X)
def _Kdiag(self, X):
"""Helper function for computing the diagonal elements of the covariance."""
return self.mapping.f(X).flatten()**2
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
if (X2 is None) or (X2 is X):
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dtheta(dL_dKdiag, X, target)
def dKdiag_dtheta(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to parameters."""
target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X."""
if X2==None or X2 is X:
dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
self.dKdiag_dX(dL_dKdiag, X, target)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X."""
target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)

View file

@ -0,0 +1,38 @@
#include <math.h>
double k_uu(t1,t2,theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*(1+theta1*dist)*exp(-theta1*dist)
return kern;
}
double k_yy(t1, t2, theta1,theta2,sig1,sig2)
{
double kern=0;
double dist=0;
dist = sqrt(t2*t2-t1*t1)
kern = sig1*sig2 * ( exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
exp(-dist) ) / ((theta2-theta1)*(theta2-theta1))
return kern;
}

138
GPy/kern/_src/todo/poly.py Normal file
View file

@ -0,0 +1,138 @@
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
four_over_tau = 2./np.pi
class POLY(Kernpart):
"""
Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:
.. math::
k(x, y) = \sigma^2\*(\sigma_w^2 x'y+\sigma_b^b)^d
The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
(weight_variance), :math:`\sigma^2_b` (bias_variance) and d
(degree). Only gradients of the first three are provided for
kernel optimisation, it is assumed that polynomial degree would
be set by hand.
The kernel is not recommended as it is badly behaved when the
:math:`\sigma^2_w\*x'\*y + \sigma^2_b` has a magnitude greater than one. For completeness
there is an automatic relevance determination version of this
kernel provided (NOTE YET IMPLEMENTED!).
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance :math:`\sigma^2`
:type variance: float
:param weight_variance: the vector of the variances of the prior over input weights in the neural network :math:`\sigma^2_w`
:type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
:param bias_variance: the variance of the prior over bias parameters :math:`\sigma^2_b`
:param degree: the degree of the polynomial.
:type degree: int
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`), otherwise there is one weight variance parameter per dimension.
:type ARD: Boolean
:rtype: Kernpart object
"""
def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
self.input_dim = input_dim
self.ARD = ARD
if not ARD:
self.num_params=3
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
else:
weight_variance = 1.*np.ones(1)
else:
self.num_params = self.input_dim + 2
if weight_variance is not None:
weight_variance = np.asarray(weight_variance)
assert weight_variance.size == self.input_dim, "bad number of weight variances"
else:
weight_variance = np.ones(self.input_dim)
raise NotImplementedError
self.degree=degree
self.name='poly_deg' + str(self.degree)
self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))
def _get_params(self):
return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))
def _set_params(self, x):
assert x.size == (self.num_params)
self.variance = x[0]
self.weight_variance = x[1:-1]
self.weight_std = np.sqrt(self.weight_variance)
self.bias_variance = x[-1]
def _get_param_names(self):
if self.num_params == 3:
return ['variance', 'weight_variance', 'bias_variance']
else:
return ['variance'] + ['weight_variance_%i' % i for i in range(self.lengthscale.size)] + ['bias_variance']
def K(self, X, X2, target):
"""Return covariance between X and X2."""
self._K_computations(X, X2)
target += self.variance*self._K_dvar
def Kdiag(self, X, target):
"""Compute the diagonal of the covariance matrix for X."""
self._K_diag_computations(X)
target+= self.variance*self._K_diag_dvar
def _param_grad_helper(self, dL_dK, X, X2, target):
"""Derivative of the covariance with respect to the parameters."""
self._K_computations(X, X2)
base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
base_cov_grad = base*dL_dK
target[0] += np.sum(self._K_dvar*dL_dK)
target[1] += (self._K_inner_prod*base_cov_grad).sum()
target[2] += base_cov_grad.sum()
def gradients_X(self, dL_dK, X, X2, target):
"""Derivative of the covariance matrix with respect to X"""
self._K_computations(X, X2)
arg = self._K_poly_arg
if X2 is None:
target += 2*self.weight_variance*self.degree*self.variance*(((X[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
else:
target += self.weight_variance*self.degree*self.variance*(((X2[None,:, :])) *(arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
def dKdiag_dX(self, dL_dKdiag, X, target):
"""Gradient of diagonal of covariance with respect to X"""
self._K_diag_computations(X)
arg = self._K_diag_poly_arg
target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]
def _K_computations(self, X, X2):
if self.ARD:
pass
else:
if X2 is None:
self._K_inner_prod = np.dot(X,X.T)
else:
self._K_inner_prod = np.dot(X,X2.T)
self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
self._K_dvar = self._K_poly_arg**self.degree
def _K_diag_computations(self, X):
if self.ARD:
pass
else:
self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
self._K_diag_dvar = self._K_diag_poly_arg**self.degree

View file

@ -0,0 +1,336 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from rbf import RBF
import numpy as np
from scipy import weave
from ...util.linalg import tdot
from ...core.parameterization import Param
class RBFInv(RBF):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. It only
differs from RBF in that here the parametrization is wrt the inverse lengthscale:
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False, name='inverse rbf'):
#self.input_dim = input_dim
#self.name = 'rbf_inv'
if inv_lengthscale is not None: lengthscale = 1./np.array(inv_lengthscale)
else: lengthscale = None
super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=lengthscale, ARD=ARD, name=name)
self.ARD = ARD
if not ARD:
self.num_params = 2
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"
else:
inv_lengthscale = np.ones(1)
else:
self.num_params = self.input_dim + 1
if inv_lengthscale is not None:
inv_lengthscale = np.asarray(inv_lengthscale)
assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
else:
inv_lengthscale = np.ones(self.input_dim)
self.variance = Param('variance', variance)
self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
self.remove_parameter(self.lengthscale)
self.add_parameters(self.variance, self.inv_lengthscale)
#self._set_params(np.hstack((variance, inv_lengthscale.flatten())))
# initialize cache
self._Z, self._mu, self._S = np.empty(shape=(3, 1))
self._X, self._X2, self._params = np.empty(shape=(3, 1))
# a set of optional args to pass to weave
self.weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'], # -march=native'],
'extra_link_args' : ['-lgomp']}
# def _get_params(self):
# return np.hstack((self.variance, self.inv_lengthscale))
def update_inv_lengthscale(self, il):
self.inv_lengthscale2 = np.square(self.inv_lengthscale)
# TODO: We can rewrite everything with inv_lengthscale and never need to do the below
self.lengthscale = 1. / self.inv_lengthscale
self.lengthscale2 = np.square(self.lengthscale)
#def _set_params(self, x):
def parameters_changed(self):
#assert x.size == (self.num_params)
#self.variance = x[0]
#self.inv_lengthscale = x[1:]
# reset cached results
self._X, self._X2, self._params = np.empty(shape=(3, 1))
self._Z, self._mu, self._S = np.empty(shape=(3, 1)) # cached versions of Z,mu,S
# def _get_param_names(self):
# if self.num_params == 2:
# return ['variance', 'inv_lengthscale']
# else:
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
def _param_grad_helper(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
target[0] += np.sum(self._K_dvar * dL_dK)
if self.ARD:
dvardLdK = self._K_dvar * dL_dK
var_len3 = self.variance / np.power(self.lengthscale, 3)
len2 = self.lengthscale2
if X2 is None:
# save computation for the symmetrical case
dvardLdK = dvardLdK + dvardLdK.T
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<i; j++){
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
code = """
int q,i,j;
double tmp;
for(q=0; q<input_dim; q++){
tmp = 0;
for(i=0; i<num_data; i++){
for(j=0; j<num_inducing; j++){
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
}
}
target(q+1) += var_len3(q)*tmp*(-len2(q));
}
"""
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
else:
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
def gradients_X(self, dL_dK, X, X2, target):
self._K_computations(X, X2)
if X2 is None:
_K_dist = 2*(X[:, None, :] - X[None, :, :])
else:
_K_dist = X[:, None, :] - X2[None, :, :] # don't cache this in _K_computations because it is high memory. If this function is being called, chances are we're not in the high memory arena.
gradients_X = (-self.variance * self.inv_lengthscale2) * np.transpose(self._K_dvar[:, :, np.newaxis] * _K_dist, (1, 0, 2))
target += np.sum(gradients_X * dL_dK.T[:, :, None], 0)
def dKdiag_dX(self, dL_dKdiag, X, target):
pass
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
# def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
# self._psi_computations(Z, mu, S)
# denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
# d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
# target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
# dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
# if not self.ARD:
# target[1] += dpsi1_dlength.sum()*(-self.lengthscale2)
# else:
# target[1:] += dpsi1_dlength.sum(0).sum(0)*(-self.lengthscale2)
# #target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
tmp = 1 + S[:, None, :] * self.inv_lengthscale2
# d_inv_length_old = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / (self.lengthscale * self._psi1_denom) + self.inv_lengthscale) / self.inv_lengthscale2
d_length = -(self._psi1[:, :, None] * ((np.square(self._psi1_dist) * self.inv_lengthscale) / (tmp ** 2) + (S[:, None, :] * self.inv_lengthscale) / (tmp)))
# d_inv_length = -self._psi1[:, :, None] * ((self._psi1_dist_sq - 1.) / self._psi1_denom + self.lengthscale)
target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if not self.ARD:
target[1] += dpsi1_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi1_dlength.sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
self._psi_computations(Z, mu, S)
dpsi1_dZ = -self._psi1[:, :, None] * ((self.inv_lengthscale2 * self._psi1_dist) / self._psi1_denom)
target += np.sum(dL_dpsi1[:, :, None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
self._psi_computations(Z, mu, S)
tmp = (self._psi1[:, :, None] * self.inv_lengthscale2) / self._psi1_denom
target_mu += np.sum(dL_dpsi1[:, :, None] * tmp * self._psi1_dist, 1)
target_S += np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (self._psi1_dist_sq - 1), 1)
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
"""Shape N,num_inducing,num_inducing,Ntheta"""
self._psi_computations(Z, mu, S)
d_var = 2.*self._psi2 / self.variance
# d_length = 2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] / self.lengthscale2) / (self.lengthscale * self._psi2_denom)
d_length = -2.*self._psi2[:, :, :, None] * (self._psi2_Zdist_sq * self._psi2_denom + self._psi2_mudist_sq + S[:, None, None, :] * self.inv_lengthscale2) / (self.inv_lengthscale * self._psi2_denom)
target[0] += np.sum(dL_dpsi2 * d_var)
dpsi2_dlength = d_length * dL_dpsi2[:, :, :, None]
if not self.ARD:
target[1] += dpsi2_dlength.sum() # *(-self.lengthscale2)
else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) # *(-self.lengthscale2)
# target[1:] = target[1:]*(-self.lengthscale2)
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
self._psi_computations(Z, mu, S)
term1 = self._psi2_Zdist * self.inv_lengthscale2 # num_inducing, num_inducing, input_dim
term2 = (self._psi2_mudist * self.inv_lengthscale2) / self._psi2_denom # N, num_inducing, num_inducing, input_dim
dZ = self._psi2[:, :, :, None] * (term1[None] + term2)
target += (dL_dpsi2[:, :, :, None] * dZ).sum(0).sum(0)
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
"""Think N,num_inducing,num_inducing,input_dim """
self._psi_computations(Z, mu, S)
tmp = (self.inv_lengthscale2 * self._psi2[:, :, :, None]) / self._psi2_denom
target_mu += -2.*(dL_dpsi2[:, :, :, None] * tmp * self._psi2_mudist).sum(1).sum(1)
target_S += (dL_dpsi2[:, :, :, None] * tmp * (2.*self._psi2_mudist_sq - 1)).sum(1).sum(1)
#---------------------------------------#
# Precomputations #
#---------------------------------------#
def _K_computations(self, X, X2):
if not (np.array_equal(X, self._X) and np.array_equal(X2, self._X2) and np.array_equal(self._params , self._get_params())):
self._X = X.copy()
self._params = self._get_params().copy()
if X2 is None:
self._X2 = None
X = X * self.inv_lengthscale
Xsquare = np.sum(np.square(X), 1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X * self.inv_lengthscale
X2 = X2 * self.inv_lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
def _psi_computations(self, Z, mu, S):
# here are the "statistics" for psi1 and psi2
if not np.array_equal(Z, self._Z):
# Z has changed, compute Z specific stuff
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
self._psi2_Zdist_sq = np.square(self._psi2_Zdist * self.inv_lengthscale) # M,M,Q
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
# something's changed. recompute EVERYTHING
# psi1
self._psi1_denom = S[:, None, :] * self.inv_lengthscale2 + 1.
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
self._psi1_dist_sq = (np.square(self._psi1_dist) * self.inv_lengthscale2) / self._psi1_denom
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
self._psi1 = self.variance * np.exp(self._psi1_exponent)
# psi2
self._psi2_denom = 2.*S[:, None, None, :] * self.inv_lengthscale2 + 1. # N,M,M,Q
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
# store matrices for caching
self._Z, self._mu, self._S = Z, mu, S
def weave_psi2(self, mu, Zhat):
N, input_dim = mu.shape
num_inducing = Zhat.shape[0]
mudist = np.empty((N, num_inducing, num_inducing, input_dim))
mudist_sq = np.empty((N, num_inducing, num_inducing, input_dim))
psi2_exponent = np.zeros((N, num_inducing, num_inducing))
psi2 = np.empty((N, num_inducing, num_inducing))
psi2_Zdist_sq = self._psi2_Zdist_sq
_psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
variance_sq = float(np.square(self.variance))
if self.ARD:
inv_lengthscale2 = self.inv_lengthscale2
else:
inv_lengthscale2 = np.ones(input_dim) * self.inv_lengthscale2
code = """
double tmp;
#pragma omp parallel for private(tmp)
for (int n=0; n<N; n++){
for (int m=0; m<num_inducing; m++){
for (int mm=0; mm<(m+1); mm++){
for (int q=0; q<input_dim; q++){
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp*inv_lengthscale2(q)/_psi2_denom(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now psi2_exponent
tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
psi2_exponent(n,mm,m) += tmp;
if (m !=mm){
psi2_exponent(n,m,mm) += tmp;
}
//psi2 would be computed like this, but np is faster
//tmp = variance_sq*exp(psi2_exponent(n,m,mm));
//psi2(n,m,mm) = tmp;
//psi2(n,mm,m) = tmp;
}
}
}
}
"""
support_code = """
#include <omp.h>
#include <math.h>
"""
weave.inline(code, support_code=support_code, libraries=['gomp'],
arg_names=['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return mudist, mudist_sq, psi2_exponent, psi2

View file

@ -0,0 +1,61 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
from ...core.parameterization import Param
def theta(x):
"""Heaviside step function"""
return np.where(x>=0.,1.,0.)
class Spline(Kernpart):
"""
Spline kernel
:param input_dim: the number of input dimensions (fixed to 1 right now TODO)
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
"""
def __init__(self,input_dim,variance=1.,lengthscale=1.):
self.input_dim = input_dim
assert self.input_dim==1
self.num_params = 1
self.name = 'spline'
self.variance = Param('variance', variance)
self.lengthscale = Param('lengthscale', lengthscale)
self.add_parameters(self.variance, self.lengthscale)
# def _get_params(self):
# return self.variance
#
# def _set_params(self,x):
# self.variance = x
#
# def _get_param_names(self):
# return ['variance']
def K(self,X,X2,target):
assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
t = X
s = X2.T
s_t = s-t # broadcasted subtraction
target += self.variance*(0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.)
def Kdiag(self,X,target):
target += self.variance*X.flatten()**3/3.
def _param_grad_helper(self,X,X2,target):
target += 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.
def dKdiag_dtheta(self,X,target):
target += X.flatten()**3/3.
def dKdiag_dX(self,X,target):
target += self.variance*X**2

View file

@ -0,0 +1,81 @@
# Copyright (c) 2012 James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kernpart import Kernpart
import numpy as np
class Symmetric(Kernpart):
"""
Symmetrical kernels
:param k: the kernel to symmetrify
:type k: Kernpart
:param transform: the transform to use in symmetrification (allows symmetry on specified axes)
:type transform: A numpy array (input_dim x input_dim) specifiying the transform
:rtype: Kernpart
"""
def __init__(self,k,transform=None):
if transform is None:
transform = np.eye(k.input_dim)*-1.
assert transform.shape == (k.input_dim, k.input_dim)
self.transform = transform
self.input_dim = k.input_dim
self.num_params = k.num_params
self.name = k.name + '_symm'
self.k = k
self.add_parameter(k)
#self._set_params(k._get_params())
def K(self,X,X2,target):
"""Compute the covariance matrix between X and X2."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
AX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.K(X,X2,target)
self.k.K(AX,X2,target)
self.k.K(X,AX2,target)
self.k.K(AX,AX2,target)
def _param_grad_helper(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k._param_grad_helper(dL_dK,X,X2,target)
self.k._param_grad_helper(dL_dK,AX,X2,target)
self.k._param_grad_helper(dL_dK,X,AX2,target)
self.k._param_grad_helper(dL_dK,AX,AX2,target)
def gradients_X(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform)
if X2 is None:
X2 = X
ZX2 = AX
else:
AX2 = np.dot(X2, self.transform)
self.k.gradients_X(dL_dK, X, X2, target)
self.k.gradients_X(dL_dK, AX, X2, target)
self.k.gradients_X(dL_dK, X, AX2, target)
self.k.gradients_X(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
foo = np.zeros((X.shape[0],X.shape[0]))
self.K(X,X,foo)
target += np.diag(foo)
def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError
def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X."""
raise NotImplementedError