mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-05-24 14:15:14 +02:00
tidying in kern
This commit is contained in:
parent
70ada7fa46
commit
4215f5fb28
14 changed files with 1 additions and 687 deletions
161
GPy/kern/_src/todo/ODE_1.py
Normal file
161
GPy/kern/_src/todo/ODE_1.py
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class ODE_1(Kernpart):
    """
    Kernel resulting from a first order ODE with OU driving GP.

    :param input_dim: the number of input dimensions, has to be equal to one
    :type input_dim: int
    :param varianceU: variance of the driving GP
    :type varianceU: float
    :param lengthscaleU: lengthscale of the driving GP (sqrt(3)/lengthscaleU)
    :type lengthscaleU: float
    :param varianceY: 'variance' of the transfer function
    :type varianceY: float
    :param lengthscaleY: 'lengthscale' of the transfer function (1/lengthscaleY)
    :type lengthscaleY: float
    :rtype: kernel object

    The parameter vector is ordered
    ``[varianceU, varianceY, lengthscaleU, lengthscaleY]``.
    """

    def __init__(self, input_dim=1, varianceU=1., varianceY=1., lengthscaleU=None, lengthscaleY=None):
        assert input_dim == 1, "Only defined for input_dim = 1"
        self.input_dim = input_dim
        self.num_params = 4
        self.name = 'ODE_1'
        lengthscaleU = self._single_lengthscale(lengthscaleU, 'lengthscaleU')
        lengthscaleY = self._single_lengthscale(lengthscaleY, 'lengthscaleY')
        self._set_params(np.hstack((varianceU, varianceY, lengthscaleU, lengthscaleY)))

    @staticmethod
    def _single_lengthscale(value, label):
        """Return ``value`` as an array holding one lengthscale (default: 1)."""
        if value is None:
            return np.ones(1)
        value = np.asarray(value)
        assert value.size == 1, "%s should be one dimensional" % label
        return value

    def _get_params(self):
        """return the value of the parameters."""
        return np.hstack((self.varianceU, self.varianceY, self.lengthscaleU, self.lengthscaleY))

    def _set_params(self, x):
        """set the value of the parameters."""
        assert x.size == self.num_params
        self.varianceU = x[0]
        self.varianceY = x[1]
        self.lengthscaleU = x[2]
        self.lengthscaleY = x[3]

    def _get_param_names(self):
        """return parameter names."""
        return ['varianceU', 'varianceY', 'lengthscaleU', 'lengthscaleY']

    def _rates(self):
        """Return ``(ly, lu)``, the two rates every formula below is written in.

        ``1.0``/``np.sqrt`` keep the division in floating point even when the
        stored lengthscales are integers (true division is not the default
        under Python 2).
        """
        ly = 1.0 / self.lengthscaleY
        lu = np.sqrt(3) / self.lengthscaleU
        return ly, lu

    @staticmethod
    def _unit_kernel(dist, ly, lu):
        """Return ``k1 + k2 + k3``, the covariance before variance scaling.

        NOTE: ``k2`` divides by ``(ly - lu)**2`` and is therefore undefined
        when ``ly == lu``.
        """
        decay_y = np.exp(-ly*dist)
        decay_u = np.exp(-lu*dist)
        k1 = decay_y*(2*lu+ly)/(lu+ly)**2
        k2 = (decay_u*(ly-2*lu+lu*ly*dist-lu**2*dist) + decay_y*(2*lu-ly)) / (ly-lu)**2
        k3 = decay_u * ((1+lu*dist)/(lu+ly) + (lu)/(lu+ly)**2)
        return k1 + k2 + k3

    def K(self, X, X2, target):
        """Compute the covariance matrix between X and X2.

        The result is added in place to ``target``; nothing is returned.
        ``X2=None`` means the covariance of ``X`` with itself.
        """
        if X2 is None:
            X2 = X
        dist = np.abs(X - X2.T)
        ly, lu = self._rates()
        target += self.varianceU*self.varianceY*self._unit_kernel(dist, ly, lu)

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix associated to X.

        The (constant) diagonal value is added in place to ``target``.
        """
        ly, lu = self._rates()
        k1 = (2*lu+ly)/(lu+ly)**2
        # The k2 term of K has numerator (ly-2*lu) + (2*lu-ly) at dist == 0,
        # which is identically zero; evaluating it in floating point only
        # produced round-off noise (or 0/0 when ly == lu), so it is omitted.
        k3 = 1/(lu+ly) + (lu)/(lu+ly)**2
        target += self.varianceU*self.varianceY*(k1+k3)

    def _param_grad_helper(self, dL_dK, X, X2, target):
        """derivative of the covariance matrix with respect to the parameters.

        ``target[0..3]`` are incremented with the gradients for varianceU,
        varianceY, lengthscaleU and lengthscaleY, each contracted with
        ``dL_dK``.
        """
        if X2 is None:
            X2 = X
        dist = np.abs(X - X2.T)
        ly, lu = self._rates()
        decay_y = np.exp(-ly*dist)
        decay_u = np.exp(-lu*dist)

        # theta1 is the rate lu = sqrt(3)/lengthscaleU.
        dk1theta1 = decay_y*2*(-lu)/(lu+ly)**3
        dk2theta1 = (
            decay_u*dist*(-ly+2*lu-lu*ly*dist+dist*lu**2)*(ly-lu)**(-2)
            + decay_u*(-2+ly*dist-2*dist*lu)*(ly-lu)**(-2)
            + decay_u*(ly-2*lu+ly*lu*dist-dist*lu**2)*2*(ly-lu)**(-3)
            + decay_y*2*(ly-lu)**(-2)
            + decay_y*2*(2*lu-ly)*(ly-lu)**(-3)
        )
        dk3theta1 = decay_u*(lu+ly)**(-2)*(
            (2*lu+ly+dist*lu**2+lu*ly*dist)*(-dist-2/(lu+ly))+2+2*lu*dist+ly*dist)
        dktheta1 = self.varianceU*self.varianceY*(dk1theta1+dk2theta1+dk3theta1)

        # theta2 is the rate ly = 1/lengthscaleY.
        dk1theta2 = decay_y * ((lu+ly)**(-2)) * ((-dist)*(2*lu+ly) + 1 + (-2)*(2*lu+ly)/(lu+ly))
        dk2theta2 = (
            decay_u*(ly-lu)**(-2) * (1+lu*dist+(-2)*(ly-2*lu+lu*ly*dist-dist*lu**2)*(ly-lu)**(-1))
            + decay_y*(ly-lu)**(-2) * ((-dist)*(2*lu-ly) - 1+(2*lu-ly)*(-2)*(ly-lu)**(-1))
        )
        dk3theta2 = decay_u * (-3*lu-ly-dist*lu**2-lu*ly*dist)/(lu+ly)**3
        dktheta2 = self.varianceU*self.varianceY*(dk1theta2 + dk2theta2 + dk3theta2)

        # The kernel is linear in each variance, so its derivative with
        # respect to one variance is the other variance times the unit kernel.
        dkdvar = self._unit_kernel(dist, ly, lu)

        target[0] += np.sum(self.varianceY*dkdvar * dL_dK)
        target[1] += np.sum(self.varianceU*dkdvar * dL_dK)
        # Chain rule from the rates back to the stored lengthscales.
        target[2] += np.sum(dktheta1*(-np.sqrt(3)*self.lengthscaleU**(-2)) * dL_dK)
        target[3] += np.sum(dktheta2*(-self.lengthscaleY**(-2)) * dL_dK)

    # NOTE: dKdiag_dtheta, dK_dX and dKdiag_dX are not implemented for this
    # kernel.
|
||||
556
GPy/kern/_src/todo/eq_ode1.py
Normal file
556
GPy/kern/_src/todo/eq_ode1.py
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
# Copyright (c) 2013, GPy Authors, see AUTHORS.txt
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from GPy.util.linalg import mdot, pdinv
|
||||
from GPy.util.ln_diff_erfs import ln_diff_erfs
|
||||
import pdb
|
||||
from scipy import weave
|
||||
|
||||
class Eq_ode1(Kernpart):
    r"""
    Covariance function for first order differential equation driven by an exponentiated quadratic covariance.

    The outputs of this kernel have the form

    .. math::
       \frac{\text{d}y_j}{\text{d}t} = \sum_{i=1}^R w_{j,i} f_i(t-\delta_j) +\sqrt{\kappa_j}g_j(t) - d_jy_j(t)

    where :math:`R` is the rank of the system, :math:`w_{j,i}` is the sensitivity of the :math:`j`th output to the :math:`i`th latent function, :math:`d_j` is the decay rate of the :math:`j`th output and :math:`f_i(t)` and :math:`g_i(t)` are independent latent Gaussian processes governed by an exponentiated quadratic covariance.

    :param output_dim: number of outputs driven by latent function.
    :type output_dim: int
    :param W: sensitivities of each output to the latent driving function.
    :type W: ndarray (output_dim x rank).
    :param rank: If rank is greater than 1 then there are assumed to be a total of rank latent forces independently driving the system, each with identical covariance.
    :type rank: int
    :param decay: decay rates for the first order system.
    :type decay: array of length output_dim-1.
    :param delay: delay between latent force and output response.
    :type delay: array of length output_dim-1.
    :param kappa: diagonal term that allows each latent output to have an independent component to the response.
    :type kappa: array of length output_dim.

    Inputs are two-column matrices: column 0 holds times and column 1 holds
    output indices. Index 0 selects the latent function; indices greater
    than 0 select the ODE outputs.

    .. Note: see first order differential equation examples in GPy.examples.regression for some usage.
    """

    def __init__(self, output_dim, W=None, rank=1, kappa=None, lengthscale=1.0, decay=None, delay=None):
        self.rank = rank
        self.input_dim = 1
        self.name = 'eq_ode1'
        self.output_dim = output_dim
        self.lengthscale = lengthscale

        # W entries + lengthscale + one decay per ODE output.
        self.num_params = self.output_dim*self.rank + 1 + (self.output_dim - 1)
        if kappa is not None:
            assert kappa.shape == (self.output_dim,)
            self.num_params += self.output_dim
        if delay is not None:
            assert delay.shape == (self.output_dim-1,)
            self.num_params += self.output_dim-1

        if W is None:
            self.W = 0.5*np.random.randn(self.output_dim, self.rank)/np.sqrt(self.rank)
        else:
            assert W.shape == (self.output_dim, self.rank)
            self.W = W

        if decay is None:
            self.decay = np.ones(self.output_dim-1)
        else:
            # A user-supplied decay used to be dropped, leaving self.decay
            # undefined and breaking _get_params.
            assert decay.shape == (self.output_dim-1,)
            self.decay = decay

        self.kappa = kappa
        self.delay = delay
        self.is_normalized = True
        self.is_stationary = False
        self.gaussian_initial = False
        # Round-trip through the parameter vector to derive B and sigma.
        self._set_params(self._get_params())

    def _get_params(self):
        """Return the parameters as one flat vector.

        Order: W (row-major), kappa (if used), decay, delay (if used),
        lengthscale.
        """
        param_list = [self.W.flatten()]
        if self.kappa is not None:
            param_list.append(self.kappa)
        param_list.append(self.decay)
        if self.delay is not None:
            param_list.append(self.delay)
        param_list.append(self.lengthscale)
        return np.hstack(param_list)

    def _set_params(self, x):
        """Unpack a flat vector laid out as in ``_get_params``.

        Also refreshes ``self.B = W W^T (+ diag(kappa))`` and
        ``self.sigma = sqrt(2) * lengthscale``.
        """
        assert x.size == self.num_params
        end = self.output_dim*self.rank
        self.W = x[:end].reshape(self.output_dim, self.rank)
        self.B = np.dot(self.W, self.W.T)
        start = end
        if self.kappa is not None:
            end += self.output_dim
            self.kappa = x[start:end]
            self.B += np.diag(self.kappa)
            start = end
        end += self.output_dim-1
        self.decay = x[start:end]
        start = end
        if self.delay is not None:
            end += self.output_dim-1
            self.delay = x[start:end]
            start = end
        self.lengthscale = x[start]
        self.sigma = np.sqrt(2)*self.lengthscale

    def _get_param_names(self):
        """Return parameter names in the same order as ``_get_params``."""
        param_names = ['W%i_%i' % (i, j)
                       for i in range(self.output_dim)
                       for j in range(self.rank)]
        if self.kappa is not None:
            param_names += ['kappa_%i' % i for i in range(self.output_dim)]
        param_names += ['decay_%i' % i for i in range(1, self.output_dim)]
        if self.delay is not None:
            # The original ``1+range(...)`` raised TypeError. Delays are
            # numbered like decays here (one per ODE output).
            # NOTE(review): confirm this numbering is the intended one.
            param_names += ['delay_%i' % i for i in range(1, self.output_dim)]
        param_names += ['lengthscale']
        return param_names

    def K(self, X, X2, target):
        """Add the covariance between X and X2 to ``target`` in place."""
        if X.shape[1] > 2:
            raise ValueError('Input matrix for ode1 covariance should have at most two columns, one containing times, the other output indices')

        self._K_computations(X, X2)
        target += self._scale*self._K_dvar

        if self.gaussian_initial:
            # Add covariance associated with initial condition.
            # NOTE(review): gaussian_initial is always False in __init__ and
            # self.initial_variance is not set anywhere in this class, so
            # this branch is currently unreachable.
            t1_mat = self._t[self._rorder, None]
            t2_mat = self._t2[None, self._rorder2]
            target += self.initial_variance * np.exp(- self.decay * (t1_mat + t2_mat))

    def Kdiag(self, index, target):
        """Not implemented: leaves ``target`` untouched."""
        pass

    def _param_grad_helper(self, dL_dK, X, X2, target):
        """Accumulate parameter gradients into ``target`` (see _dK_ode_dtheta)."""
        # First extract times and indices.
        self._extract_t_indices(X, X2, dL_dK=dL_dK)
        self._dK_ode_dtheta(target)

    def _dK_ode_dtheta(self, target):
        """Do all the computations for the ode parts of the covariance function.

        Only the lengthscale gradient (last entry of ``target``) is
        accumulated, and only from the ODE-ODE block of ``dL_dK``; the
        other entries of ``target`` are not touched.
        """
        ode_rows = self._index > 0
        t_ode = self._t[ode_rows]
        index_ode = self._index[ode_rows]-1
        dL_dK_ode = self._dL_dK[ode_rows, :]
        if self._t2 is None:
            if t_ode.size == 0:
                return
            t2_ode = t_ode
            index2_ode = index_ode
            dL_dK_ode = dL_dK_ode[:, ode_rows]
        else:
            ode_cols = self._index2 > 0
            t2_ode = self._t2[ode_cols]
            if t_ode.size == 0 or t2_ode.size == 0:
                return
            index2_ode = self._index2[ode_cols]-1
            dL_dK_ode = dL_dK_ode[:, ode_cols]

        self._compute_H(t_ode, index_ode, t2_ode, index2_ode,
                        stationary=self.is_stationary, update_derivatives=True)
        dh1_dsigma = self._dh_dsigma
        if self._t2 is not None:
            # Second call overwrites self._dh_dsigma with the swapped version.
            self._compute_H(t2_ode, index2_ode, t_ode, index_ode,
                            stationary=self.is_stationary, update_derivatives=True)
        # Built as a fresh array so _dh_dsigma is not modified through an alias.
        self._dK_dsigma = dh1_dsigma + self._dh_dsigma.T

        # Last element is the length scale. K_ode = 0.5*(h + h2.T) and
        # sigma = sqrt(2)*lengthscale, hence the overall 1/sqrt(2) factor.
        target[-1] += (dL_dK_ode*self._dK_dsigma/np.sqrt(2)).sum()

    def dKdiag_dtheta(self, dL_dKdiag, index, target):
        """Not implemented: leaves ``target`` untouched."""
        pass

    def gradients_X(self, dL_dK, X, X2, target):
        """Not implemented: leaves ``target`` untouched."""
        pass

    def _extract_t_indices(self, X, X2=None, dL_dK=None):
        """Extract times and output indices from the input matrix X.

        Times are ordered according to their index for convenience of
        computation; this ordering is stored in self._order and
        self._order2. These orderings are mapped back to the original
        ordering (in X) using self._rorder and self._rorder2. When
        ``dL_dK`` is given it is permuted the same way and stored as
        ``self._dL_dK``.
        """
        # TODO: some fast checking here to see if this needs recomputing?
        if not X.shape[1] == 2:
            raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
        # Plain ``int``: the ``np.int`` alias no longer exists in current NumPy.
        self._index = np.asarray(X[:, 1], dtype=int)
        # Sort indices so that outputs are in blocks for computational
        # convenience.
        self._order = self._index.argsort()
        self._index = self._index[self._order]
        self._t = X[:, 0][self._order]
        self._rorder = self._order.argsort()  # rorder is for reversing the order

        if X2 is None:
            self._t2 = None
            self._index2 = None
            self._order2 = self._order
            self._rorder2 = self._rorder
        else:
            if not X2.shape[1] == 2:
                raise ValueError('Input matrix for ode1 covariance should have two columns, one containing times, the other output indices')
            self._index2 = np.asarray(X2[:, 1], dtype=int)
            self._order2 = self._index2.argsort()
            self._index2 = self._index2[self._order2]
            self._t2 = X2[:, 0][self._order2]
            self._rorder2 = self._order2.argsort()  # rorder2 is for reversing order

        if dL_dK is not None:
            self._dL_dK = dL_dK[self._order, :][:, self._order2]

    def _K_computations(self, X, X2):
        """Perform main body of computations for the ode1 covariance function.

        Sets ``self._K_dvar`` (unscaled covariance) and ``self._scale``
        (per-entry factor taken from ``self.B``), both in the original
        row/column order of ``X`` / ``X2``.
        """
        # First extract times and indices.
        self._extract_t_indices(X, X2)

        self._K_compute_eq()
        self._K_compute_ode_eq()
        if X2 is None:
            self._K_eq_ode = self._K_ode_eq.T
        else:
            self._K_compute_ode_eq(transpose=True)
        self._K_compute_ode()

        # Assemble the sorted-order blocks, then undo the sorting.
        blocks = np.vstack((np.hstack((self._K_eq, self._K_eq_ode)),
                            np.hstack((self._K_ode_eq, self._K_ode))))
        self._K_dvar = blocks[self._rorder, :][:, self._rorder2]

        # Matrix giving scales of each output: entry (a, b) is
        # B[index_a, index_b]. The indices are put back into the original
        # order so the scale lines up with _K_dvar. This replaces the former
        # scipy.weave inline C (weave is no longer part of SciPy), which
        # indexed with the sorted indices and, for X2, wrote with a
        # transposed stride.
        row_index = self._index[self._rorder]
        if X2 is None:
            col_index = row_index
        else:
            col_index = self._index2[self._rorder2]
        self._scale = self.B[row_index[:, None], col_index[None, :]]

    def _K_compute_eq(self):
        """Compute covariance for latent covariance."""
        t_eq = self._t[self._index == 0]
        if self._t2 is None:
            if t_eq.size == 0:
                self._K_eq = np.zeros((0, 0))
                return
            self._dist2 = np.square(t_eq[:, None] - t_eq[None, :])
        else:
            t2_eq = self._t2[self._index2 == 0]
            if t_eq.size == 0 or t2_eq.size == 0:
                self._K_eq = np.zeros((t_eq.size, t2_eq.size))
                return
            self._dist2 = np.square(t_eq[:, None] - t2_eq[None, :])

        self._K_eq = np.exp(-self._dist2/(2*self.lengthscale*self.lengthscale))
        if self.is_normalized:
            self._K_eq /= (np.sqrt(2*np.pi)*self.lengthscale)

    def _K_compute_ode_eq(self, transpose=False):
        """Compute the cross covariances between latent exponentiated quadratic and observed ordinary differential equations.

        :param transpose: if set to false the exponentiated quadratic is on the rows of the matrix and is computed according to self._t, if set to true it is on the columns and is computed according to self._t2 (default=False).
        :type transpose: bool
        """
        if self._t2 is not None:
            if transpose:
                t_eq = self._t[self._index == 0]
                t_ode = self._t2[self._index2 > 0]
                index_ode = self._index2[self._index2 > 0]-1
            else:
                t_eq = self._t2[self._index2 == 0]
                t_ode = self._t[self._index > 0]
                index_ode = self._index[self._index > 0]-1
        else:
            t_eq = self._t[self._index == 0]
            t_ode = self._t[self._index > 0]
            index_ode = self._index[self._index > 0]-1

        if t_ode.size == 0 or t_eq.size == 0:
            if transpose:
                self._K_eq_ode = np.zeros((t_eq.shape[0], t_ode.shape[0]))
            else:
                self._K_ode_eq = np.zeros((t_ode.shape[0], t_eq.shape[0]))
            return

        t_ode_mat = t_ode[:, None]
        t_eq_mat = t_eq[None, :]
        if self.delay is not None:
            # Out-of-place subtraction: never write through a view.
            t_ode_mat = t_ode_mat - self.delay[index_ode, None]
        diff_t = (t_ode_mat - t_eq_mat)

        inv_sigma_diff_t = 1./self.sigma*diff_t
        decay_vals = self.decay[index_ode][:, None]
        half_sigma_d_i = 0.5*self.sigma*decay_vals

        if self.is_stationary:
            # ``inf`` was an undefined bare name here.
            ln_part, signs = ln_diff_erfs(np.inf, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
        else:
            ln_part, signs = ln_diff_erfs(half_sigma_d_i + t_eq_mat/self.sigma, half_sigma_d_i - inv_sigma_diff_t, return_sign=True)
        sK = signs*np.exp(half_sigma_d_i*half_sigma_d_i - decay_vals*diff_t + ln_part)

        sK *= 0.5

        if not self.is_normalized:
            sK *= np.sqrt(np.pi)*self.sigma

        if transpose:
            self._K_eq_ode = sK.T
        else:
            self._K_ode_eq = sK

    def _K_compute_ode(self):
        """Compute covariances between outputs of the ODE models."""
        t_ode = self._t[self._index > 0]
        index_ode = self._index[self._index > 0]-1
        if self._t2 is None:
            if t_ode.size == 0:
                self._K_ode = np.zeros((0, 0))
                return
            t2_ode = t_ode
            index2_ode = index_ode
        else:
            t2_ode = self._t2[self._index2 > 0]
            if t_ode.size == 0 or t2_ode.size == 0:
                self._K_ode = np.zeros((t_ode.size, t2_ode.size))
                return
            index2_ode = self._index2[self._index2 > 0]-1

        h = self._compute_H(t_ode, index_ode, t2_ode, index2_ode, stationary=self.is_stationary)

        if self._t2 is None:
            self._K_ode = 0.5 * (h + h.T)
        else:
            h2 = self._compute_H(t2_ode, index2_ode, t_ode, index_ode, stationary=self.is_stationary)
            self._K_ode = 0.5 * (h + h2.T)

        if not self.is_normalized:
            self._K_ode *= np.sqrt(np.pi)*self.sigma

    def _compute_diag_H(self, t, index, update_derivatives=False, stationary=False):
        """Helper function for computing H for the diagonal only.

        :param t: time input.
        :type t: array
        :param index: output indices
        :type index: array of int.
        :param update_derivatives: whether or not to update the derivative portions (default False).
        :type update_derivatives: bool
        :param stationary: whether to compute the stationary version of the covariance (default False).
        :type stationary: bool

        Not implemented: the method does nothing and returns ``None``.
        """
        # TODO: implement the diagonal-only computation.
        pass

    def _compute_H(self, t, index, t2, index2, update_derivatives=False, stationary=False):
        """Helper function for computing part of the ode1 covariance function.

        :param t: first time input.
        :type t: array
        :param index: Indices of first output.
        :type index: array of int
        :param t2: second time input.
        :type t2: array
        :param index2: Indices of second output.
        :type index2: array of int
        :param update_derivatives: whether to update derivatives (default is False)
        :return h : result of this subcomponent of the kernel for the given values.
        :rtype: ndarray

        With ``update_derivatives=True`` the attributes ``_dh_ddecay``,
        ``_dh_ddecay2`` and ``_dh_dsigma`` are (re)set as a side effect.
        The input arrays are never modified.
        """
        if stationary:
            raise NotImplementedError("Error, stationary version of this covariance not yet implemented.")

        # Decays of the row/column outputs, shaped for broadcasting.
        decay_i = self.decay[index][:, None]
        decay_j = self.decay[index2][None, :]
        decay_sum = decay_i + decay_j

        t_mat = t[:, None]
        t2_mat = t2[None, :]
        if self.delay is not None:
            # t_mat / t2_mat are views of the caller's arrays (and may share
            # storage when t2 is t). Subtracting in place corrupted the
            # inputs and applied the delay twice.
            t_mat = t_mat - self.delay[index][:, None]
            t2_mat = t2_mat - self.delay[index2][None, :]

        diff_t = (t_mat - t2_mat)
        inv_sigma_diff_t = 1./self.sigma*diff_t
        half_sigma_decay_i = 0.5*self.sigma*decay_i
        half_sq = half_sigma_decay_i*half_sigma_decay_i

        ln_part_1, sign1 = ln_diff_erfs(half_sigma_decay_i + t2_mat/self.sigma,
                                        half_sigma_decay_i - inv_sigma_diff_t,
                                        return_sign=True)
        ln_part_2, sign2 = ln_diff_erfs(half_sigma_decay_i,
                                        half_sigma_decay_i - t_mat/self.sigma,
                                        return_sign=True)

        h = sign1*np.exp(half_sq
                         - decay_i*diff_t + ln_part_1
                         - np.log(decay_sum))
        h -= sign2*np.exp(half_sq
                          - decay_i*t_mat - decay_j*t2_mat + ln_part_2
                          - np.log(decay_sum))

        if update_derivatives:
            sigma2 = self.sigma*self.sigma

            # Update ith decay gradient
            dh_ddecay = ((0.5*decay_i*sigma2*decay_sum - 1)*h
                         + (-diff_t*sign1*np.exp(
                             half_sq - decay_i*diff_t + ln_part_1
                         )
                            + t_mat*sign2*np.exp(
                                half_sq - decay_i*t_mat
                                - decay_j*t2_mat + ln_part_2))
                         + self.sigma/np.sqrt(np.pi)*(
                             -np.exp(
                                 -diff_t*diff_t/sigma2
                             ) + np.exp(
                                 -t2_mat*t2_mat/sigma2 - decay_i*t_mat
                             ) + np.exp(
                                 -t_mat*t_mat/sigma2 - decay_j*t2_mat
                             ) - np.exp(
                                 -(decay_i*t_mat + decay_j*t2_mat)
                             )
                         ))
            self._dh_ddecay = (dh_ddecay/decay_sum).real

            # Update jth decay gradient
            dh_ddecay2 = (t2_mat*sign2
                          * np.exp(
                              half_sq
                              - (decay_i*t_mat + decay_j*t2_mat)
                              + ln_part_2
                          )
                          - h)
            # Previously divided dh_ddecay here, discarding dh_ddecay2.
            self._dh_ddecay2 = (dh_ddecay2/decay_sum).real

            # Update sigma gradient
            self._dh_dsigma = (half_sigma_decay_i*decay_i*h
                               + 2/(np.sqrt(np.pi)
                                    * decay_sum)
                               * ((-diff_t/sigma2 - decay_i/2)
                                  * np.exp(-diff_t*diff_t/sigma2)
                                  + (-t2_mat/sigma2 + decay_i/2)
                                  * np.exp(-t2_mat*t2_mat/sigma2 - decay_i*t_mat)
                                  - (-t_mat/sigma2 - decay_i/2)
                                  * np.exp(-t_mat*t_mat/sigma2 - decay_j*t2_mat)
                                  - decay_i/2
                                  * np.exp(-(decay_i*t_mat + decay_j*t2_mat))))

        return h
|
||||
74
GPy/kern/_src/todo/finite_dimensional.py
Normal file
74
GPy/kern/_src/todo/finite_dimensional.py
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...util.linalg import pdinv,mdot
|
||||
|
||||
class FiniteDimensional(Kernpart):
    """Kernel built from a finite set of basis functions.

    With ``FX`` the matrix of basis functions evaluated at ``X`` (one column
    per function) and ``S = diag(sqrt(weights))``, the covariance is
    ``variance * FX S G^{-1} S FX2^T``.

    The parameter vector is ``[variance, weight_0, ..., weight_{n-1}]``.
    """

    def __init__(self, input_dim, F, G, variance=1., weights=None):
        """
        Arguments
        ----------
        input_dim: int - the number of input dimensions
        F: np.array of functions with shape (n,) - the n basis functions
        G: np.array with shape (n,n) - the Gram matrix associated to F
        variance: float - overall scale of the kernel
        weights : np.ndarray with shape (n,)
        """
        self.input_dim = input_dim
        self.F = F
        self.G = G
        # pdinv returns four values; only the first is kept, as G_1.
        self.G_1 = pdinv(G)[0]
        self.n = F.shape[0]
        if weights is not None:
            assert weights.shape == (self.n,)
        else:
            weights = np.ones(self.n)
        self.num_params = self.n + 1
        self.name = 'finite_dim'
        self._set_params(np.hstack((variance, weights)))

    def _get_params(self):
        """Return ``[variance, weights...]`` as one flat vector."""
        return np.hstack((self.variance, self.weights))

    def _set_params(self, x):
        """Unpack ``[variance, weights...]`` from the flat vector ``x``."""
        assert x.size == (self.num_params)
        self.variance = x[0]
        self.weights = x[1:]

    def _get_param_names(self):
        """Return parameter names in the same order as ``_get_params``."""
        if self.n == 1:
            return ['variance', 'weight']
        else:
            return ['variance']+['weight_%i' % i for i in range(self.weights.size)]

    def _basis(self, X):
        """Evaluate every basis function at X and stack the results as columns."""
        return np.column_stack([f(X) for f in self.F])

    def K(self, X, X2, target):
        """Add the covariance between X and X2 to ``target`` in place."""
        if X2 is None:
            X2 = X
        sqrt_w = np.sqrt(self.weights)
        # Scaling the columns by sqrt(w) equals multiplying by diag(sqrt(w)).
        A = self._basis(X) * sqrt_w
        B = self._basis(X2) * sqrt_w
        target += self.variance * np.dot(np.dot(A, self.G_1), B.T)

    def Kdiag(self, X, target):
        """Add the diagonal of the covariance of X to ``target`` in place.

        The previous implementation called ``self.K(X, X)`` without a
        target and used its (None) return value, so it always raised.
        """
        A = self._basis(X) * np.sqrt(self.weights)
        # diag(A G^{-1} A^T) without forming the full N x N matrix.
        target += self.variance * np.sum(np.dot(A, self.G_1) * A, axis=1)

    def _param_grad_helper(self, X, X2, target):
        """Add dK/dtheta to ``target``, whose shape is NxMx(Ntheta).

        ``target[:, :, 0]`` receives the derivative with respect to the
        variance and ``target[:, :, 1:]`` the derivatives with respect to
        each weight. The weight derivatives divide by ``sqrt(weights)``, so
        the weights must be non-zero.
        """
        if X2 is None:
            X2 = X
        sqrt_w = np.sqrt(self.weights)
        FX = self._basis(X)
        FX2 = self._basis(X2)
        A = FX * sqrt_w
        B = FX2 * sqrt_w
        AG = np.dot(A, self.G_1)        # N x n
        GB = np.dot(self.G_1, B.T)      # n x M

        # K is linear in the variance, so dK/dvariance is the unscaled kernel.
        target[:, :, 0] += np.dot(AG, B.T)

        # sqrt(w_i) appears once on each side of G^{-1}; the product rule
        # gives two terms, each scaled by d sqrt(w_i)/dw_i = 1/(2 sqrt(w_i)).
        dK_dsqrt_w = (FX[:, None, :] * GB.T[None, :, :]
                      + AG[:, None, :] * FX2[None, :, :])   # N x M x n
        target[:, :, 1:] += self.variance * (0.5 / sqrt_w) * dK_dsqrt_w

    def dKdiag_dtheta(self, X, target):
        """Add 1 to the variance column (column 0) of ``target`` in place."""
        # NOTE(review): a constant 1 is the variance derivative of Kdiag only
        # if the unscaled diagonal equals 1 - confirm whether that is intended.
        target[:, 0] += 1.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
41
GPy/kern/_src/todo/fixed.py
Normal file
41
GPy/kern/_src/todo/fixed.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class Fixed(Kernpart):
    """Kernel given by a user-supplied matrix multiplied by one variance parameter."""

    def __init__(self, input_dim, K, variance=1.):
        """
        :param input_dim: the number of input dimensions
        :type input_dim: int
        :param K: the fixed matrix that the variance multiplies
        :param variance: the variance of the kernel
        :type variance: float
        """
        self.name = 'fixed'
        self.num_params = 1
        self.input_dim = input_dim
        self.fixed_K = K
        initial = np.array([variance]).flatten()
        self._set_params(initial)

    def _get_params(self):
        """Return the variance (stored as a one-element array)."""
        return self.variance

    def _set_params(self, x):
        """Store ``x``, which must have shape ``(1,)``, as the variance."""
        assert x.shape == (1,)
        self.variance = x

    def _get_param_names(self):
        """Return the name of the single parameter."""
        names = ['variance']
        return names

    def K(self, X, X2, target):
        """Add ``variance * fixed_K`` to ``target`` in place; X and X2 are not used."""
        scaled = self.variance * self.fixed_K
        target += scaled

    def _param_grad_helper(self, partial, X, X2, target):
        """Add the sum of ``partial * fixed_K`` to ``target`` in place."""
        weighted = partial * self.fixed_K
        target += weighted.sum()

    def gradients_X(self, partial, X, X2, target):
        """Does nothing: ``target`` is left untouched."""
        pass

    def dKdiag_dX(self, partial, X, target):
        """Does nothing: ``target`` is left untouched."""
        pass
|
||||
154
GPy/kern/_src/todo/gibbs.py
Normal file
154
GPy/kern/_src/todo/gibbs.py
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...util.linalg import tdot
|
||||
from ...core.mapping import Mapping
|
||||
import GPy
|
||||
|
||||
class Gibbs(Kernpart):
    r"""
    Gibbs non-stationary covariance function.

    .. math::

       r = \sqrt{(x_i - x_j)'(x_i - x_j)}

       k(x_i, x_j) = \sigma^2 Z \exp\left(-\frac{r^2}{l(x_i)^2 + l(x_j)^2}\right)

       Z = \left(\frac{2 l(x_i) l(x_j)}{l(x_i)^2 + l(x_j)^2}\right)^{q/2}

    where :math:`l(x)` is a function giving the length scale as a function of
    space and :math:`q` is the dimensionality of the input space.

    This is the non stationary kernel proposed by Mark Gibbs in his 1997
    thesis. It is similar to an RBF but has a length scale that varies
    with input location. This leads to an additional term in front of
    the kernel.

    The parameters are :math:`\sigma^2`, the process variance, and
    the parameters of l(x) which is a function that can be
    specified by the user, by default a multi-layer perceptron is
    used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param mapping: the mapping that gives the lengthscale across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :param ARD: must be False; ``ARD=True`` raises NotImplementedError.
    :type ARD: Boolean
    :rtype: Kernpart object

    See Mark Gibbs's thesis for more details: Gibbs,
    M. N. (1997). Bayesian Gaussian Processes for Regression and
    Classification. PhD thesis, Department of Physics, University of
    Cambridge. Or also see Page 93 of Gaussian Processes for Machine
    Learning by Rasmussen and Williams. Although note that we do not
    constrain the lengthscale to be positive by default. This allows
    anticorrelation to occur (the sign of :math:`l(x_i) l(x_j)` is carried
    by :math:`Z`). The positive constraint can be included by the user
    manually.

    """

    def __init__(self, input_dim, variance=1., mapping=None, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if mapping is None:
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if ARD:
            raise NotImplementedError
        self.num_params = 1 + mapping.num_params

        self.mapping = mapping
        self.name = 'gibbs'
        self._set_params(np.hstack((variance, self.mapping._get_params())))

    def _get_params(self):
        """Return the variance followed by the mapping parameters."""
        return np.hstack((self.variance, self.mapping._get_params()))

    def _set_params(self, x):
        """Set the variance from ``x[0]`` and the mapping parameters from ``x[1:]``."""
        assert x.size == (self.num_params)
        self.variance = x[0]
        self.mapping._set_params(x[1:])

    def _get_param_names(self):
        """Return 'variance' followed by the mapping's parameter names."""
        return ['variance'] + self.mapping._get_param_names()

    def K(self, X, X2, target):
        """Return covariance between X and X2."""
        self._K_computations(X, X2)
        target += self.variance*self._K_dvar

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix for X."""
        np.add(target, self.variance, target)

    def _param_grad_helper(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        self._K_computations(X, X2)
        self._dK_computations(dL_dK)
        # Identity test: ``X2 == None`` on an array is an elementwise
        # comparison and cannot be used as a condition.
        if X2 is None:
            gmapping = self.mapping.df_dtheta(2*self._dL_dl[:, None], X)
        else:
            gmapping = self.mapping.df_dtheta(self._dL_dl[:, None], X)
            gmapping += self.mapping.df_dtheta(self._dL_dl_two[:, None], X2)

        target += np.hstack([(dL_dK*self._K_dvar).sum(), gmapping])

    def gradients_X(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        # First account for gradients arising from presence of X in exponent.
        self._K_computations(X, X2)
        if X2 is None:
            _K_dist = 2*(X[:, None, :] - X[None, :, :])
        else:
            # Not cached in _K_computations (it is an N x M x Q array).
            _K_dist = X[:, None, :] - X2[None, :, :]
        gradients_X = (-2.*self.variance)*np.transpose((self._K_dvar/self._w2)[:, :, None]*_K_dist, (1, 0, 2))
        target += np.sum(gradients_X*dL_dK.T[:, :, None], 0)
        # Now account for gradients arising from presence of X in lengthscale.
        self._dK_computations(dL_dK)
        if X2 is None:
            target += 2.*self.mapping.df_dX(self._dL_dl[:, None], X)
        else:
            target += self.mapping.df_dX(self._dL_dl[:, None], X)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X (nothing is added)."""
        pass

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        target[0] += np.sum(dL_dKdiag)

    def _K_computations(self, X, X2=None):
        """Pre-computations for the covariance function (used both when computing the covariance and its gradients). Here self._K_dvar, self._K_dist2, self._w2 and the cached lengthscales are updated."""
        self._lengthscales = self.mapping.f(X)
        self._lengthscales2 = np.square(self._lengthscales)
        if X2 is None:
            # Share the same objects: _dK_computations detects the symmetric
            # case by identity of these two attributes.
            self._lengthscales_two = self._lengthscales
            self._lengthscales_two2 = self._lengthscales2
            Xsquare = np.square(X).sum(1)
            self._K_dist2 = -2.*tdot(X) + Xsquare[:, None] + Xsquare[None, :]
        else:
            self._lengthscales_two = self.mapping.f(X2)
            self._lengthscales_two2 = np.square(self._lengthscales_two)
            self._K_dist2 = -2.*np.dot(X, X2.T) + np.square(X).sum(1)[:, None] + np.square(X2).sum(1)[None, :]
        self._w2 = self._lengthscales2 + self._lengthscales_two2.T
        prod_length = self._lengthscales*self._lengthscales_two.T
        self._K_exponential = np.exp(-self._K_dist2/self._w2)
        # sign(prod_length) lets lengthscales of opposite sign anticorrelate.
        self._K_dvar = np.sign(prod_length)*(2*np.abs(prod_length)/self._w2)**(self.input_dim/2.)*self._K_exponential

    def _dK_computations(self, dL_dK):
        """Pre-computations for the gradients of the covariance function. Here the gradient of the covariance with respect to all the individual lengthscales is computed.

        :param dL_dK: the gradient of the objective with respect to the covariance function.
        :type dL_dK: ndarray"""

        self._dL_dl = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales_two.T**4 - self._lengthscales**4) + 2*self._lengthscales2*self._K_dist2)/(self._w2*self._w2*self._lengthscales)).sum(1)
        if self._lengthscales_two is self._lengthscales:
            # Symmetric case (X2 was None): callers double _dL_dl instead.
            self._dL_dl_two = None
        else:
            self._dL_dl_two = (dL_dK*self.variance*self._K_dvar*(self.input_dim/2.*(self._lengthscales**4 - self._lengthscales_two.T**4) + 2*self._lengthscales_two2.T*self._K_dist2)/(self._w2*self._w2*self._lengthscales_two.T)).sum(0)
|
||||
104
GPy/kern/_src/todo/hetero.py
Normal file
104
GPy/kern/_src/todo/hetero.py
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from IPython.core.debugger import Tracer; debug_here=Tracer()
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...util.linalg import tdot
|
||||
from ...core.mapping import Mapping
|
||||
import GPy
|
||||
|
||||
class Hetero(Kernpart):
    r"""
    TODO: Need to constrain the function outputs
    positive (still thinking of best way of doing this!!! Yes, intend to use
    transformations, but what's the *best* way). Currently just squaring output.

    Heteroscedastic noise which depends on input location. See, for example,
    this paper by Goldberg et al.

    .. math::

       k(x_i, x_j) = \delta_{i,j} \sigma^2(x_i)

    where :math:`\sigma^2(x)` is a function giving the variance as a function of input space and :math:`\delta_{i,j}` is the Kronecker delta function.

    The parameters are the parameters of :math:`\sigma^2(x)` which is a
    function that can be specified by the user, by default a
    multi-layer perceptron is used.

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param mapping: the mapping whose squared output gives the variance across the input space (by default GPy.mappings.MLP is used with 20 hidden nodes).
    :type mapping: GPy.core.Mapping
    :param transform: stored as ``self.transform`` (default ``GPy.core.transformations.logexp()``); no method of this class applies it.
    :rtype: Kernpart object

    See this paper:

    Goldberg, P. W. Williams, C. K. I. and Bishop,
    C. M. (1998) Regression with Input-dependent Noise: a Gaussian
    Process Treatment In Advances in Neural Information Processing
    Systems, Volume 10, pp. 493-499. MIT Press

    for a Gaussian process treatment of this problem.

    """

    def __init__(self, input_dim, mapping=None, transform=None):
        self.input_dim = input_dim
        if mapping is None:
            mapping = GPy.mappings.MLP(output_dim=1, hidden_dim=20, input_dim=input_dim)
        if transform is None:
            transform = GPy.core.transformations.logexp()

        self.transform = transform
        self.mapping = mapping
        self.name = 'hetero'
        self.num_params = self.mapping.num_params
        self._set_params(self.mapping._get_params())

    def _get_params(self):
        """Return the parameters of the mapping."""
        return self.mapping._get_params()

    def _set_params(self, x):
        """Forward ``x`` to the mapping as its new parameters."""
        assert x.size == (self.num_params)
        self.mapping._set_params(x)

    def _get_param_names(self):
        """Return the parameter names of the mapping."""
        return self.mapping._get_param_names()

    def K(self, X, X2, target):
        """Return covariance between X and X2."""
        # Only the X-with-itself case contributes; it adds to the diagonal.
        if (X2 is None) or (X2 is X):
            target[np.diag_indices_from(target)] += self._Kdiag(X)

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix for X."""
        target += self._Kdiag(X)

    def _Kdiag(self, X):
        """Helper function for computing the diagonal elements of the covariance."""
        return self.mapping.f(X).flatten()**2

    def _param_grad_helper(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        if (X2 is None) or (X2 is X):
            # Stride of shape[0]+1 over the flattened matrix walks its diagonal.
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dtheta(dL_dKdiag, X, target)

    def dKdiag_dtheta(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to parameters."""
        target += 2.*self.mapping.df_dtheta(dL_dKdiag[:, None]*self.mapping.f(X), X)

    def gradients_X(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X."""
        # Identity test: ``X2 == None`` on an array is an elementwise
        # comparison and cannot be used as a condition.
        if (X2 is None) or (X2 is X):
            dL_dKdiag = dL_dK.flat[::dL_dK.shape[0]+1]
            self.dKdiag_dX(dL_dKdiag, X, target)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X."""
        target += 2.*self.mapping.df_dX(dL_dKdiag[:, None], X)*self.mapping.f(X)
|
||||
|
||||
|
||||
|
||||
38
GPy/kern/_src/todo/odekern1.c
Normal file
38
GPy/kern/_src/todo/odekern1.c
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#include <math.h>
|
||||
|
||||
double k_uu(t1,t2,theta1,theta2,sig1,sig2)
|
||||
{
|
||||
double kern=0;
|
||||
double dist=0;
|
||||
|
||||
dist = sqrt(t2*t2-t1*t1)
|
||||
|
||||
kern = sig1*(1+theta1*dist)*exp(-theta1*dist)
|
||||
|
||||
return kern;
|
||||
}
|
||||
|
||||
|
||||
|
||||
double k_yy(t1, t2, theta1,theta2,sig1,sig2)
|
||||
{
|
||||
double kern=0;
|
||||
double dist=0;
|
||||
|
||||
dist = sqrt(t2*t2-t1*t1)
|
||||
|
||||
kern = sig1*sig2 * ( exp(-theta1*dist)*(theta2-2*theta1+theta1*theta2*dist-theta1*theta1*dist) +
|
||||
exp(-dist) ) / ((theta2-theta1)*(theta2-theta1))
|
||||
|
||||
return kern;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
138
GPy/kern/_src/todo/poly.py
Normal file
138
GPy/kern/_src/todo/poly.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
# Copyright (c) 2013, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
four_over_tau = 2./np.pi
|
||||
|
||||
class POLY(Kernpart):
    r"""

    Polynomial kernel parameter initialisation. Included for completeness, but generally not recommended, is the polynomial kernel:

    .. math::
        k(x, y) = \sigma^2 (\sigma_w^2 x'y + \sigma_b^2)^d

    The kernel parameters are :math:`\sigma^2` (variance), :math:`\sigma^2_w`
    (weight_variance), :math:`\sigma^2_b` (bias_variance) and d
    (degree). Only gradients of the first three are provided for
    kernel optimisation, it is assumed that polynomial degree would
    be set by hand.

    The kernel is not recommended as it is badly behaved when the
    :math:`\sigma^2_w x'y + \sigma^2_b` has a magnitude greater than one. For completeness
    there is an automatic relevance determination version of this
    kernel provided (NOT YET IMPLEMENTED!).

    :param input_dim: the number of input dimensions
    :type input_dim: int
    :param variance: the variance :math:`\sigma^2`
    :type variance: float
    :param weight_variance: the weight variance(s) :math:`\sigma^2_w` multiplying the inner product of the inputs
    :type weight_variance: array or list of the appropriate size (or float if there is only one weight variance parameter)
    :param bias_variance: the bias variance :math:`\sigma^2_b` added to the scaled inner product
    :param degree: the degree of the polynomial.
    :type degree: int
    :param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one weight variance parameter :math:`\sigma^2_w`); "True" currently raises NotImplementedError.
    :type ARD: Boolean
    :rtype: Kernpart object

    """

    def __init__(self, input_dim, variance=1., weight_variance=None, bias_variance=1., degree=2, ARD=False):
        self.input_dim = input_dim
        self.ARD = ARD
        if not ARD:
            self.num_params = 3
            if weight_variance is not None:
                weight_variance = np.asarray(weight_variance)
                assert weight_variance.size == 1, "Only one weight variance needed for non-ARD kernel"
            else:
                weight_variance = 1.*np.ones(1)
        else:
            self.num_params = self.input_dim + 2
            if weight_variance is not None:
                weight_variance = np.asarray(weight_variance)
                assert weight_variance.size == self.input_dim, "bad number of weight variances"
            else:
                weight_variance = np.ones(self.input_dim)
            raise NotImplementedError
        self.degree = degree
        self.name = 'poly_deg' + str(self.degree)
        self._set_params(np.hstack((variance, weight_variance.flatten(), bias_variance)))

    def _get_params(self):
        """Return variance, weight variance(s) and bias variance as one vector."""
        return np.hstack((self.variance, self.weight_variance.flatten(), self.bias_variance))

    def _set_params(self, x):
        """Unpack ``x`` laid out as [variance, weight variance(s), bias variance]."""
        assert x.size == (self.num_params)
        self.variance = x[0]
        self.weight_variance = x[1:-1]
        self.weight_std = np.sqrt(self.weight_variance)
        self.bias_variance = x[-1]

    def _get_param_names(self):
        """Return the parameter names in the same order as ``_get_params``."""
        if self.num_params == 3:
            return ['variance', 'weight_variance', 'bias_variance']
        # One name per weight variance; this class has no ``lengthscale``.
        return ['variance'] + ['weight_variance_%i' % i for i in range(self.weight_variance.size)] + ['bias_variance']

    def K(self, X, X2, target):
        """Return covariance between X and X2."""
        self._K_computations(X, X2)
        target += self.variance*self._K_dvar

    def Kdiag(self, X, target):
        """Compute the diagonal of the covariance matrix for X."""
        self._K_diag_computations(X)
        target += self.variance*self._K_diag_dvar

    def _param_grad_helper(self, dL_dK, X, X2, target):
        """Derivative of the covariance with respect to the parameters."""
        self._K_computations(X, X2)
        # Derivative of variance * arg**degree with respect to arg.
        base = self.variance*self.degree*self._K_poly_arg**(self.degree-1)
        base_cov_grad = base*dL_dK

        target[0] += np.sum(self._K_dvar*dL_dK)
        target[1] += (self._K_inner_prod*base_cov_grad).sum()
        target[2] += base_cov_grad.sum()

    def gradients_X(self, dL_dK, X, X2, target):
        """Derivative of the covariance matrix with respect to X"""
        self._K_computations(X, X2)
        arg = self._K_poly_arg
        if X2 is None:
            target += 2*self.weight_variance*self.degree*self.variance*(((X[None, :, :])) * (arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)
        else:
            target += self.weight_variance*self.degree*self.variance*(((X2[None, :, :])) * (arg**(self.degree-1))[:, :, None]*dL_dK[:, :, None]).sum(1)

    def dKdiag_dX(self, dL_dKdiag, X, target):
        """Gradient of diagonal of covariance with respect to X"""
        self._K_diag_computations(X)
        arg = self._K_diag_poly_arg
        target += 2.*self.weight_variance*self.degree*self.variance*X*dL_dKdiag[:, None]*(arg**(self.degree-1))[:, None]

    def _K_computations(self, X, X2):
        """Cache the inner products, the polynomial argument and its power."""
        if self.ARD:
            pass
        else:
            if X2 is None:
                self._K_inner_prod = np.dot(X, X.T)
            else:
                self._K_inner_prod = np.dot(X, X2.T)
            self._K_poly_arg = self._K_inner_prod*self.weight_variance + self.bias_variance
        self._K_dvar = self._K_poly_arg**self.degree

    def _K_diag_computations(self, X):
        """Cache the diagonal polynomial argument and its power."""
        if self.ARD:
            pass
        else:
            self._K_diag_poly_arg = (X*X).sum(1)*self.weight_variance + self.bias_variance
        self._K_diag_dvar = self._K_diag_poly_arg**self.degree
|
||||
|
||||
|
||||
|
||||
|
||||
336
GPy/kern/_src/todo/rbf_inv.py
Normal file
336
GPy/kern/_src/todo/rbf_inv.py
Normal file
|
|
@ -0,0 +1,336 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from rbf import RBF
|
||||
import numpy as np
|
||||
from scipy import weave
|
||||
from ...util.linalg import tdot
|
||||
from ...core.parameterization import Param
|
||||
|
||||
class RBFInv(RBF):
|
||||
"""
|
||||
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel. It only
|
||||
differs from RBF in that here the parametrization is wrt the inverse lengthscale:
|
||||
|
||||
.. math::
|
||||
|
||||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
|
||||
|
||||
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
|
||||
|
||||
:param input_dim: the number of input dimensions
|
||||
:type input_dim: int
|
||||
:param variance: the variance of the kernel
|
||||
:type variance: float
|
||||
:param lengthscale: the vector of lengthscale of the kernel
|
||||
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
|
||||
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
|
||||
:type ARD: Boolean
|
||||
:rtype: kernel object
|
||||
|
||||
.. Note: this object implements both the ARD and 'spherical' version of the function
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance=1., inv_lengthscale=None, ARD=False, name='inverse rbf'):
    """Build the kernel from a variance and inverse lengthscale(s).

    :param input_dim: the number of input dimensions
    :param variance: the variance of the kernel
    :param inv_lengthscale: inverse lengthscale(s); one value when ARD is
        False, ``input_dim`` values when ARD is True, ones when None
    :param ARD: whether there is one inverse lengthscale per dimension
    :param name: name passed on to the parent constructor
    """
    # The parent constructor is handed the ordinary lengthscale.
    lengthscale = None if inv_lengthscale is None else 1./np.array(inv_lengthscale)
    super(RBFInv, self).__init__(input_dim, variance=variance, lengthscale=lengthscale, ARD=ARD, name=name)
    self.ARD = ARD
    if ARD:
        self.num_params = self.input_dim + 1
        if inv_lengthscale is None:
            inv_lengthscale = np.ones(self.input_dim)
        else:
            inv_lengthscale = np.asarray(inv_lengthscale)
            assert inv_lengthscale.size == self.input_dim, "bad number of lengthscales"
    else:
        self.num_params = 2
        if inv_lengthscale is None:
            inv_lengthscale = np.ones(1)
        else:
            inv_lengthscale = np.asarray(inv_lengthscale)
            assert inv_lengthscale.size == 1, "Only one lengthscale needed for non-ARD kernel"

    # Swap the parent's lengthscale parameter for the inverse lengthscale.
    self.variance = Param('variance', variance)
    self.inv_lengthscale = Param('sensitivity', inv_lengthscale)
    self.inv_lengthscale.add_observer(self, self.update_inv_lengthscale)
    self.remove_parameter(self.lengthscale)
    self.add_parameters(self.variance, self.inv_lengthscale)

    # Placeholder cache entries; replaced on the first computation.
    self._Z, self._mu, self._S = np.empty(shape=(3, 1))
    self._X, self._X2, self._params = np.empty(shape=(3, 1))

    # Optional arguments passed to weave.
    self.weave_options = {
        'headers': ['<omp.h>'],
        'extra_compile_args': ['-fopenmp -O3'],
        'extra_link_args': ['-lgomp'],
    }
|
||||
|
||||
|
||||
|
||||
# def _get_params(self):
|
||||
# return np.hstack((self.variance, self.inv_lengthscale))
|
||||
|
||||
def update_inv_lengthscale(self, il):
    """Recompute the quantities derived from ``self.inv_lengthscale``.

    Sets ``inv_lengthscale2``, ``lengthscale`` and ``lengthscale2``.

    :param il: not read here
    """
    inverse = self.inv_lengthscale
    self.inv_lengthscale2 = np.square(inverse)
    # TODO: rewrite everything in terms of inv_lengthscale so the two
    # attributes below are no longer needed.
    self.lengthscale = 1. / inverse
    self.lengthscale2 = np.square(self.lengthscale)
|
||||
|
||||
#def _set_params(self, x):
|
||||
def parameters_changed(self):
    """Reset the cached inputs so the next computation starts from scratch.

    Each of ``_X``, ``_X2``, ``_params``, ``_Z``, ``_mu`` and ``_S`` is
    replaced by a one-element placeholder array.
    """
    input_cache = np.empty(shape=(3, 1))
    self._X, self._X2, self._params = input_cache[0], input_cache[1], input_cache[2]
    psi_cache = np.empty(shape=(3, 1))  # cached versions of Z, mu, S
    self._Z, self._mu, self._S = psi_cache[0], psi_cache[1], psi_cache[2]
|
||||
|
||||
# def _get_param_names(self):
|
||||
# if self.num_params == 2:
|
||||
# return ['variance', 'inv_lengthscale']
|
||||
# else:
|
||||
# return ['variance'] + ['inv_lengthscale%i' % i for i in range(self.inv_lengthscale.size)]
|
||||
|
||||
# TODO: Rewrite computations so that lengthscale is not needed (but only inv. lengthscale)
|
||||
def _param_grad_helper(self, dL_dK, X, X2, target):
|
||||
self._K_computations(X, X2)
|
||||
target[0] += np.sum(self._K_dvar * dL_dK)
|
||||
if self.ARD:
|
||||
dvardLdK = self._K_dvar * dL_dK
|
||||
var_len3 = self.variance / np.power(self.lengthscale, 3)
|
||||
len2 = self.lengthscale2
|
||||
if X2 is None:
|
||||
# save computation for the symmetrical case
|
||||
dvardLdK = dvardLdK + dvardLdK.T
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<i; j++){
|
||||
tmp += (X(i,q)-X(j,q))*(X(i,q)-X(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X.shape[0], self.input_dim
|
||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||
else:
|
||||
code = """
|
||||
int q,i,j;
|
||||
double tmp;
|
||||
for(q=0; q<input_dim; q++){
|
||||
tmp = 0;
|
||||
for(i=0; i<num_data; i++){
|
||||
for(j=0; j<num_inducing; j++){
|
||||
tmp += (X(i,q)-X2(j,q))*(X(i,q)-X2(j,q))*dvardLdK(i,j);
|
||||
}
|
||||
}
|
||||
target(q+1) += var_len3(q)*tmp*(-len2(q));
|
||||
}
|
||||
"""
|
||||
num_data, num_inducing, input_dim = X.shape[0], X2.shape[0], self.input_dim
|
||||
# [np.add(target[1+q:2+q],var_len3[q]*np.sum(dvardLdK*np.square(X[:,q][:,None]-X2[:,q][None,:])),target[1+q:2+q]) for q in range(self.input_dim)]
|
||||
weave.inline(code, arg_names=['num_data', 'num_inducing', 'input_dim', 'X', 'X2', 'target', 'dvardLdK', 'var_len3', 'len2'], type_converters=weave.converters.blitz, **self.weave_options)
|
||||
else:
|
||||
target[1] += (self.variance / self.lengthscale) * np.sum(self._K_dvar * self._K_dist2 * dL_dK) * (-self.lengthscale2)
|
||||
|
||||
def gradients_X(self, dL_dK, X, X2, target):
    """Accumulate the gradient of the covariance with respect to X.

    :param dL_dK: gradient of the objective with respect to the covariance
    :param X: first set of inputs
    :param X2: second set of inputs, or None for the symmetric case
    :param target: array with one row per row of X that the gradient is
        added to
    """
    self._K_computations(X, X2)
    # The pairwise differences are built here rather than cached because the
    # N x M x Q array is large. The symmetric case carries a factor of two.
    if X2 is None:
        diffs = 2*(X[:, None, :] - X[None, :, :])
    else:
        diffs = X[:, None, :] - X2[None, :, :]
    scale = -self.variance * self.inv_lengthscale2
    per_pair = scale * (self._K_dvar[:, :, np.newaxis] * diffs)
    target += np.sum(per_pair * dL_dK[:, :, None], 1)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
    """Leave ``target`` untouched: nothing is accumulated for the diagonal."""
|
||||
|
||||
|
||||
#---------------------------------------#
|
||||
# PSI statistics #
|
||||
#---------------------------------------#
|
||||
|
||||
# def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
|
||||
# self._psi_computations(Z, mu, S)
|
||||
# denom_deriv = S[:, None, :] / (self.lengthscale ** 3 + self.lengthscale * S[:, None, :])
|
||||
# d_length = self._psi1[:, :, None] * (self.lengthscale * np.square(self._psi1_dist / (self.lengthscale2 + S[:, None, :])) + denom_deriv)
|
||||
# target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
|
||||
# dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
|
||||
# if not self.ARD:
|
||||
# target[1] += dpsi1_dlength.sum()*(-self.lengthscale2)
|
||||
# else:
|
||||
# target[1:] += dpsi1_dlength.sum(0).sum(0)*(-self.lengthscale2)
|
||||
# #target[1:] = target[1:]*(-self.lengthscale2)
|
||||
|
||||
def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
    """Accumulate gradients of psi1 w.r.t. variance and inverse lengthscale(s).

    ``target[0]`` receives the variance term. ``target[1]`` (non-ARD) or
    ``target[1:]`` (ARD) receives the inverse-lengthscale term.
    """
    self._psi_computations(Z, mu, S)
    S_bcast = S[:, None, :]
    denom = 1 + S_bcast * self.inv_lengthscale2
    dist_part = (np.square(self._psi1_dist) * self.inv_lengthscale) / (denom ** 2)
    var_part = (S_bcast * self.inv_lengthscale) / (denom)
    d_inv_length = -(self._psi1[:, :, None] * (dist_part + var_part))
    target[0] += np.sum(dL_dpsi1 * self._psi1 / self.variance)
    weighted = d_inv_length * dL_dpsi1[:, :, None]
    if self.ARD:
        target[1:] += weighted.sum(0).sum(0)
    else:
        target[1] += weighted.sum()
|
||||
|
||||
def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
    """Accumulate the gradient of psi1 with respect to Z into ``target``."""
    self._psi_computations(Z, mu, S)
    scaled_dist = (self.inv_lengthscale2 * self._psi1_dist) / self._psi1_denom
    grad = -self._psi1[:, :, None] * scaled_dist
    # Sum over the first axis, leaving one row per row of Z.
    target += (dL_dpsi1[:, :, None] * grad).sum(0)
|
||||
|
||||
def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
    """Accumulate the gradients of psi1 with respect to mu and S.

    :param target_mu: array that the mu gradient is added to
    :param target_S: array that the S gradient is added to
    """
    self._psi_computations(Z, mu, S)
    common = (self._psi1[:, :, None] * self.inv_lengthscale2) / self._psi1_denom
    dL = dL_dpsi1[:, :, None]
    mu_terms = dL * common * self._psi1_dist
    S_terms = dL * 0.5 * common * (self._psi1_dist_sq - 1)
    # Sum over the second axis, leaving one row per data point.
    target_mu += mu_terms.sum(1)
    target_S += S_terms.sum(1)
|
||||
|
||||
def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
    """Accumulate gradients of psi2 w.r.t. variance and inverse lengthscale(s).

    ``target[0]`` receives the variance term. ``target[1]`` (non-ARD) or
    ``target[1:]`` (ARD) receives the inverse-lengthscale term.
    """
    self._psi_computations(Z, mu, S)
    d_var = 2.*self._psi2 / self.variance
    numerator = (self._psi2_Zdist_sq * self._psi2_denom
                 + self._psi2_mudist_sq
                 + S[:, None, None, :] * self.inv_lengthscale2)
    d_inv_length = -2.*self._psi2[:, :, :, None] * numerator / (self.inv_lengthscale * self._psi2_denom)
    target[0] += np.sum(dL_dpsi2 * d_var)
    weighted = d_inv_length * dL_dpsi2[:, :, :, None]
    if self.ARD:
        target[1:] += weighted.sum(0).sum(0).sum(0)
    else:
        target[1] += weighted.sum()
|
||||
|
||||
def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
    """Accumulate the gradient of psi2 with respect to Z into ``target``."""
    self._psi_computations(Z, mu, S)
    Z_term = self._psi2_Zdist * self.inv_lengthscale2  # num_inducing, num_inducing, input_dim
    mu_term = (self._psi2_mudist * self.inv_lengthscale2) / self._psi2_denom  # N, num_inducing, num_inducing, input_dim
    grad = self._psi2[:, :, :, None] * (Z_term[None] + mu_term)
    weighted = dL_dpsi2[:, :, :, None] * grad
    # Collapse the first two axes, leaving (num_inducing, input_dim).
    target += weighted.sum(0).sum(0)
|
||||
|
||||
def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
    """Accumulate the gradients of psi2 with respect to mu and S.

    Intermediate arrays are N x num_inducing x num_inducing x input_dim; the
    two inducing axes are summed out before adding to the targets.
    """
    self._psi_computations(Z, mu, S)
    common = (self.inv_lengthscale2 * self._psi2[:, :, :, None]) / self._psi2_denom
    dL = dL_dpsi2[:, :, :, None]
    mu_terms = dL * common * self._psi2_mudist
    S_terms = dL * common * (2.*self._psi2_mudist_sq - 1)
    target_mu += -2.*mu_terms.sum(1).sum(1)
    target_S += S_terms.sum(1).sum(1)
|
||||
|
||||
#---------------------------------------#
|
||||
# Precomputations #
|
||||
#---------------------------------------#
|
||||
|
||||
def _K_computations(self, X, X2):
|
||||
if not (np.array_equal(X, self._X) and np.array_equal(X2, self._X2) and np.array_equal(self._params , self._get_params())):
|
||||
self._X = X.copy()
|
||||
self._params = self._get_params().copy()
|
||||
if X2 is None:
|
||||
self._X2 = None
|
||||
X = X * self.inv_lengthscale
|
||||
Xsquare = np.sum(np.square(X), 1)
|
||||
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
|
||||
else:
|
||||
self._X2 = X2.copy()
|
||||
X = X * self.inv_lengthscale
|
||||
X2 = X2 * self.inv_lengthscale
|
||||
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), 1)[:, None] + np.sum(np.square(X2), 1)[None, :])
|
||||
self._K_dvar = np.exp(-0.5 * self._K_dist2)
|
||||
|
||||
def _psi_computations(self, Z, mu, S):
|
||||
# here are the "statistics" for psi1 and psi2
|
||||
if not np.array_equal(Z, self._Z):
|
||||
# Z has changed, compute Z specific stuff
|
||||
self._psi2_Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
|
||||
self._psi2_Zdist_sq = np.square(self._psi2_Zdist * self.inv_lengthscale) # M,M,Q
|
||||
|
||||
if not (np.array_equal(Z, self._Z) and np.array_equal(mu, self._mu) and np.array_equal(S, self._S)):
|
||||
# something's changed. recompute EVERYTHING
|
||||
|
||||
# psi1
|
||||
self._psi1_denom = S[:, None, :] * self.inv_lengthscale2 + 1.
|
||||
self._psi1_dist = Z[None, :, :] - mu[:, None, :]
|
||||
self._psi1_dist_sq = (np.square(self._psi1_dist) * self.inv_lengthscale2) / self._psi1_denom
|
||||
self._psi1_exponent = -0.5 * np.sum(self._psi1_dist_sq + np.log(self._psi1_denom), -1)
|
||||
self._psi1 = self.variance * np.exp(self._psi1_exponent)
|
||||
|
||||
# psi2
|
||||
self._psi2_denom = 2.*S[:, None, None, :] * self.inv_lengthscale2 + 1. # N,M,M,Q
|
||||
self._psi2_mudist, self._psi2_mudist_sq, self._psi2_exponent, _ = self.weave_psi2(mu, self._psi2_Zhat)
|
||||
# self._psi2_mudist = mu[:,None,None,:]-self._psi2_Zhat #N,M,M,Q
|
||||
# self._psi2_mudist_sq = np.square(self._psi2_mudist)/(self.lengthscale2*self._psi2_denom)
|
||||
# self._psi2_exponent = np.sum(-self._psi2_Zdist_sq -self._psi2_mudist_sq -0.5*np.log(self._psi2_denom),-1) #N,M,M,Q
|
||||
self._psi2 = np.square(self.variance) * np.exp(self._psi2_exponent) # N,M,M,Q
|
||||
|
||||
# store matrices for caching
|
||||
self._Z, self._mu, self._S = Z, mu, S
|
||||
|
||||
def weave_psi2(self, mu, Zhat):
    """Fill the psi2 distance and exponent arrays with an inlined C loop.

    The C code loops over n, m, mm <= m and q, writes each value to both the
    (m, mm) and (mm, m) positions, and accumulates the exponent over q.

    :param mu: 2-d array; its shape gives N and input_dim
    :param Zhat: array whose first axis has length num_inducing
    :returns: ``(mudist, mudist_sq, psi2_exponent, psi2)``. ``psi2`` is
        allocated with ``np.empty`` and never written, because the C lines
        that would fill it are commented out; its contents are meaningless.
    """
    N, input_dim = mu.shape
    num_inducing = Zhat.shape[0]

    # Output buffers. The names are bound into the C code through arg_names
    # below, so they must not be renamed.
    per_dim_shape = (N, num_inducing, num_inducing, input_dim)
    summed_shape = (N, num_inducing, num_inducing)
    mudist = np.empty(per_dim_shape)
    mudist_sq = np.empty(per_dim_shape)
    psi2_exponent = np.zeros(summed_shape)  # accumulated with += in C
    psi2 = np.empty(summed_shape)

    psi2_Zdist_sq = self._psi2_Zdist_sq
    # Flatten the cached denominator to (N, input_dim) for indexing as (n, q).
    _psi2_denom = self._psi2_denom.squeeze().reshape(N, self.input_dim)
    half_log_psi2_denom = 0.5 * np.log(self._psi2_denom).squeeze().reshape(N, self.input_dim)
    variance_sq = float(np.square(self.variance))
    # The C code indexes inv_lengthscale2(q), so expand it when not ARD.
    inv_lengthscale2 = self.inv_lengthscale2 if self.ARD else np.ones(input_dim) * self.inv_lengthscale2

    code = """
    double tmp;

    #pragma omp parallel for private(tmp)
    for (int n=0; n<N; n++){
        for (int m=0; m<num_inducing; m++){
           for (int mm=0; mm<(m+1); mm++){
               for (int q=0; q<input_dim; q++){
                   //compute mudist
                   tmp = mu(n,q) - Zhat(m,mm,q);
                   mudist(n,m,mm,q) = tmp;
                   mudist(n,mm,m,q) = tmp;

                   //now mudist_sq
                   tmp = tmp*tmp*inv_lengthscale2(q)/_psi2_denom(n,q);
                   mudist_sq(n,m,mm,q) = tmp;
                   mudist_sq(n,mm,m,q) = tmp;

                   //now psi2_exponent
                   tmp = -psi2_Zdist_sq(m,mm,q) - tmp - half_log_psi2_denom(n,q);
                   psi2_exponent(n,mm,m) += tmp;
                   if (m !=mm){
                       psi2_exponent(n,m,mm) += tmp;
                   }
               //psi2 would be computed like this, but np is faster
               //tmp = variance_sq*exp(psi2_exponent(n,m,mm));
               //psi2(n,m,mm) = tmp;
               //psi2(n,mm,m) = tmp;
               }
           }
        }
    }

    """

    support_code = """
    #include <omp.h>
    #include <math.h>
    """

    # Python locals exposed to the C code under the same names.
    arg_names = ['N', 'num_inducing', 'input_dim', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'inv_lengthscale2', '_psi2_denom', 'psi2_Zdist_sq', 'psi2_exponent', 'half_log_psi2_denom', 'psi2', 'variance_sq']
    weave.inline(code, support_code=support_code, libraries=['gomp'],
                 arg_names=arg_names,
                 type_converters=weave.converters.blitz, **self.weave_options)

    return mudist, mudist_sq, psi2_exponent, psi2
|
||||
61
GPy/kern/_src/todo/spline.py
Normal file
61
GPy/kern/_src/todo/spline.py
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
from ...core.parameterization import Param
|
||||
|
||||
def theta(x):
    """Heaviside step function: 1.0 where ``x >= 0`` and 0.0 elsewhere."""
    is_nonnegative = np.greater_equal(x, 0.)
    return np.where(is_nonnegative, 1., 0.)
|
||||
|
||||
class Spline(Kernpart):
    """
    Spline kernel

    :param input_dim: the number of input dimensions (fixed to 1 right now TODO)
    :type input_dim: int
    :param variance: the variance of the kernel
    :type variance: float
    :param lengthscale: registered as a parameter, but not read by any method of this class
    :type lengthscale: float

    """

    def __init__(self,input_dim,variance=1.,lengthscale=1.):
        self.input_dim = input_dim
        assert self.input_dim==1
        # NOTE(review): two Params are registered below while num_params is 1 -- confirm intended.
        self.num_params = 1
        self.name = 'spline'
        self.variance = Param('variance', variance)
        self.lengthscale = Param('lengthscale', lengthscale)
        self.add_parameters(self.variance, self.lengthscale)

    @staticmethod
    def _unit_variance_K(X, X2):
        """Return the covariance between X and X2 before scaling by the variance.

        ``X2=None`` means "use X for both arguments".
        """
        if X2 is None:
            X2 = X
        t = X
        s = X2.T
        s_t = s-t # broadcasted subtraction
        return 0.5*(t*s**2) - s**3/6. + (s_t)**3*theta(s_t)/6.

    def K(self,X,X2,target):
        """Add the covariance matrix between X and X2 (X2=None means X) to target."""
        if X2 is None:
            X2 = X
        assert np.all(X>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        assert np.all(X2>0), "Spline covariance is for +ve domain only. TODO: symmetrise"
        target += self.variance*self._unit_variance_K(X, X2)

    def Kdiag(self,X,target):
        """Add the diagonal of the covariance matrix of X to target."""
        target += self.variance*X.flatten()**3/3.

    def _param_grad_helper(self,X,X2,target):
        """Add the derivative of K with respect to the variance to target.

        K is the variance times the unit-variance covariance, so the
        derivative is that unscaled covariance.
        """
        # NOTE(review): Symmetric._param_grad_helper in this commit takes a leading
        # dL_dK argument; this signature has none -- confirm the expected interface.
        target += self._unit_variance_K(X, X2)

    def dKdiag_dtheta(self,X,target):
        """Add the derivative of Kdiag with respect to the variance to target."""
        target += X.flatten()**3/3.

    def dKdiag_dX(self,X,target):
        """Add the derivative of Kdiag with respect to X to target."""
        target += self.variance*X**2
|
||||
|
||||
81
GPy/kern/_src/todo/symmetric.py
Normal file
81
GPy/kern/_src/todo/symmetric.py
Normal file
|
|
@ -0,0 +1,81 @@
|
|||
# Copyright (c) 2012 James Hensman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from kernpart import Kernpart
|
||||
import numpy as np
|
||||
|
||||
class Symmetric(Kernpart):
    """
    Symmetrical kernels

    The wrapped kernel is evaluated on every combination of the original and
    the transformed inputs, and the four results are accumulated.

    :param k: the kernel to symmetrify
    :type k: Kernpart
    :param transform: the transform to use in symmetrification (allows symmetry on specified axes)
    :type transform: A numpy array (input_dim x input_dim) specifying the transform
    :rtype: Kernpart

    """
    def __init__(self,k,transform=None):
        if transform is None:
            # Default: negate every input dimension.
            transform = np.eye(k.input_dim)*-1.
        assert transform.shape == (k.input_dim, k.input_dim)
        self.transform = transform
        self.input_dim = k.input_dim
        self.num_params = k.num_params
        self.name = k.name + '_symm'
        self.k = k
        self.add_parameter(k)
        #self._set_params(k._get_params())

    def _with_transformed(self, X, X2):
        """Return ``(X2, AX, AX2)`` where A is the transform and X2=None means X.

        Shared by K, _param_grad_helper and gradients_X so the X2=None case is
        handled identically everywhere.
        """
        AX = np.dot(X,self.transform)
        if X2 is None:
            return X, AX, AX
        return X2, AX, np.dot(X2, self.transform)

    def K(self,X,X2,target):
        """Compute the covariance matrix between X and X2."""
        X2, AX, AX2 = self._with_transformed(X, X2)
        self.k.K(X,X2,target)
        self.k.K(AX,X2,target)
        self.k.K(X,AX2,target)
        self.k.K(AX,AX2,target)

    def _param_grad_helper(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to the parameters."""
        X2, AX, AX2 = self._with_transformed(X, X2)
        self.k._param_grad_helper(dL_dK,X,X2,target)
        self.k._param_grad_helper(dL_dK,AX,X2,target)
        self.k._param_grad_helper(dL_dK,X,AX2,target)
        self.k._param_grad_helper(dL_dK,AX,AX2,target)

    def gradients_X(self,dL_dK,X,X2,target):
        """derivative of the covariance matrix with respect to X."""
        # NOTE(review): the AX terms are accumulated without applying the transform
        # back onto the gradient -- confirm this is the intended chain rule.
        X2, AX, AX2 = self._with_transformed(X, X2)
        self.k.gradients_X(dL_dK, X, X2, target)
        self.k.gradients_X(dL_dK, AX, X2, target)
        self.k.gradients_X(dL_dK, X, AX2, target)
        self.k.gradients_X(dL_dK, AX ,AX2, target)

    def Kdiag(self,X,target):
        """Compute the diagonal of the covariance matrix associated to X."""
        # Builds the full square matrix and keeps only its diagonal.
        foo = np.zeros((X.shape[0],X.shape[0]))
        self.K(X,X,foo)
        target += np.diag(foo)

    def dKdiag_dX(self,dL_dKdiag,X,target):
        """Derivative of the diagonal with respect to X (not implemented)."""
        raise NotImplementedError

    def dKdiag_dtheta(self,dL_dKdiag,X,target):
        """Derivative of the diagonal with respect to the parameters (not implemented)."""
        raise NotImplementedError
|
||||
Loading…
Add table
Add a link
Reference in a new issue