mirror of
https://github.com/SheffieldML/GPy.git
synced 2026-04-24 20:36:23 +02:00
merge devel mrd
This commit is contained in:
commit
3f625a9347
8 changed files with 202 additions and 61 deletions
|
|
@ -13,6 +13,7 @@ import priors
|
|||
from ..util.linalg import jitchol
|
||||
from ..inference import optimization
|
||||
from .. import likelihoods
|
||||
import re
|
||||
|
||||
class model(parameterised):
|
||||
def __init__(self):
|
||||
|
|
@ -239,7 +240,7 @@ class model(parameterised):
|
|||
for s in positive_strings:
|
||||
for i in self.grep_param_names(s):
|
||||
if not (i in currently_constrained):
|
||||
to_make_positive.append(param_names[i])
|
||||
to_make_positive.append(re.escape(param_names[i]))
|
||||
if warn:
|
||||
print "Warning! constraining %s postive"%name
|
||||
if len(to_make_positive):
|
||||
|
|
|
|||
|
|
@ -103,10 +103,18 @@ class parameterised(object):
|
|||
return expr
|
||||
|
||||
def Nparam_transformed(self):
|
||||
ties = 0
|
||||
for ar in self.tied_indices:
|
||||
ties += ar.size - 1
|
||||
return self.Nparam - len(self.constrained_fixed_indices) - ties
|
||||
"""
|
||||
Compute the number of parameters after ties and fixing have been performed
|
||||
"""
|
||||
ties = 0
|
||||
for ti in self.tied_indices:
|
||||
ties += ti.size - 1
|
||||
|
||||
fixes = 0
|
||||
for fi in self.constrained_fixed_indices:
|
||||
fixes += len(fi)
|
||||
|
||||
return self.Nparam - fixes - ties
|
||||
|
||||
def constrain_positive(self, which):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -2,5 +2,9 @@
|
|||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, rbf_sympy, sympykern, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos
|
||||
from constructors import rbf, Matern32, Matern52, exponential, linear, white, bias, finite_dimensional, spline, Brownian, periodic_exponential, periodic_Matern32, periodic_Matern52, prod, prod_orthogonal, symmetric, coregionalise, rational_quadratic, fixed, rbfcos, independent_outputs
|
||||
try:
|
||||
from constructors import rbf_sympy, sympykern # these depend on sympy
|
||||
except:
|
||||
pass
|
||||
from kern import kern
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ from symmetric import symmetric as symmetric_part
|
|||
from coregionalise import coregionalise as coregionalise_part
|
||||
from rational_quadratic import rational_quadratic as rational_quadraticpart
|
||||
from rbfcos import rbfcos as rbfcospart
|
||||
from independent_outputs import independent_outputs as independent_output_part
|
||||
#TODO these s=constructors are not as clean as we'd like. Tidy the code up
|
||||
#using meta-classes to make the objects construct properly wthout them.
|
||||
|
||||
|
|
@ -165,34 +166,40 @@ def Brownian(D,variance=1.):
|
|||
part = Brownianpart(D,variance)
|
||||
return kern(D, [part])
|
||||
|
||||
import sympy as sp
|
||||
from sympykern import spkern
|
||||
from sympy.parsing.sympy_parser import parse_expr
|
||||
try:
|
||||
import sympy as sp
|
||||
from sympykern import spkern
|
||||
from sympy.parsing.sympy_parser import parse_expr
|
||||
sympy_available = True
|
||||
except ImportError:
|
||||
sympy_available = False
|
||||
|
||||
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
|
||||
"""
|
||||
Radial Basis Function covariance.
|
||||
"""
|
||||
X = [sp.var('x%i'%i) for i in range(D)]
|
||||
Z = [sp.var('z%i'%i) for i in range(D)]
|
||||
rbf_variance = sp.var('rbf_variance',positive=True)
|
||||
if ARD:
|
||||
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
|
||||
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
|
||||
dist = parse_expr(dist_string)
|
||||
f = rbf_variance*sp.exp(-dist/2.)
|
||||
else:
|
||||
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
|
||||
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
|
||||
dist = parse_expr(dist_string)
|
||||
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
|
||||
return kern(D,[spkern(D,f)])
|
||||
if sympy_available:
|
||||
def rbf_sympy(D,ARD=False,variance=1., lengthscale=1.):
|
||||
"""
|
||||
Radial Basis Function covariance.
|
||||
"""
|
||||
X = [sp.var('x%i'%i) for i in range(D)]
|
||||
Z = [sp.var('z%i'%i) for i in range(D)]
|
||||
rbf_variance = sp.var('rbf_variance',positive=True)
|
||||
if ARD:
|
||||
rbf_lengthscales = [sp.var('rbf_lengthscale_%i'%i,positive=True) for i in range(D)]
|
||||
dist_string = ' + '.join(['(x%i-z%i)**2/rbf_lengthscale_%i**2'%(i,i,i) for i in range(D)])
|
||||
dist = parse_expr(dist_string)
|
||||
f = rbf_variance*sp.exp(-dist/2.)
|
||||
else:
|
||||
rbf_lengthscale = sp.var('rbf_lengthscale',positive=True)
|
||||
dist_string = ' + '.join(['(x%i-z%i)**2'%(i,i) for i in range(D)])
|
||||
dist = parse_expr(dist_string)
|
||||
f = rbf_variance*sp.exp(-dist/(2*rbf_lengthscale**2))
|
||||
return kern(D,[spkern(D,f)])
|
||||
|
||||
def sympykern(D,k):
|
||||
"""
|
||||
A kernel from a symbolic sympy representation
|
||||
"""
|
||||
return kern(D,[spkern(D,k)])
|
||||
def sympykern(D,k):
|
||||
"""
|
||||
A kernel from a symbolic sympy representation
|
||||
"""
|
||||
return kern(D,[spkern(D,k)])
|
||||
del sympy_available
|
||||
|
||||
def periodic_exponential(D=1,variance=1., lengthscale=None, period=2*np.pi,n_freq=10,lower=0.,upper=4*np.pi):
|
||||
"""
|
||||
|
|
@ -318,3 +325,14 @@ def rbfcos(D,variance=1.,frequencies=None,bandwidths=None,ARD=False):
|
|||
"""
|
||||
part = rbfcospart(D,variance,frequencies,bandwidths,ARD)
|
||||
return kern(D,[part])
|
||||
|
||||
def independent_outputs(k):
|
||||
"""
|
||||
Construct a kernel with independent outputs from an existing kernel
|
||||
"""
|
||||
for sl in k.input_slices:
|
||||
assert (sl.start is None) and (sl.stop is None), "cannot adjust input slices! (TODO)"
|
||||
parts = [independent_output_part(p) for p in k.parts]
|
||||
return kern(k.D+1,parts)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -62,11 +62,16 @@ class coregionalise(kernpart):
|
|||
ii,jj = np.meshgrid(index,index2)
|
||||
ii,jj = ii.T, jj.T
|
||||
|
||||
#dL_dK_small = np.zeros_like(self.B)
|
||||
#for i in range(self.Nout):
|
||||
#for j in range(self.Nout):
|
||||
#tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
|
||||
#dL_dK_small[i,j] = tmp
|
||||
#as above, but slightly faster
|
||||
dL_dK_small = np.zeros_like(self.B)
|
||||
for i in range(self.Nout):
|
||||
for j in range(self.Nout):
|
||||
tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
|
||||
dL_dK_small[i,j] = tmp
|
||||
where_i = [ii==i for i in xrange(self.Nout)]
|
||||
where_j = [jj==j for j in xrange(self.Nout)]
|
||||
[[np.put(dL_dK_small,i+self.Nout*j,np.sum(dL_dK[np.logical_and(wi,wj)])) for i,wi in enumerate(where_i)] for j,wj in enumerate(where_j)]
|
||||
|
||||
dkappa = np.diag(dL_dK_small)
|
||||
dL_dK_small += dL_dK_small.T
|
||||
|
|
|
|||
97
GPy/kern/independent_outputs.py
Normal file
97
GPy/kern/independent_outputs.py
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
# Copyright (c) 2012, James Hesnsman
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
from kernpart import kernpart
|
||||
import numpy as np
|
||||
|
||||
def index_to_slices(index):
|
||||
"""
|
||||
take a numpy array of integers (index) and return a nested list of slices such that the slices describe the start, stop points for each integer in the index.
|
||||
|
||||
e.g.
|
||||
>>> index = np.asarray([0,0,0,1,1,1,2,2,2])
|
||||
returns
|
||||
>>> [[slice(0,3,None)],[slice(3,6,None)],[slice(6,9,None)]]
|
||||
|
||||
or, a more complicated example
|
||||
>>> index = np.asarray([0,0,1,1,0,2,2,2,1,1])
|
||||
returns
|
||||
>>> [[slice(0,2,None),slice(4,5,None)],[slice(2,4,None),slice(8,10,None)],[slice(5,8,None)]]
|
||||
"""
|
||||
|
||||
#contruct the return structure
|
||||
ind = np.asarray(index,dtype=np.int64)
|
||||
ret = [[] for i in range(ind.max()+1)]
|
||||
|
||||
#find the switchpoints
|
||||
ind_ = np.hstack((ind,ind[0]+ind[-1]+1))
|
||||
switchpoints = np.nonzero(ind_ - np.roll(ind_,+1))[0]
|
||||
|
||||
[ret[ind_i].append(slice(*indexes_i)) for ind_i,indexes_i in zip(ind[switchpoints[:-1]],zip(switchpoints,switchpoints[1:]))]
|
||||
return ret
|
||||
|
||||
class independent_outputs(kernpart):
|
||||
"""
|
||||
A kernel part shich can reopresent several independent functions.
|
||||
this kernel 'switches off' parts of the matrix where the output indexes are different.
|
||||
|
||||
The index of the functions is given by the last column in the input X
|
||||
the rest of the columns of X are passed to the kernel for computation (in blocks).
|
||||
|
||||
"""
|
||||
def __init__(self,k):
|
||||
self.D = k.D + 1
|
||||
self.Nparam = k.Nparam
|
||||
self.name = 'iops('+ k.name + ')'
|
||||
self.k = k
|
||||
|
||||
def _get_params(self):
|
||||
return self.k._get_params()
|
||||
|
||||
def _set_params(self,x):
|
||||
self.k._set_params(x)
|
||||
self.params = x
|
||||
|
||||
def _get_param_names(self):
|
||||
return self.k._get_param_names()
|
||||
|
||||
def K(self,X,X2,target):
|
||||
#Sort out the slices from the input data
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
|
||||
[[[self.k.K(X[s],X2[s2],target[s,s2]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
[[self.k.Kdiag(X[s],target[s]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[self.k.dK_dtheta(dL_dK[s,s2],X[s],X2[s2],target) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
if X2 is None:
|
||||
X2,slices2 = X,slices
|
||||
else:
|
||||
X2,slices2 = X2[:,:-1],index_to_slices(X2[:,-1])
|
||||
[[[self.k.dK_dX(dL_dK[s,s2],X[s],X2[s2],target[s,:-1]) for s in slices_i] for s2 in slices_j] for slices_i,slices_j in zip(slices,slices2)]
|
||||
|
||||
def dKdiag_dX(self,dL_dKdiag,X,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target[s,:-1]) for s in slices_i] for slices_i in slices]
|
||||
|
||||
|
||||
def dKdiag_dtheta(self,dL_dKdiag,X,target):
|
||||
X,slices = X[:,:-1],index_to_slices(X[:,-1])
|
||||
[[self.k.dKdiag_dX(dL_dKdiag[s],X[s],target) for s in slices_i] for slices_i in slices]
|
||||
|
|
@ -22,6 +22,7 @@ class prod_orthogonal(kernpart):
|
|||
self.k1 = k1
|
||||
self.k2 = k2
|
||||
self._set_params(np.hstack((k1._get_params(),k2._get_params())))
|
||||
self._X, self._X2, self._params = np.empty(shape=(3,1)) # initialize cache
|
||||
|
||||
def _get_params(self):
|
||||
"""return the value of the parameters."""
|
||||
|
|
@ -39,23 +40,38 @@ class prod_orthogonal(kernpart):
|
|||
|
||||
def K(self,X,X2,target):
|
||||
"""Compute the covariance matrix between X and X2."""
|
||||
if X2 is None: X2 = X
|
||||
target1 = np.zeros_like(target)
|
||||
target2 = np.zeros_like(target)
|
||||
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],target1)
|
||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
|
||||
target += target1 * target2
|
||||
self._K_computations(X,X2)
|
||||
target += self._K1*self._K2
|
||||
|
||||
def _K_computations(self,X,X2):
|
||||
"""
|
||||
Compute the two kernel matrices.
|
||||
The computation is only done if needed: many times it will be the same as the previous call
|
||||
"""
|
||||
if not (np.all(X==self._X) and np.all(X2==self._X2) and np.all(self._params == self._get_params())):
|
||||
#store new values in cache
|
||||
self._X = X.copy()
|
||||
self._X2 = X2.copy()
|
||||
self._params = self._get_params().copy()
|
||||
|
||||
#update self._K1, self._K2
|
||||
if X2 is None: X2 = X
|
||||
self._K1 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
self._K2 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],self._K1)
|
||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],self._K2)
|
||||
|
||||
def dK_dtheta(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to the parameters."""
|
||||
if X2 is None: X2 = X
|
||||
K1 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
K2 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
|
||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
|
||||
self._K_computations(X,X2)
|
||||
self.k1.dK_dtheta(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
|
||||
self.k2.dK_dtheta(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
|
||||
|
||||
self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
|
||||
self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
self._K_computations(X,X2)
|
||||
self.k1.dK_dX(dL_dK*self._K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
|
||||
self.k2.dK_dX(dL_dK*self._K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
|
||||
|
||||
def Kdiag(self,X,target):
|
||||
"""Compute the diagonal of the covariance matrix associated to X."""
|
||||
|
|
@ -73,17 +89,6 @@ class prod_orthogonal(kernpart):
|
|||
self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
|
||||
self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
|
||||
|
||||
def dK_dX(self,dL_dK,X,X2,target):
|
||||
"""derivative of the covariance matrix with respect to X."""
|
||||
if X2 is None: X2 = X
|
||||
K1 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
K2 = np.zeros((X.shape[0],X2.shape[0]))
|
||||
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
|
||||
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
|
||||
|
||||
self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
|
||||
self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
|
||||
|
||||
def dKdiag_dX(self, dL_dKdiag, X, target):
|
||||
K1 = np.zeros(X.shape[0])
|
||||
K2 = np.zeros(X.shape[0])
|
||||
|
|
|
|||
|
|
@ -148,7 +148,10 @@ class sparse_GP(GP):
|
|||
#self.dL_dKmm += np.dot(np.dot(self.E*sf2, self.psi2_beta_scaled) - self.Cpsi1VVpsi1, self.Kmmi) + 0.5*self.E # dD
|
||||
tmp = linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(self.A),lower=1,trans=1)[0]
|
||||
self.dL_dKmm = -0.5*self.D*sf2*linalg.lapack.flapack.dtrtrs(self.Lm,np.asfortranarray(tmp.T),lower=1,trans=1)[0] #dA
|
||||
self.dL_dKmm += 0.5*(self.D*(self.C/sf2 -self.Kmmi) + self.E) + np.dot(np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1,self.Kmmi) # d(C+D)
|
||||
tmp = np.dot(self.D*self.C + self.E*sf2,self.psi2_beta_scaled) - self.Cpsi1VVpsi1
|
||||
#tmp = np.dot(tmp,self.Kmmi)
|
||||
tmp = linalg.lapack.flapack.dpotrs(self.Lm,np.asfortranarray(tmp.T),lower=1)[0].T
|
||||
self.dL_dKmm += 0.5*(self.D*(self.C/sf2 - self.Kmmi) + self.E) + tmp # d(C+D)
|
||||
|
||||
#the partial derivative vector for the likelihood
|
||||
if self.likelihood.Nparams ==0:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue