Merge branch 'master' of github.com:SheffieldML/GPy

This commit is contained in:
Nicolo Fusi 2013-03-11 12:33:14 +00:00
commit 1eba520b69
33 changed files with 782 additions and 358 deletions

View file

@ -11,7 +11,7 @@ import GPy
default_seed=10000 default_seed=10000
def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME def crescent_data(seed=default_seed): #FIXME
"""Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood. """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
:param model_type: type of model to fit ['Full', 'FITC', 'DTC']. :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
@ -31,11 +31,8 @@ def crescent_data(model_type='Full', inducing=10, seed=default_seed): #FIXME
likelihood = GPy.likelihoods.EP(data['Y'],distribution) likelihood = GPy.likelihoods.EP(data['Y'],distribution)
if model_type=='Full': m = GPy.models.GP(data['X'],likelihood,kernel)
m = GPy.models.GP(data['X'],likelihood,kernel) m.ensure_default_constraints()
else:
# create sparse GP EP model
m = GPy.models.sparse_GP_EP(data['X'],likelihood=likelihood,inducing=inducing,ep_proxy=model_type)
m.update_likelihood_approximation() m.update_likelihood_approximation()
print(m) print(m)
@ -94,16 +91,13 @@ def toy_linear_1d_classification(seed=default_seed):
# Model definition # Model definition
m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel) m = GPy.models.GP(data['X'],likelihood=likelihood,kernel=kernel)
m.ensure_default_constraints()
# Optimize # Optimize
""" m.update_likelihood_approximation()
EPEM runs a loop that consists of two steps: # Parameters optimization:
1) EP likelihood approximation: m.optimize()
m.update_likelihood_approximation() #m.EPEM() #FIXME
2) Parameters optimization:
m.optimize()
"""
m.EPEM()
# Plot # Plot
pb.subplot(211) pb.subplot(211)

View file

@ -10,51 +10,86 @@ import pylab as pb
import numpy as np import numpy as np
import GPy import GPy
np.random.seed(2) np.random.seed(2)
pb.ion()
N = 500 N = 500
M = 5 M = 5
pb.close('all') default_seed=10000
######################################
## 1 dimensional example
# sample inputs and outputs def crescent_data(inducing=10, seed=default_seed):
X = np.random.uniform(-3.,3.,(N,1)) """Run a Gaussian process classification on the crescent data. The demonstration calls the basic GP classification model and uses EP to approximate the likelihood.
#Y = np.sin(X)+np.random.randn(N,1)*0.05
F = np.sin(X)+np.random.randn(N,1)*0.05
Y = np.ones([F.shape[0],1])
Y[F<0] = -1
likelihood = GPy.inference.likelihoods.probit(Y)
# construct kernel :param model_type: type of model to fit ['Full', 'FITC', 'DTC'].
rbf = GPy.kern.rbf(1) :param seed : seed value for data generation.
noise = GPy.kern.white(1) :type seed: int
kernel = rbf + noise :param inducing : number of inducing variables (only used for 'FITC' or 'DTC').
:type inducing: int
"""
# create simple GP model data = GPy.util.datasets.crescent_data(seed=seed)
#m = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood)
# contrain all parameters to be positive # Kernel object
#m.constrain_fixed('prec',100.) kernel = GPy.kern.rbf(data['X'].shape[1]) + GPy.kern.white(data['X'].shape[1])
m = GPy.models.sparse_GP(X, Y, kernel, M=M)
m.ensure_default_constraints()
#if not isinstance(m.likelihood,GPy.inference.likelihoods.gaussian):
# m.approximate_likelihood()
print m.checkgrad()
m.optimize('tnc', messages = 1)
m.plot(samples=3)
print m
n = GPy.models.sparse_GP(X,Y=None, kernel=kernel, M=M,likelihood= likelihood) # Likelihood object
n.ensure_default_constraints() distribution = GPy.likelihoods.likelihood_functions.probit()
if not isinstance(n.likelihood,GPy.inference.likelihoods.gaussian): likelihood = GPy.likelihoods.EP(data['Y'],distribution)
n.approximate_likelihood()
print n.checkgrad() sample = np.random.randint(0,data['X'].shape[0],inducing)
pb.figure() Z = data['X'][sample,:]
n.plot() #Z = (np.random.random_sample(2*inducing)*(data['X'].max()-data['X'].min())+data['X'].min()).reshape(inducing,-1)
# create sparse GP EP model
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
print(m)
# optimize
m.optimize()
print(m)
# plot
m.plot()
return m
def toy_linear_1d_classification(seed=default_seed):
"""
Simple 1D classification example
:param seed : seed value for data generation (default is 4).
:type seed: int
"""
data = GPy.util.datasets.toy_linear_1d_classification(seed=seed)
Y = data['Y'][:, 0:1]
Y[Y == -1] = 0
# Kernel object
kernel = GPy.kern.rbf(1)
# Likelihood object
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y,distribution)
Z = np.random.uniform(data['X'].min(),data['X'].max(),(10,1))
# Model definition
m = GPy.models.sparse_GP(data['X'],likelihood=likelihood,kernel=kernel,Z=Z)
m.ensure_default_constraints()
# Optimize
m.update_likelihood_approximation()
# Parameters optimization:
m.optimize()
#m.EPEM() #FIXME
# Plot
pb.subplot(211)
m.plot_f()
pb.subplot(212)
m.plot()
print(m)
return m
"""
m = GPy.models.sparse_GP_regression(X, Y, kernel, M=M)
m.ensure_default_constraints()
print m.checkgrad()
"""

View file

@ -0,0 +1,56 @@
# The detailed explanations of the commands used in this file can be found in the tutorial section
import pylab as pb
pb.ion()
import numpy as np
import GPy
X = np.random.uniform(-3.,3.,(20,1))
Y = np.sin(X) + np.random.randn(20,1)*0.05
kernel = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
m = GPy.models.GP_regression(X,Y,kernel)
print m
m.plot()
m.constrain_positive('')
m.unconstrain('') # Required to remove the previous constrains
m.constrain_positive('rbf_variance')
m.constrain_bounded('lengthscale',1.,10. )
m.constrain_fixed('noise',0.0025)
m.optimize()
m.optimize_restarts(Nrestarts = 10)
###########################
# 2-dimensional example #
###########################
import pylab as pb
pb.ion()
import numpy as np
import GPy
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(50,2))
Y = np.sin(X[:,0:1]) * np.sin(X[:,1:2])+np.random.randn(50,1)*0.05
# define kernel
ker = GPy.kern.Matern52(2,ARD=True) + GPy.kern.white(2)
# create simple GP model
m = GPy.models.GP_regression(X,Y,ker)
# contrain all parameters to be positive
m.constrain_positive('')
# optimize and plot
pb.figure()
m.optimize('tnc', max_f_eval = 1000)
m.plot()
print(m)

View file

@ -0,0 +1,139 @@
# The detailed explanations of the commands used in this file can be found in the tutorial section
import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=2.)
ker3 = GPy.kern.rbf(1, .5, .5)
print ker2
ker1.plot()
ker2.plot()
ker3.plot()
k1 = GPy.kern.rbf(1,1.,2.)
k2 = GPy.kern.Matern32(1, 0.5, 0.2)
# Product of kernels
k_prod = k1.prod(k2)
k_prodorth = k1.prod_orthogonal(k2)
# Sum of kernels
k_add = k1.add(k2)
k_addorth = k1.add_orthogonal(k2)
pb.figure(figsize=(8,8))
pb.subplot(2,2,1)
k_prod.plot()
pb.title('prod')
pb.subplot(2,2,2)
k_prodorth.plot()
pb.title('prod_orthogonal')
pb.subplot(2,2,3)
k_add.plot()
pb.title('add')
pb.subplot(2,2,4)
k_addorth.plot()
pb.title('add_orthogonal')
pb.subplots_adjust(wspace=0.3, hspace=0.3)
k1 = GPy.kern.rbf(1,1.,2)
k2 = GPy.kern.periodic_Matern52(1,variance=1e3, lengthscale=1, period = 1.5, lower=-5., upper = 5)
k = k1 * k2 # equivalent to k = k1.prod(k2)
print k
# Simulate sample paths
X = np.linspace(-5,5,501)[:,None]
Y = np.random.multivariate_normal(np.zeros(501),k.K(X),1)
# plot
pb.figure(figsize=(10,4))
pb.subplot(1,2,1)
k.plot()
pb.subplot(1,2,2)
pb.plot(X,Y.T)
pb.ylabel("Sample path")
pb.subplots_adjust(wspace=0.3)
k = (k1+k2)*(k1+k2)
print k.parts[0].name, '\n', k.parts[1].name, '\n', k.parts[2].name, '\n', k.parts[3].name
k1 = GPy.kern.rbf(1)
k2 = GPy.kern.Matern32(1)
k3 = GPy.kern.white(1)
k = k1 + k2 + k3
print k
k.constrain_positive('var')
k.constrain_fixed(np.array([1]),1.75)
k.tie_param('len')
k.unconstrain('white')
k.constrain_bounded('white',lower=1e-5,upper=.5)
print k
k_cst = GPy.kern.bias(1,variance=1.)
k_mat = GPy.kern.Matern52(1,variance=1., lengthscale=3)
Kanova = (k_cst + k_mat).prod_orthogonal(k_cst + k_mat)
print Kanova
# sample inputs and outputs
X = np.random.uniform(-3.,3.,(40,2))
Y = 0.5*X[:,:1] + 0.5*X[:,1:] + 2*np.sin(X[:,:1]) * np.sin(X[:,1:])
# Create GP regression model
m = GPy.models.GP_regression(X,Y,Kanova)
pb.figure(figsize=(5,5))
m.plot()
pb.figure(figsize=(20,3))
pb.subplots_adjust(wspace=0.5)
pb.subplot(1,5,1)
m.plot()
pb.subplot(1,5,2)
pb.ylabel("= ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,3)
m.plot(which_functions=[False,True,False,False])
pb.ylabel("cst +",rotation='horizontal',fontsize='30')
pb.subplot(1,5,4)
m.plot(which_functions=[False,False,True,False])
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
pb.subplot(1,5,5)
pb.ylabel("+ ",rotation='horizontal',fontsize='30')
m.plot(which_functions=[False,False,False,True])
import pylab as pb
import numpy as np
import GPy
pb.ion()
ker1 = GPy.kern.rbf(D=1) # Equivalent to ker1 = GPy.kern.rbf(D=1, variance=1., lengthscale=1.)
ker2 = GPy.kern.rbf(D=1, variance = .75, lengthscale=3.)
ker3 = GPy.kern.rbf(1, .5, .25)
ker1.plot()
ker2.plot()
ker3.plot()
#pb.savefig("Figures/tuto_kern_overview_basicdef.png")
kernels = [GPy.kern.rbf(1), GPy.kern.exponential(1), GPy.kern.Matern32(1), GPy.kern.Matern52(1), GPy.kern.Brownian(1), GPy.kern.bias(1), GPy.kern.linear(1), GPy.kern.spline(1), GPy.kern.periodic_exponential(1), GPy.kern.periodic_Matern32(1), GPy.kern.periodic_Matern52(1), GPy.kern.white(1)]
kernel_names = ["GPy.kern.rbf", "GPy.kern.exponential", "GPy.kern.Matern32", "GPy.kern.Matern52", "GPy.kern.Brownian", "GPy.kern.bias", "GPy.kern.linear", "GPy.kern.spline", "GPy.kern.periodic_exponential", "GPy.kern.periodic_Matern32", "GPy.kern.periodic_Matern52", "GPy.kern.white"]
pb.figure(figsize=(16,12))
pb.subplots_adjust(wspace=.5, hspace=.5)
for i, kern in enumerate(kernels):
pb.subplot(3,4,i+1)
kern.plot(x=7.5,plot_limits=[0.00001,15.])
pb.title(kernel_names[i]+ '\n')
# actual plot for the noise
i = 11
X = np.linspace(0.,15.,201)
WN = 0*X
WN[100] = 1.
pb.subplot(3,4,i+1)
pb.plot(X,WN,'b')

View file

@ -76,7 +76,7 @@ class Matern32(kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -84,29 +84,29 @@ class Matern32(kernpart):
invdist = 1./np.where(dist!=0.,dist,np.inf) invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial) target[0] += np.sum(dvar*dL_dK)
if self.ARD == True: if self.ARD == True:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] #dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else: else:
dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
#dl = self.variance*dvar*dist2M.sum(-1)*invdist #dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial) target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(3*self.variance*dist*np.exp(-np.sqrt(3)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
def Gram_matrix(self,F,F1,F2,lower,upper): def Gram_matrix(self,F,F1,F2,lower,upper):

View file

@ -74,7 +74,7 @@ class Matern52(kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
@ -82,29 +82,29 @@ class Matern52(kernpart):
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist) dvar = (1+np.sqrt(5.)*dist+5./3*dist**2)*np.exp(-np.sqrt(5.)*dist)
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[0] += np.sum(dvar*partial) target[0] += np.sum(dvar*dL_dK)
if self.ARD: if self.ARD:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis] #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist))[:,:,np.newaxis] * dist2M*invdist[:,:,np.newaxis]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else: else:
dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist dl = (self.variance * 5./3 * dist * (1 + np.sqrt(5.)*dist ) * np.exp(-np.sqrt(5.)*dist)) * dist2M.sum(-1)*invdist
#dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist #dl = (self.variance* 3 * dist * np.exp(-np.sqrt(3.)*dist)) * dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial) target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
target[0] += np.sum(partial) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*5./3*dist*(1+np.sqrt(5)*dist)*np.exp(-np.sqrt(5)*dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
def Gram_matrix(self,F,F1,F2,F3,lower,upper): def Gram_matrix(self,F,F1,F2,F3,lower,upper):

View file

@ -35,16 +35,17 @@ class bias(kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance target += self.variance
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dKdiag,X,X2,target):
target += partial.sum() target += dL_dKdiag.sum()
def dKdiag_dtheta(self,partial,X,target):
target += partial.sum()
def dK_dX(self, partial,X, X2, target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += dL_dKdiag.sum()
def dK_dX(self, dL_dK,X, X2, target):
pass pass
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
#---------------------------------------# #---------------------------------------#
@ -60,30 +61,29 @@ class bias(kernpart):
def psi2(self, Z, mu, S, target): def psi2(self, Z, mu, S, target):
target += self.variance**2 target += self.variance**2
def dpsi0_dtheta(self, partial, Z, mu, S, target): def dpsi0_dtheta(self, dL_dpsi0, Z, mu, S, target):
target += partial.sum() target += dL_dpsi0.sum()
def dpsi1_dtheta(self, partial, Z, mu, S, target): def dpsi1_dtheta(self, dL_dpsi1, Z, mu, S, target):
target += partial.sum() target += dL_dpsi1.sum()
def dpsi2_dtheta(self, partial, Z, mu, S, target): def dpsi2_dtheta(self, dL_dpsi2, Z, mu, S, target):
target += 2.*self.variance*partial.sum() target += 2.*self.variance*dL_dpsi2.sum()
def dpsi0_dZ(self, dL_dpsi0, Z, mu, S, target):
def dpsi0_dZ(self, partial, Z, mu, S, target):
pass pass
def dpsi0_dmuS(self, partial, Z, mu, S, target_mu, target_S): def dpsi0_dmuS(self, dL_dpsi0, Z, mu, S, target_mu, target_S):
pass pass
def dpsi1_dZ(self, partial, Z, mu, S, target): def dpsi1_dZ(self, dL_dpsi1, Z, mu, S, target):
pass pass
def dpsi1_dmuS(self, partial, Z, mu, S, target_mu, target_S): def dpsi1_dmuS(self, dL_dpsi1, Z, mu, S, target_mu, target_S):
pass pass
def dpsi2_dZ(self, partial, Z, mu, S, target): def dpsi2_dZ(self, dL_dpsi2, Z, mu, S, target):
pass pass
def dpsi2_dmuS(self, partial, Z, mu, S, target_mu, target_S): def dpsi2_dmuS(self, dL_dpsi2, Z, mu, S, target_mu, target_S):
pass pass

View file

@ -53,7 +53,7 @@ class coregionalise(kernpart):
def Kdiag(self,index,target): def Kdiag(self,index,target):
target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()] target += np.diag(self.B)[np.asarray(index,dtype=np.int).flatten()]
def dK_dtheta(self,partial,index,index2,target): def dK_dtheta(self,dL_dK,index,index2,target):
index = np.asarray(index,dtype=np.int) index = np.asarray(index,dtype=np.int)
if index2 is None: if index2 is None:
index2 = index index2 = index
@ -62,28 +62,28 @@ class coregionalise(kernpart):
ii,jj = np.meshgrid(index,index2) ii,jj = np.meshgrid(index,index2)
ii,jj = ii.T, jj.T ii,jj = ii.T, jj.T
partial_small = np.zeros_like(self.B) dL_dK_small = np.zeros_like(self.B)
for i in range(self.Nout): for i in range(self.Nout):
for j in range(self.Nout): for j in range(self.Nout):
tmp = np.sum(partial[(ii==i)*(jj==j)]) tmp = np.sum(dL_dK[(ii==i)*(jj==j)])
partial_small[i,j] = tmp dL_dK_small[i,j] = tmp
dkappa = np.diag(partial_small) dkappa = np.diag(dL_dK_small)
partial_small += partial_small.T dL_dK_small += dL_dK_small.T
dW = (self.W[:,None,:]*partial_small[:,:,None]).sum(0) dW = (self.W[:,None,:]*dL_dK_small[:,:,None]).sum(0)
target += np.hstack([dW.flatten(),dkappa]) target += np.hstack([dW.flatten(),dkappa])
def dKdiag_dtheta(self,partial,index,target): def dKdiag_dtheta(self,dL_dKdiag,index,target):
index = np.asarray(index,dtype=np.int).flatten() index = np.asarray(index,dtype=np.int).flatten()
partial_small = np.zeros(self.Nout) dL_dKdiag_small = np.zeros(self.Nout)
for i in range(self.Nout): for i in range(self.Nout):
partial_small[i] += np.sum(partial[index==i]) dL_dKdiag_small[i] += np.sum(dL_dKdiag[index==i])
dW = 2.*self.W*partial_small[:,None] dW = 2.*self.W*dL_dKdiag_small[:,None]
dkappa = partial_small dkappa = dL_dKdiag_small
target += np.hstack([dW.flatten(),dkappa]) target += np.hstack([dW.flatten(),dkappa])
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
pass pass

View file

@ -74,35 +74,35 @@ class exponential(kernpart):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1)) dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))
invdist = 1./np.where(dist!=0.,dist,np.inf) invdist = 1./np.where(dist!=0.,dist,np.inf)
dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3 dist2M = np.square(X[:,None,:]-X2[None,:,:])/self.lengthscale**3
dvar = np.exp(-dist) dvar = np.exp(-dist)
target[0] += np.sum(dvar*partial) target[0] += np.sum(dvar*dL_dK)
if self.ARD == True: if self.ARD == True:
dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None] dl = self.variance*dvar[:,:,None]*dist2M*invdist[:,:,None]
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else: else:
dl = self.variance*dvar*dist2M.sum(-1)*invdist dl = self.variance*dvar*dist2M.sum(-1)*invdist
target[1] += np.sum(dl*partial) target[1] += np.sum(dl*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters.""" """derivative of the diagonal of the covariance matrix with respect to the parameters."""
#NB: derivative of diagonal elements wrt lengthscale is 0 #NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None] dist = np.sqrt(np.sum(np.square((X[:,None,:]-X2[None,:,:])/self.lengthscale),-1))[:,:,None]
ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf) ddist_dX = (X[:,None,:]-X2[None,:,:])/self.lengthscale**2/np.where(dist!=0.,dist,np.inf)
dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2)) dK_dX = - np.transpose(self.variance*np.exp(-dist)*ddist_dX,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
def Gram_matrix(self,F,F1,lower,upper): def Gram_matrix(self,F,F1,lower,upper):

View file

@ -271,10 +271,10 @@ class kern(parameterised):
[p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.K(X[s1,i_s],X2[s2,i_s],target=target[s1,s2]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target return target
def dK_dtheta(self,partial,X,X2=None,slices1=None,slices2=None): def dK_dtheta(self,dL_dK,X,X2=None,slices1=None,slices2=None):
""" """
:param partial: An array of partial derivaties, dL_dK :param dL_dK: An array of dL_dK derivaties, dL_dK
:type partial: Np.ndarray (N x M) :type dL_dK: Np.ndarray (N x M)
:param X: Observed data inputs :param X: Observed data inputs
:type X: np.ndarray (N x D) :type X: np.ndarray (N x D)
:param X2: Observed dara inputs (optional, defaults to X) :param X2: Observed dara inputs (optional, defaults to X)
@ -288,16 +288,16 @@ class kern(parameterised):
if X2 is None: if X2 is None:
X2 = X X2 = X
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dK_dtheta(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)] [p.dK_dtheta(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[ps]) for p,i_s,ps,s1,s2 in zip(self.parts, self.input_slices, self.param_slices, slices1, slices2)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dK_dX(self,partial,X,X2=None,slices1=None,slices2=None): def dK_dX(self,dL_dK,X,X2=None,slices1=None,slices2=None):
if X2 is None: if X2 is None:
X2 = X X2 = X
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(X) target = np.zeros_like(X)
[p.dK_dX(partial[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)] [p.dK_dX(dL_dK[s1,s2],X[s1,i_s],X2[s2,i_s],target[s1,i_s]) for p, i_s, s1, s2 in zip(self.parts, self.input_slices, slices1, slices2)]
return target return target
def Kdiag(self,X,slices=None): def Kdiag(self,X,slices=None):
@ -307,20 +307,20 @@ class kern(parameterised):
[p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] [p.Kdiag(X[s,i_s],target=target[s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
return target return target
def dKdiag_dtheta(self,partial,X,slices=None): def dKdiag_dtheta(self,dL_dKdiag,X,slices=None):
assert X.shape[1]==self.D assert X.shape[1]==self.D
assert len(partial.shape)==1 assert len(dL_dKdiag.shape)==1
assert partial.size==X.shape[0] assert dL_dKdiag.size==X.shape[0]
slices = self._process_slices(slices,False) slices = self._process_slices(slices,False)
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dKdiag_dtheta(partial[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)] [p.dKdiag_dtheta(dL_dKdiag[s],X[s,i_s],target[ps]) for p,i_s,s,ps in zip(self.parts,self.input_slices,slices,self.param_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dKdiag_dX(self, partial, X, slices=None): def dKdiag_dX(self, dL_dKdiag, X, slices=None):
assert X.shape[1]==self.D assert X.shape[1]==self.D
slices = self._process_slices(slices,False) slices = self._process_slices(slices,False)
target = np.zeros_like(X) target = np.zeros_like(X)
[p.dKdiag_dX(partial[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)] [p.dKdiag_dX(dL_dKdiag[s],X[s,i_s],target[s,i_s]) for p,i_s,s in zip(self.parts,self.input_slices,slices)]
return target return target
def psi0(self,Z,mu,S,slices=None): def psi0(self,Z,mu,S,slices=None):
@ -329,16 +329,16 @@ class kern(parameterised):
[p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)] [p.psi0(Z,mu[s],S[s],target[s]) for p,s in zip(self.parts,slices)]
return target return target
def dpsi0_dtheta(self,partial,Z,mu,S,slices=None): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False) slices = self._process_slices(slices,False)
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dpsi0_dtheta(partial[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)] [p.dpsi0_dtheta(dL_dpsi0[s],Z,mu[s],S[s],target[ps]) for p,ps,s in zip(self.parts, self.param_slices,slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dpsi0_dmuS(self,partial,Z,mu,S,slices=None): def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,slices=None):
slices = self._process_slices(slices,False) slices = self._process_slices(slices,False)
target_mu,target_S = np.zeros_like(mu),np.zeros_like(S) target_mu,target_S = np.zeros_like(mu),np.zeros_like(S)
[p.dpsi0_dmuS(partial,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)] [p.dpsi0_dmuS(dL_dpsi0,Z,mu[s],S[s],target_mu[s],target_S[s]) for p,s in zip(self.parts,slices)]
return target_mu,target_S return target_mu,target_S
def psi1(self,Z,mu,S,slices1=None,slices2=None): def psi1(self,Z,mu,S,slices1=None,slices2=None):
@ -348,25 +348,25 @@ class kern(parameterised):
[p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)] [p.psi1(Z[s2],mu[s1],S[s1],target[s1,s2]) for p,s1,s2 in zip(self.parts,slices1,slices2)]
return target return target
def dpsi1_dtheta(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,(Ntheta)""" """N,M,(Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros((self.Nparam)) target = np.zeros((self.Nparam))
[p.dpsi1_dtheta(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)] [p.dpsi1_dtheta(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,ps,s1,s2,i_s in zip(self.parts, self.param_slices,slices1,slices2,self.input_slices)]
return self._transform_gradients(target) return self._transform_gradients(target)
def dpsi1_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""N,M,Q""" """N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z) target = np.zeros_like(Z)
[p.dpsi1_dZ(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi1_dZ(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target return target
def dpsi1_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,Q""" """return shapes are N,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi1_dmuS(partial[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi1_dmuS(dL_dpsi1[s2,s1],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
return target_mu, target_S return target_mu, target_S
def psi2(self,Z,mu,S,slices1=None,slices2=None): def psi2(self,Z,mu,S,slices1=None,slices2=None):
@ -399,10 +399,11 @@ class kern(parameterised):
return target return target
def dpsi2_dtheta(self,partial,partial1,Z,mu,S,slices1=None,slices2=None): def dpsi2_dtheta(self,dL_dpsi2,partial1,Z,mu,S,slices1=None,slices2=None):
"""Returns shape (N,M,M,Ntheta)"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros(self.Nparam) target = np.zeros(self.Nparam)
[p.dpsi2_dtheta(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)] [p.dpsi2_dtheta(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[ps]) for p,i_s,s1,s2,ps in zip(self.parts,self.input_slices,slices1,slices2,self.param_slices)]
#compute the "cross" terms #compute the "cross" terms
#TODO: better looping #TODO: better looping
@ -416,11 +417,11 @@ class kern(parameterised):
pass pass
#rbf X bias #rbf X bias
elif p1.name=='bias' and p2.name=='rbf': elif p1.name=='bias' and p2.name=='rbf':
p2.dpsi1_dtheta(partial.sum(1)*p1.variance,Z,mu,S,target[ps2]) p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target[ps2])
p1.dpsi1_dtheta(partial.sum(1)*p2._psi1,Z,mu,S,target[ps1]) p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2._psi1,Z,mu,S,target[ps1])
elif p2.name=='bias' and p1.name=='rbf': elif p2.name=='bias' and p1.name=='rbf':
p1.dpsi1_dtheta(partial.sum(1)*p2.variance,Z,mu,S,target[ps1]) p1.dpsi1_dtheta(dL_dpsi2.sum(1)*p2.variance,Z,mu,S,target[ps1])
p2.dpsi1_dtheta(partial.sum(1)*p1._psi1,Z,mu,S,target[ps2]) p2.dpsi1_dtheta(dL_dpsi2.sum(1)*p1._psi1,Z,mu,S,target[ps2])
#rbf X linear #rbf X linear
elif p1.name=='linear' and p2.name=='rbf': elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO raise NotImplementedError #TODO
@ -431,10 +432,10 @@ class kern(parameterised):
return self._transform_gradients(target) return self._transform_gradients(target)
def dpsi2_dZ(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target = np.zeros_like(Z) target = np.zeros_like(Z)
[p.dpsi2_dZ(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi2_dZ(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target[s2,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms #compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2): for p1, p2 in itertools.combinations(self.parts,2):
@ -443,9 +444,9 @@ class kern(parameterised):
pass pass
#rbf X bias #rbf X bias
elif p1.name=='bias' and p2.name=='rbf': elif p1.name=='bias' and p2.name=='rbf':
target += p2.dpsi1_dX(partial.sum(1)*p1.variance,Z,mu,S,target) target += p2.dpsi1_dX(dL_dpsi2.sum(1)*p1.variance,Z,mu,S,target)
elif p2.name=='bias' and p1.name=='rbf': elif p2.name=='bias' and p1.name=='rbf':
target += p1.dpsi1_dZ(partial.sum(2)*p2.variance,Z,mu,S,target) target += p1.dpsi1_dZ(dL_dpsi2.sum(2)*p2.variance,Z,mu,S,target)
#rbf X linear #rbf X linear
elif p1.name=='linear' and p2.name=='rbf': elif p1.name=='linear' and p2.name=='rbf':
raise NotImplementedError #TODO raise NotImplementedError #TODO
@ -457,11 +458,11 @@ class kern(parameterised):
return target return target
def dpsi2_dmuS(self,partial,Z,mu,S,slices1=None,slices2=None): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,slices1=None,slices2=None):
"""return shapes are N,M,M,Q""" """return shapes are N,M,M,Q"""
slices1, slices2 = self._process_slices(slices1,slices2) slices1, slices2 = self._process_slices(slices1,slices2)
target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1])) target_mu, target_S = np.zeros((2,mu.shape[0],mu.shape[1]))
[p.dpsi2_dmuS(partial[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)] [p.dpsi2_dmuS(dL_dpsi2[s1,s2,s2],Z[s2,i_s],mu[s1,i_s],S[s1,i_s],target_mu[s1,i_s],target_S[s1,i_s]) for p,i_s,s1,s2 in zip(self.parts,self.input_slices,slices1,slices2)]
#compute the "cross" terms #compute the "cross" terms
for p1, p2 in itertools.combinations(self.parts,2): for p1, p2 in itertools.combinations(self.parts,2):

View file

@ -26,31 +26,31 @@ class kernpart(object):
raise NotImplementedError raise NotImplementedError
def Kdiag(self,X,target): def Kdiag(self,X,target):
raise NotImplementedError raise NotImplementedError
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
raise NotImplementedError raise NotImplementedError
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
raise NotImplementedError raise NotImplementedError
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi1_dtheta(self,Z,mu,S,target): def dpsi1_dtheta(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
raise NotImplementedError raise NotImplementedError
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi2_dZ(self,partial,Z,mu,S,target): def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi2_dtheta(self,partial,Z,mu,S,target): def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
raise NotImplementedError raise NotImplementedError
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
raise NotImplementedError raise NotImplementedError
def dK_dX(self,X,X2,target): def dK_dX(self,X,X2,target):
raise NotImplementedError raise NotImplementedError

View file

@ -73,16 +73,16 @@ class linear(kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
np.add(target,np.sum(self.variances*np.square(X),-1),target) np.add(target,np.sum(self.variances*np.square(X),-1),target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
if self.ARD: if self.ARD:
product = X[:,None,:]*X2[None,:,:] product = X[:,None,:]*X2[None,:,:]
target += (partial[:,:,None]*product).sum(0).sum(0) target += (dL_dK[:,:,None]*product).sum(0).sum(0)
else: else:
self._K_computations(X, X2) self._K_computations(X, X2)
target += np.sum(self._dot_product*partial) target += np.sum(self._dot_product*dL_dK)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
target += (((X2[:, None, :] * self.variances)) * partial[:,:, None]).sum(0) target += (((X2[:, None, :] * self.variances)) * dL_dK[:,:, None]).sum(0)
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
@ -92,40 +92,40 @@ class linear(kernpart):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += np.sum(self.variances*self.mu2_S,1) target += np.sum(self.variances*self.mu2_S,1)
def dKdiag_dtheta(self,partial, X, target): def dKdiag_dtheta(self,dL_dKdiag, X, target):
tmp = partial[:,None]*X**2 tmp = dL_dKdiag[:,None]*X**2
if self.ARD: if self.ARD:
target += tmp.sum(0) target += tmp.sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = partial[:, None] * self.mu2_S tmp = dL_dpsi0[:, None] * self.mu2_S
if self.ARD: if self.ARD:
target += tmp.sum(0) target += tmp.sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dpsi0_dmuS(self,partial, Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,dL_dpsi0, Z,mu,S,target_mu,target_S):
target_mu += partial[:, None] * (2.0*mu*self.variances) target_mu += dL_dpsi0[:, None] * (2.0*mu*self.variances)
target_S += partial[:, None] * self.variances target_S += dL_dpsi0[:, None] * self.variances
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
"""the variance, it does nothing""" """the variance, it does nothing"""
self.K(mu,Z,target) self.K(mu,Z,target)
def dpsi1_dtheta(self,partial,Z,mu,S,target): def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
"""the variance, it does nothing""" """the variance, it does nothing"""
self.dK_dtheta(partial,mu,Z,target) self.dK_dtheta(dL_dpsi1,mu,Z,target)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
"""Do nothing for S, it does not affect psi1""" """Do nothing for S, it does not affect psi1"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target_mu += (partial.T[:,:, None]*(Z*self.variances)).sum(1) target_mu += (dL_dpsi1.T[:,:, None]*(Z*self.variances)).sum(1)
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
self.dK_dX(partial.T,Z,mu,target) self.dK_dX(dL_dpsi1.T,Z,mu,target)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
""" """
@ -135,25 +135,25 @@ class linear(kernpart):
psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :] psi2 = self.ZZ*np.square(self.variances)*self.mu2_S[:, None, None, :]
target += psi2.sum(-1) target += psi2.sum(-1)
def dpsi2_dtheta(self,partial,Z,mu,S,target): def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = (partial[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances)) tmp = (dL_dpsi2[:,:,:,None]*(2.*self.ZZ*self.mu2_S[:,None,None,:]*self.variances))
if self.ARD: if self.ARD:
target += tmp.sum(0).sum(0).sum(0) target += tmp.sum(0).sum(0).sum(0)
else: else:
target += tmp.sum() target += tmp.sum()
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self.ZZ*np.square(self.variances) # M,M,Q tmp = self.ZZ*np.square(self.variances) # M,M,Q
target_mu += (partial[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1) target_mu += (dL_dpsi2[:,:,:,None]*tmp*2.*mu[:,None,None,:]).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp).sum(1).sum(1) target_S += (dL_dpsi2[:,:,:,None]*tmp).sum(1).sum(1)
def dpsi2_dZ(self,partial,Z,mu,S,target): def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
mu2_S = np.sum(self.mu2_S,0)# Q, mu2_S = np.sum(self.mu2_S,0)# Q,
target += (partial[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1) target += (dL_dpsi2[:,:,:,None] * (self.mu2_S[:,None,None,:]*(Z*np.square(self.variances)[None,:])[None,None,:,:])).sum(0).sum(1)
#---------------------------------------# #---------------------------------------#
# Precomputations # # Precomputations #

View file

@ -101,7 +101,7 @@ class periodic_Matern32(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -166,13 +166,13 @@ class periodic_Matern32(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0]) # np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial) target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1]) #np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial) target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2]) #np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial) target[2] += np.sum(dK_dper*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal covariance matrix with respect to the parameters""" """derivative of the diagonal covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -231,6 +231,6 @@ class periodic_Matern32(kernpart):
dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) dK_dper = 2* mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*partial) target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*partial) target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*partial) target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -46,7 +46,7 @@ class periodic_Matern52(kernpart):
r = np.sqrt(r1**2 + r2**2) r = np.sqrt(r1**2 + r2**2)
psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2)) psi = np.where(r1 != 0, (np.arctan(r2/r1) + (r1<0.)*np.pi),np.arcsin(r2))
return r,omega[:,0:1], psi return r,omega[:,0:1], psi
def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2): def _int_computation(self,r1,omega1,phi1,r2,omega2,phi2):
Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) ) Gint1 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + 1./(omega1-omega2.T)*( np.sin((omega1-omega2.T)*self.upper+phi1-phi2.T) - np.sin((omega1-omega2.T)*self.lower+phi1-phi2.T) )
Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower) Gint2 = 1./(omega1+omega2.T)*( np.sin((omega1+omega2.T)*self.upper+phi1+phi2.T) - np.sin((omega1+omega2.T)*self.lower+phi1+phi2.T)) + np.cos(phi1-phi2.T)*(self.upper-self.lower)
@ -105,7 +105,7 @@ class periodic_Matern52(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -178,13 +178,13 @@ class periodic_Matern52(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
# np.add(target[:,:,0],dK_dvar, target[:,:,0]) # np.add(target[:,:,0],dK_dvar, target[:,:,0])
target[0] += np.sum(dK_dvar*partial) target[0] += np.sum(dK_dvar*dL_dK)
#np.add(target[:,:,1],dK_dlen, target[:,:,1]) #np.add(target[:,:,1],dK_dlen, target[:,:,1])
target[1] += np.sum(dK_dlen*partial) target[1] += np.sum(dK_dlen*dL_dK)
#np.add(target[:,:,2],dK_dper, target[:,:,2]) #np.add(target[:,:,2],dK_dper, target[:,:,2])
target[2] += np.sum(dK_dper*partial) target[2] += np.sum(dK_dper*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters""" """derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -251,6 +251,6 @@ class periodic_Matern52(kernpart):
dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper) dG_dper = 1./self.variance*(3*self.lengthscale**5/(400*np.sqrt(5))*dGint_dper + 0.5*dlower_terms_dper)
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*partial) target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*partial) target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*partial) target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -101,7 +101,7 @@ class periodic_exponential(kernpart):
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target) np.add(target,np.diag(mdot(FX,self.Gi,FX.T)),target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)""" """derivative of the covariance matrix with respect to the parameters (shape is NxMxNparam)"""
if X2 is None: X2 = X if X2 is None: X2 = X
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -162,11 +162,11 @@ class periodic_exponential(kernpart):
dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T) dK_dper = mdot(dFX_dper,self.Gi,FX2.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX2.T) + mdot(FX,self.Gi,dFX2_dper.T)
target[0] += np.sum(dK_dvar*partial) target[0] += np.sum(dK_dvar*dL_dK)
target[1] += np.sum(dK_dlen*partial) target[1] += np.sum(dK_dlen*dL_dK)
target[2] += np.sum(dK_dper*partial) target[2] += np.sum(dK_dper*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""derivative of the diagonal of the covariance matrix with respect to the parameters""" """derivative of the diagonal of the covariance matrix with respect to the parameters"""
FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X) FX = self._cos(self.basis_alpha[None,:],self.basis_omega[None,:],self.basis_phi[None,:])(X)
@ -222,7 +222,7 @@ class periodic_exponential(kernpart):
dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T) dK_dper = 2*mdot(dFX_dper,self.Gi,FX.T) - mdot(FX,self.Gi,dG_dper,self.Gi,FX.T)
target[0] += np.sum(np.diag(dK_dvar)*partial) target[0] += np.sum(np.diag(dK_dvar)*dL_dKdiag)
target[1] += np.sum(np.diag(dK_dlen)*partial) target[1] += np.sum(np.diag(dK_dlen)*dL_dKdiag)
target[2] += np.sum(np.diag(dK_dper)*partial) target[2] += np.sum(np.diag(dK_dper)*dL_dKdiag)

View file

@ -55,7 +55,7 @@ class prod(kernpart):
self.k2.Kdiag(X,target2) self.k2.Kdiag(X,target2)
target += target1 * target2 target += target1 * target2
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0])) K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -65,13 +65,13 @@ class prod(kernpart):
k1_target = np.zeros(self.k1.Nparam) k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam) k2_target = np.zeros(self.k2.Nparam)
self.k1.dK_dtheta(partial*K2, X, X2, k1_target) self.k1.dK_dtheta(dL_dK*K2, X, X2, k1_target)
self.k2.dK_dtheta(partial*K1, X, X2, k2_target) self.k2.dK_dtheta(dL_dK*K1, X, X2, k2_target)
target[:self.k1.Nparam] += k1_target target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target target[self.k1.Nparam:] += k2_target
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0])) K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -79,19 +79,19 @@ class prod(kernpart):
self.k1.K(X,X2,K1) self.k1.K(X,X2,K1)
self.k2.K(X,X2,K2) self.k2.K(X,X2,K2)
self.k1.dK_dX(partial*K2, X, X2, target) self.k1.dK_dX(dL_dK*K2, X, X2, target)
self.k2.dK_dX(partial*K1, X, X2, target) self.k2.dK_dX(dL_dK*K1, X, X2, target)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
target1 = np.zeros((X.shape[0],)) target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],)) target2 = np.zeros((X.shape[0],))
self.k1.Kdiag(X,target1) self.k1.Kdiag(X,target1)
self.k2.Kdiag(X,target2) self.k2.Kdiag(X,target2)
self.k1.dKdiag_dX(partial*target2, X, target) self.k1.dKdiag_dX(dL_dKdiag*target2, X, target)
self.k2.dKdiag_dX(partial*target1, X, target) self.k2.dKdiag_dX(dL_dKdiag*target1, X, target)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
target1 = np.zeros((X.shape[0],)) target1 = np.zeros((X.shape[0],))
target2 = np.zeros((X.shape[0],)) target2 = np.zeros((X.shape[0],))
@ -100,8 +100,8 @@ class prod(kernpart):
k1_target = np.zeros(self.k1.Nparam) k1_target = np.zeros(self.k1.Nparam)
k2_target = np.zeros(self.k2.Nparam) k2_target = np.zeros(self.k2.Nparam)
self.k1.dKdiag_dtheta(partial*target2, X, k1_target) self.k1.dKdiag_dtheta(dL_dKdiag*target2, X, k1_target)
self.k2.dKdiag_dtheta(partial*target1, X, k2_target) self.k2.dKdiag_dtheta(dL_dKdiag*target1, X, k2_target)
target[:self.k1.Nparam] += k1_target target[:self.k1.Nparam] += k1_target
target[self.k1.Nparam:] += k2_target target[self.k1.Nparam:] += k2_target

View file

@ -46,7 +46,7 @@ class prod_orthogonal(kernpart):
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2) self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],target2)
target += target1 * target2 target += target1 * target2
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
if X2 is None: X2 = X if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0])) K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -54,8 +54,8 @@ class prod_orthogonal(kernpart):
self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1) self.k1.K(X[:,:self.k1.D],X2[:,:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2) self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
self.k1.dK_dtheta(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam]) self.k1.dK_dtheta(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target[:self.k1.Nparam])
self.k2.dK_dtheta(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:]) self.k2.dK_dtheta(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target[self.k1.Nparam:])
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -65,15 +65,15 @@ class prod_orthogonal(kernpart):
self.k2.Kdiag(X[:,self.k1.D:],target2) self.k2.Kdiag(X[:,self.k1.D:],target2)
target += target1 * target2 target += target1 * target2
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,:self.k1.D],K1) self.k1.Kdiag(X[:,:self.k1.D],K1)
self.k2.Kdiag(X[:,self.k1.D:],K2) self.k2.Kdiag(X[:,self.k1.D:],K2)
self.k1.dKdiag_dtheta(partial*K2,X[:,:self.k1.D],target[:self.k1.Nparam]) self.k1.dKdiag_dtheta(dL_dKdiag*K2,X[:,:self.k1.D],target[:self.k1.Nparam])
self.k2.dKdiag_dtheta(partial*K1,X[:,self.k1.D:],target[self.k1.Nparam:]) self.k2.dKdiag_dtheta(dL_dKdiag*K1,X[:,self.k1.D:],target[self.k1.Nparam:])
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
if X2 is None: X2 = X if X2 is None: X2 = X
K1 = np.zeros((X.shape[0],X2.shape[0])) K1 = np.zeros((X.shape[0],X2.shape[0]))
@ -81,15 +81,15 @@ class prod_orthogonal(kernpart):
self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1) self.k1.K(X[:,0:self.k1.D],X2[:,0:self.k1.D],K1)
self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2) self.k2.K(X[:,self.k1.D:],X2[:,self.k1.D:],K2)
self.k1.dK_dX(partial*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target) self.k1.dK_dX(dL_dK*K2, X[:,:self.k1.D], X2[:,:self.k1.D], target)
self.k2.dK_dX(partial*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target) self.k2.dK_dX(dL_dK*K1, X[:,self.k1.D:], X2[:,self.k1.D:], target)
def dKdiag_dX(self, partial, X, target): def dKdiag_dX(self, dL_dKdiag, X, target):
K1 = np.zeros(X.shape[0]) K1 = np.zeros(X.shape[0])
K2 = np.zeros(X.shape[0]) K2 = np.zeros(X.shape[0])
self.k1.Kdiag(X[:,0:self.k1.D],K1) self.k1.Kdiag(X[:,0:self.k1.D],K1)
self.k2.Kdiag(X[:,self.k1.D:],K2) self.k2.Kdiag(X[:,self.k1.D:],K2)
self.k1.dK_dX(partial*K2, X[:,:self.k1.D], target) self.k1.dK_dX(dL_dKdiag*K2, X[:,:self.k1.D], target)
self.k2.dK_dX(partial*K1, X[:,self.k1.D:], target) self.k2.dK_dX(dL_dKdiag*K1, X[:,self.k1.D:], target)

View file

@ -82,27 +82,27 @@ class rbf(kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
np.add(target,self.variance,target) np.add(target,self.variance,target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
self._K_computations(X,X2) self._K_computations(X,X2)
target[0] += np.sum(self._K_dvar*partial) target[0] += np.sum(self._K_dvar*dL_dK)
if self.ARD == True: if self.ARD == True:
dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale dl = self._K_dvar[:,:,None]*self.variance*self._K_dist2/self.lengthscale
target[1:] += (dl*partial[:,:,None]).sum(0).sum(0) target[1:] += (dl*dL_dK[:,:,None]).sum(0).sum(0)
else: else:
target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*partial) target[1] += np.sum(self._K_dvar*self.variance*(self._K_dist2.sum(-1))/self.lengthscale*dL_dK)
#np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*partial) #np.sum(self._K_dvar*self.variance*self._K_dist2/self.lengthscale*dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
#NB: derivative of diagonal elements wrt lengthscale is 0 #NB: derivative of diagonal elements wrt lengthscale is 0
target[0] += np.sum(partial) target[0] += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
self._K_computations(X,X2) self._K_computations(X,X2)
_K_dist = X[:,None,:]-X2[None,:,:] _K_dist = X[:,None,:]-X2[None,:,:]
dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2)) dK_dX = np.transpose(-self.variance*self._K_dvar[:,:,np.newaxis]*_K_dist/self.lengthscale2,(1,0,2))
target += np.sum(dK_dX*partial.T[:,:,None],0) target += np.sum(dK_dX*dL_dK.T[:,:,None],0)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
@ -113,69 +113,69 @@ class rbf(kernpart):
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
target += self.variance target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
target[0] += np.sum(partial) target[0] += np.sum(dL_dpsi0)
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += self._psi1 target += self._psi1
def dpsi1_dtheta(self,partial,Z,mu,S,target): def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:]) denom_deriv = S[:,None,:]/(self.lengthscale**3+self.lengthscale*S[:,None,:])
d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv) d_length = self._psi1[:,:,None]*(self.lengthscale*np.square(self._psi1_dist/(self.lengthscale2+S[:,None,:])) + denom_deriv)
target[0] += np.sum(partial*self._psi1/self.variance) target[0] += np.sum(dL_dpsi1*self._psi1/self.variance)
dpsi1_dlength = d_length*partial[:,:,None] dpsi1_dlength = d_length*dL_dpsi1[:,:,None]
if not self.ARD: if not self.ARD:
target[1] += dpsi1_dlength.sum() target[1] += dpsi1_dlength.sum()
else: else:
target[1:] += dpsi1_dlength.sum(0).sum(0) target[1:] += dpsi1_dlength.sum(0).sum(0)
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
denominator = (self.lengthscale2*(self._psi1_denom)) denominator = (self.lengthscale2*(self._psi1_denom))
dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator)) dpsi1_dZ = - self._psi1[:,:,None] * ((self._psi1_dist/denominator))
target += np.sum(partial.T[:,:,None] * dpsi1_dZ, 0) target += np.sum(dL_dpsi1.T[:,:,None] * dpsi1_dZ, 0)
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom tmp = self._psi1[:,:,None]/self.lengthscale2/self._psi1_denom
target_mu += np.sum(partial.T[:, :, None]*tmp*self._psi1_dist,1) target_mu += np.sum(dL_dpsi1.T[:, :, None]*tmp*self._psi1_dist,1)
target_S += np.sum(partial.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1) target_S += np.sum(dL_dpsi1.T[:, :, None]*0.5*tmp*(self._psi1_dist_sq-1),1)
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
target += self._psi2 target += self._psi2
def dpsi2_dtheta(self,partial,Z,mu,S,target): def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
"""Shape N,M,M,Ntheta""" """Shape N,M,M,Ntheta"""
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
d_var = 2.*self._psi2/self.variance d_var = 2.*self._psi2/self.variance
d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom) d_length = self._psi2[:,:,:,None]*(0.5*self._psi2_Zdist_sq*self._psi2_denom + 2.*self._psi2_mudist_sq + 2.*S[:,None,None,:]/self.lengthscale2)/(self.lengthscale*self._psi2_denom)
target[0] += np.sum(partial*d_var) target[0] += np.sum(dL_dpsi2*d_var)
dpsi2_dlength = d_length*partial[:,:,:,None] dpsi2_dlength = d_length*dL_dpsi2[:,:,:,None]
if not self.ARD: if not self.ARD:
target[1] += dpsi2_dlength.sum() target[1] += dpsi2_dlength.sum()
else: else:
target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0) target[1:] += dpsi2_dlength.sum(0).sum(0).sum(0)
def dpsi2_dZ(self,partial,Z,mu,S,target): def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q term1 = 0.5*self._psi2_Zdist/self.lengthscale2 # M, M, Q
term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q term2 = self._psi2_mudist/self._psi2_denom/self.lengthscale2 # N, M, M, Q
dZ = self._psi2[:,:,:,None] * (term1[None] + term2) dZ = self._psi2[:,:,:,None] * (term1[None] + term2)
target += (partial[:,:,:,None]*dZ).sum(0).sum(0) target += (dL_dpsi2[:,:,:,None]*dZ).sum(0).sum(0)
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
"""Think N,M,M,Q """ """Think N,M,M,Q """
self._psi_computations(Z,mu,S) self._psi_computations(Z,mu,S)
tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom tmp = self._psi2[:,:,:,None]/self.lengthscale2/self._psi2_denom
target_mu += (partial[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1) target_mu += (dL_dpsi2[:,:,:,None]*-tmp*2.*self._psi2_mudist).sum(1).sum(1)
target_S += (partial[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1) target_S += (dL_dpsi2[:,:,:,None]*tmp*(2.*self._psi2_mudist_sq-1)).sum(1).sum(1)
#---------------------------------------# #---------------------------------------#

View file

@ -51,7 +51,7 @@ class symmetric(kernpart):
self.k.K(X,AX2,target) self.k.K(X,AX2,target)
self.k.K(AX,AX2,target) self.k.K(AX,AX2,target)
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to the parameters.""" """derivative of the covariance matrix with respect to the parameters."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@ -59,13 +59,13 @@ class symmetric(kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dtheta(partial,X,X2,target) self.k.dK_dtheta(dL_dK,X,X2,target)
self.k.dK_dtheta(partial,AX,X2,target) self.k.dK_dtheta(dL_dK,AX,X2,target)
self.k.dK_dtheta(partial,X,AX2,target) self.k.dK_dtheta(dL_dK,X,AX2,target)
self.k.dK_dtheta(partial,AX,AX2,target) self.k.dK_dtheta(dL_dK,AX,AX2,target)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
"""derivative of the covariance matrix with respect to X.""" """derivative of the covariance matrix with respect to X."""
AX = np.dot(X,self.transform) AX = np.dot(X,self.transform)
if X2 is None: if X2 is None:
@ -73,10 +73,10 @@ class symmetric(kernpart):
ZX2 = AX ZX2 = AX
else: else:
AX2 = np.dot(X2, self.transform) AX2 = np.dot(X2, self.transform)
self.k.dK_dX(partial, X, X2, target) self.k.dK_dX(dL_dK, X, X2, target)
self.k.dK_dX(partial, AX, X2, target) self.k.dK_dX(dL_dK, AX, X2, target)
self.k.dK_dX(partial, X, AX2, target) self.k.dK_dX(dL_dK, X, AX2, target)
self.k.dK_dX(partial, AX ,AX2, target) self.k.dK_dX(dL_dK, AX ,AX2, target)
def Kdiag(self,X,target): def Kdiag(self,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
@ -84,9 +84,9 @@ class symmetric(kernpart):
self.K(X,X,foo) self.K(X,X,foo)
target += np.diag(foo) target += np.diag(foo)
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
raise NotImplementedError raise NotImplementedError
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
"""Compute the diagonal of the covariance matrix associated to X.""" """Compute the diagonal of the covariance matrix associated to X."""
raise NotImplementedError raise NotImplementedError

View file

@ -37,50 +37,50 @@ class white(kernpart):
def Kdiag(self,X,target): def Kdiag(self,X,target):
target += self.variance target += self.variance
def dK_dtheta(self,partial,X,X2,target): def dK_dtheta(self,dL_dK,X,X2,target):
if X.shape==X2.shape: if X.shape==X2.shape:
if np.all(X==X2): if np.all(X==X2):
target += np.trace(partial) target += np.trace(dL_dK)
def dKdiag_dtheta(self,partial,X,target): def dKdiag_dtheta(self,dL_dKdiag,X,target):
target += np.sum(partial) target += np.sum(dL_dKdiag)
def dK_dX(self,partial,X,X2,target): def dK_dX(self,dL_dK,X,X2,target):
pass pass
def dKdiag_dX(self,partial,X,target): def dKdiag_dX(self,dL_dKdiag,X,target):
pass pass
def psi0(self,Z,mu,S,target): def psi0(self,Z,mu,S,target):
target += self.variance target += self.variance
def dpsi0_dtheta(self,partial,Z,mu,S,target): def dpsi0_dtheta(self,dL_dpsi0,Z,mu,S,target):
target += partial.sum() target += dL_dpsi0.sum()
def dpsi0_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi0_dmuS(self,dL_dpsi0,Z,mu,S,target_mu,target_S):
pass pass
def psi1(self,Z,mu,S,target): def psi1(self,Z,mu,S,target):
pass pass
def dpsi1_dtheta(self,partial,Z,mu,S,target): def dpsi1_dtheta(self,dL_dpsi1,Z,mu,S,target):
pass pass
def dpsi1_dZ(self,partial,Z,mu,S,target): def dpsi1_dZ(self,dL_dpsi1,Z,mu,S,target):
pass pass
def dpsi1_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi1_dmuS(self,dL_dpsi1,Z,mu,S,target_mu,target_S):
pass pass
def psi2(self,Z,mu,S,target): def psi2(self,Z,mu,S,target):
pass pass
def dpsi2_dZ(self,partial,Z,mu,S,target): def dpsi2_dZ(self,dL_dpsi2,Z,mu,S,target):
pass pass
def dpsi2_dtheta(self,partial,Z,mu,S,target): def dpsi2_dtheta(self,dL_dpsi2,Z,mu,S,target):
pass pass
def dpsi2_dmuS(self,partial,Z,mu,S,target_mu,target_S): def dpsi2_dmuS(self,dL_dpsi2,Z,mu,S,target_mu,target_S):
pass pass

View file

@ -17,7 +17,7 @@ class EP(likelihood):
self.epsilon = epsilon self.epsilon = epsilon
self.eta, self.delta = power_ep self.eta, self.delta = power_ep
self.data = data self.data = data
self.N = self.data.size self.N, self.D = self.data.shape
self.is_heteroscedastic = True self.is_heteroscedastic = True
self.Nparams = 0 self.Nparams = 0
@ -29,7 +29,7 @@ class EP(likelihood):
#initial values for the GP variables #initial values for the GP variables
self.Y = np.zeros((self.N,1)) self.Y = np.zeros((self.N,1))
self.covariance_matrix = np.eye(self.N) self.covariance_matrix = np.eye(self.N)
self.precision = np.ones(self.N) self.precision = np.ones(self.N)[:,None]
self.Z = 0 self.Z = 0
self.YYT = None self.YYT = None
@ -54,18 +54,14 @@ class EP(likelihood):
self.Y = mu_tilde[:,None] self.Y = mu_tilde[:,None]
self.YYT = np.dot(self.Y,self.Y.T) self.YYT = np.dot(self.Y,self.Y.T)
self.precision = self.tau_tilde self.covariance_matrix = np.diag(1./self.tau_tilde)
self.covariance_matrix = np.diag(1./self.precision) self.precision = self.tau_tilde[:,None]
def fit_full(self,K): def fit_full(self,K):
""" """
The expectation-propagation algorithm. The expectation-propagation algorithm.
For nomenclature see Rasmussen & Williams 2006. For nomenclature see Rasmussen & Williams 2006.
""" """
#Prior distribution parameters: p(f|X) = N(f|0,K)
self.tau_tilde = np.zeros(self.N)
self.v_tilde = np.zeros(self.N)
#Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma) #Initial values - Posterior distribution parameters: q(f|X,Y) = N(f|mu,Sigma)
mu = np.zeros(self.N) mu = np.zeros(self.N)
Sigma = K.copy() Sigma = K.copy()
@ -114,7 +110,7 @@ class EP(likelihood):
Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K Sroot_tilde_K = np.sqrt(self.tau_tilde)[:,None]*K
B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K B = np.eye(self.N) + np.sqrt(self.tau_tilde)[None,:]*Sroot_tilde_K
L = jitchol(B) L = jitchol(B)
V,info = linalg.flapack.dtrtrs(L,Sroot_tilde_K,lower=1) V,info = linalg.lapack.flapack.dtrtrs(L,Sroot_tilde_K,lower=1)
Sigma = K - np.dot(V.T,V) Sigma = K - np.dot(V.T,V)
mu = np.dot(Sigma,self.v_tilde) mu = np.dot(Sigma,self.v_tilde)
epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-self.np1[-1])**2)/self.N
@ -124,13 +120,14 @@ class EP(likelihood):
return self._compute_GP_variables() return self._compute_GP_variables()
def fit_DTC(self, Knn_diag, Kmn, Kmm): #def fit_DTC(self, Knn_diag, Kmn, Kmm):
def fit_DTC(self, Kmm, Kmn):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see ... 2013. For nomenclature see ... 2013.
""" """
#TODO: this doesn;t work with uncertain inputs! #TODO: this doesn't work with uncertain inputs!
""" """
Prior approximation parameters: Prior approximation parameters:
@ -158,12 +155,12 @@ class EP(likelihood):
sigma_ = 1./tau_ sigma_ = 1./tau_
mu_ = v_/tau_ mu_ = v_/tau_
""" """
tau_ = np.empty(self.N,dtype=float) self.tau_ = np.empty(self.N,dtype=float)
v_ = np.empty(self.N,dtype=float) self.v_ = np.empty(self.N,dtype=float)
#Initial values - Marginal moments #Initial values - Marginal moments
z = np.empty(self.N,dtype=float) z = np.empty(self.N,dtype=float)
Z_hat = np.empty(self.N,dtype=float) self.Z_hat = np.empty(self.N,dtype=float)
phi = np.empty(self.N,dtype=float) phi = np.empty(self.N,dtype=float)
mu_hat = np.empty(self.N,dtype=float) mu_hat = np.empty(self.N,dtype=float)
sigma2_hat = np.empty(self.N,dtype=float) sigma2_hat = np.empty(self.N,dtype=float)
@ -172,49 +169,50 @@ class EP(likelihood):
epsilon_np1 = 1 epsilon_np1 = 1
epsilon_np2 = 1 epsilon_np2 = 1
self.iterations = 0 self.iterations = 0
np1 = [tau_tilde.copy()] np1 = [self.tau_tilde.copy()]
np2 = [v_tilde.copy()] np2 = [self.v_tilde.copy()]
while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon: while epsilon_np1 > self.epsilon or epsilon_np2 > self.epsilon:
update_order = np.random.permutation(self.N) update_order = np.random.permutation(self.N)
for i in update_order: for i in update_order:
#Cavity distribution parameters #Cavity distribution parameters
tau_[i] = 1./Sigma_diag[i] - self.eta*tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
v_[i] = mu[i]/Sigma_diag[i] - self.eta*v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],tau_[i],v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
tau_tilde[i] = tau_tilde[i] + Delta_tau self.tau_tilde[i] = self.tau_tilde[i] + Delta_tau
v_tilde[i] = v_tilde[i] + Delta_v self.v_tilde[i] = self.v_tilde[i] + Delta_v
#Posterior distribution parameters update #Posterior distribution parameters update
LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau LLT = LLT + np.outer(Kmn[:,i],Kmn[:,i])*Delta_tau
L = jitchol(LLT) L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
Sigma_diag = np.sum(V*V,-2) Sigma_diag = np.sum(V*V,-2)
si = np.sum(V.T*V[:,i],-1) si = np.sum(V.T*V[:,i],-1)
mu = mu + (Delta_v-Delta_tau*mu[i])*si mu = mu + (Delta_v-Delta_tau*mu[i])*si
self.iterations += 1 self.iterations += 1
#Sigma recomputation with Cholesky decompositon #Sigma recomputation with Cholesky decompositon
LLT0 = LLT0 + np.dot(Kmn*tau_tilde[None,:],Kmn.T) LLT0 = LLT0 + np.dot(Kmn*self.tau_tilde[None,:],Kmn.T)
L = jitchol(LLT) L = jitchol(LLT)
V,info = linalg.flapack.dtrtrs(L,Kmn,lower=1) V,info = linalg.lapack.flapack.dtrtrs(L,Kmn,lower=1)
V2,info = linalg.flapack.dtrtrs(L.T,V,lower=0) V2,info = linalg.lapack.flapack.dtrtrs(L.T,V,lower=0)
Sigma_diag = np.sum(V*V,-2) Sigma_diag = np.sum(V*V,-2)
Knmv_tilde = np.dot(Kmn,v_tilde) Knmv_tilde = np.dot(Kmn,self.v_tilde)
mu = np.dot(V2.T,Knmv_tilde) mu = np.dot(V2.T,Knmv_tilde)
epsilon_np1 = sum((tau_tilde-np1[-1])**2)/self.N epsilon_np1 = sum((self.tau_tilde-np1[-1])**2)/self.N
epsilon_np2 = sum((v_tilde-np2[-1])**2)/self.N epsilon_np2 = sum((self.v_tilde-np2[-1])**2)/self.N
np1.append(tau_tilde.copy()) np1.append(self.tau_tilde.copy())
np2.append(v_tilde.copy()) np2.append(self.v_tilde.copy())
self._compute_GP_variables() self._compute_GP_variables()
def fit_FITC(self, Knn_diag, Kmn): def fit_FITC(self, Kmm, Kmn, Knn_diag):
""" """
The expectation-propagation algorithm with sparse pseudo-input. The expectation-propagation algorithm with sparse pseudo-input.
For nomenclature see Naish-Guzman and Holden, 2008. For nomenclature see Naish-Guzman and Holden, 2008.
""" """
M = Kmm.shape[0]
""" """
Prior approximation parameters: Prior approximation parameters:
@ -235,7 +233,7 @@ class EP(likelihood):
mu = w + P*gamma mu = w + P*gamma
""" """
self.w = np.zeros(self.N) self.w = np.zeros(self.N)
self.gamma = np.zeros(self.M) self.gamma = np.zeros(M)
mu = np.zeros(self.N) mu = np.zeros(self.N)
P = P0.copy() P = P0.copy()
R = R0.copy() R = R0.copy()
@ -271,7 +269,7 @@ class EP(likelihood):
self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i] self.tau_[i] = 1./Sigma_diag[i] - self.eta*self.tau_tilde[i]
self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i] self.v_[i] = mu[i]/Sigma_diag[i] - self.eta*self.v_tilde[i]
#Marginal moments #Marginal moments
self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(data[i],self.tau_[i],self.v_[i]) self.Z_hat[i], mu_hat[i], sigma2_hat[i] = self.likelihood_function.moments_match(self.data[i],self.tau_[i],self.v_[i])
#Site parameters update #Site parameters update
Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i]) Delta_tau = self.delta/self.eta*(1./sigma2_hat[i] - 1./Sigma_diag[i])
Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i]) Delta_v = self.delta/self.eta*(mu_hat[i]/sigma2_hat[i] - mu[i]/Sigma_diag[i])
@ -281,10 +279,10 @@ class EP(likelihood):
dtd1 = Delta_tau*Diag[i] + 1. dtd1 = Delta_tau*Diag[i] + 1.
dii = Diag[i] dii = Diag[i]
Diag[i] = dii - (Delta_tau * dii**2.)/dtd1 Diag[i] = dii - (Delta_tau * dii**2.)/dtd1
pi_ = P[i,:].reshape(1,self.M) pi_ = P[i,:].reshape(1,M)
P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_ P[i,:] = pi_ - (Delta_tau*dii)/dtd1 * pi_
Rp_i = np.dot(R,pi_.T) Rp_i = np.dot(R,pi_.T)
RTR = np.dot(R.T,np.dot(np.eye(self.M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R)) RTR = np.dot(R.T,np.dot(np.eye(M) - Delta_tau/(1.+Delta_tau*Sigma_diag[i]) * np.dot(Rp_i,Rp_i.T),R))
R = jitchol(RTR).T R = jitchol(RTR).T
self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1 self.w[i] = self.w[i] + (Delta_v - Delta_tau*self.w[i])*dii/dtd1
self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T) self.gamma = self.gamma + (Delta_v - Delta_tau*mu[i])*np.dot(RTR,P[i,:].T)
@ -296,8 +294,8 @@ class EP(likelihood):
Diag = Diag0/(1.+ Diag0 * self.tau_tilde) Diag = Diag0/(1.+ Diag0 * self.tau_tilde)
P = (Diag / Diag0)[:,None] * P0 P = (Diag / Diag0)[:,None] * P0
RPT0 = np.dot(R0,P0.T) RPT0 = np.dot(R0,P0.T)
L = jitchol(np.eye(self.M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T)) L = jitchol(np.eye(M) + np.dot(RPT0,(1./Diag0 - Diag/(Diag0**2))[:,None]*RPT0.T))
R,info = linalg.flapack.dtrtrs(L,R0,lower=1) R,info = linalg.lapack.flapack.dtrtrs(L,R0,lower=1)
RPT = np.dot(R,P.T) RPT = np.dot(R,P.T)
Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1) Sigma_diag = Diag + np.sum(RPT.T*RPT.T,-1)
self.w = Diag * self.v_tilde self.w = Diag * self.v_tilde

View file

@ -37,8 +37,8 @@ class probit(likelihood_function):
:param tau_i: precision of the cavity distribution (float) :param tau_i: precision of the cavity distribution (float)
:param v_i: mean/variance of the cavity distribution (float) :param v_i: mean/variance of the cavity distribution (float)
""" """
# TODO: some version of assert np.sum(np.abs(Y)-1) == 0, "Output values must be either -1 or 1" if data_i == 0: data_i = -1 #NOTE Binary classification algorithm works better with classes {-1,1}, 1D-plotting works better with classes {0,1}.
if data_i == 0: data_i = -1 #NOTE Binary classification works better classes {-1,1}, 1D-plotting works better with classes {0,1}. # TODO: some version of assert
z = data_i*v_i/np.sqrt(tau_i**2 + tau_i) z = data_i*v_i/np.sqrt(tau_i**2 + tau_i)
Z_hat = stats.norm.cdf(z) Z_hat = stats.norm.cdf(z)
phi = stats.norm.pdf(z) phi = stats.norm.pdf(z)

View file

@ -129,7 +129,7 @@ class GP(model):
For the likelihood parameters, pass in alpha = K^-1 y For the likelihood parameters, pass in alpha = K^-1 y
""" """
return np.hstack((self.kern.dK_dtheta(partial=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK)))) return np.hstack((self.kern.dK_dtheta(dL_dK=self.dL_dK,X=self.X,slices1=self.Xslices,slices2=self.Xslices), self.likelihood._gradients(partial=np.diag(self.dL_dK))))
def _raw_predict(self,_Xnew,slices=None, full_cov=False): def _raw_predict(self,_Xnew,slices=None, full_cov=False):
""" """
@ -269,6 +269,8 @@ class GP(model):
if hasattr(self,'Z'): if hasattr(self,'Z'):
Zu = self.Z*self._Xstd + self._Xmean Zu = self.Z*self._Xstd + self._Xmean
pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12) pb.plot(Zu,Zu*0+pb.ylim()[0],'r|',mew=1.5,markersize=12)
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
elif self.X.shape[1]==2: #FIXME elif self.X.shape[1]==2: #FIXME
resolution = resolution or 50 resolution = resolution or 50
@ -281,5 +283,8 @@ class GP(model):
pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.) pb.scatter(self.X[:,0], self.X[:,1], 40, Yf, cmap=pb.cm.jet,vmin=m.min(),vmax=m.max(), linewidth=0.)
pb.xlim(xmin[0],xmax[0]) pb.xlim(xmin[0],xmax[0])
pb.ylim(xmin[1],xmax[1]) pb.ylim(xmin[1],xmax[1])
if hasattr(self,'Z'):
pb.plot(self.Z[:,0],self.Z[:,1],'wo')
else: else:
raise NotImplementedError, "Cannot define a frame with more than two input dimensions" raise NotImplementedError, "Cannot define a frame with more than two input dimensions"

View file

@ -72,7 +72,7 @@ class sparse_GP(GP):
self.psi2 = None self.psi2 = None
def _computations(self): def _computations(self):
# TODO find routine to multiply triangular matrices #TODO: find routine to multiply triangular matrices
#TODO: slices for psi statistics (easy enough) #TODO: slices for psi statistics (easy enough)
sf = self.scale_factor sf = self.scale_factor
@ -82,9 +82,9 @@ class sparse_GP(GP):
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs? assert self.likelihood.D == 1 #TODO: what is the likelihood is heterscedatic and there are multiple independent outputs?
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.reshape(self.N,1,1)/sf2)).sum(0) self.psi2_beta_scaled = (self.psi2*(self.likelihood.precision.flatten().reshape(self.N,1,1)/sf2)).sum(0)
else: else:
tmp = self.psi1.T*(np.sqrt(self.likelihood.precision.reshape(1,self.N))/sf) tmp = self.psi1*(np.sqrt(self.likelihood.precision.flatten().reshape(1,self.N))/sf)
self.psi2_beta_scaled = np.dot(tmp,tmp.T) self.psi2_beta_scaled = np.dot(tmp,tmp.T)
else: else:
if self.has_uncertain_inputs: if self.has_uncertain_inputs:
@ -106,15 +106,19 @@ class sparse_GP(GP):
self.C = mdot(self.Lmi.T, self.Bi, self.Lmi) self.C = mdot(self.Lmi.T, self.Bi, self.Lmi)
self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T) self.E = mdot(self.C, self.psi1VVpsi1/sf2, self.C.T)
# Compute dL_dpsi # FIXME: this is untested for the het. case # Compute dL_dpsi # FIXME: this is untested for the heterscedastic + uncertin inputs case
self.dL_dpsi0 = - 0.5 * self.D * self.likelihood.precision * np.ones(self.N) self.dL_dpsi0 = - 0.5 * self.D * (self.likelihood.precision * np.ones([self.N,1])).flatten()
self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T self.dL_dpsi1 = mdot(self.V, self.psi1V.T,self.C).T
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB if self.has_uncertain_inputs:
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC self.dL_dpsi2 = 0.5 * self.likelihood.precision[:,None,None] * self.D * self.Kmmi[None,:,:] # dB
self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]/sf2 * self.D * self.C[None,:,:] # dC
if not self.has_uncertain_inputs: self.dL_dpsi2 += - 0.5 * self.likelihood.precision[:,None,None]* self.E[None,:,:] # dD
raise NotImplementedError, "TODO: recaste derivatibes in psi2 back into psi1" else:
self.dL_dpsi1 += mdot(self.Kmmi,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dB
self.dL_dpsi1 += -mdot(self.C,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)/sf2) #dC
self.dL_dpsi1 += -mdot(self.E,self.psi1*self.likelihood.precision.flatten().reshape(1,self.N)) #dD
self.dL_dpsi2 = None
else: else:
self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB self.dL_dpsi2 = 0.5 * self.likelihood.precision * self.D * self.Kmmi # dB
@ -166,14 +170,29 @@ class sparse_GP(GP):
def _get_param_names(self): def _get_param_names(self):
return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self) return sum([['iip_%i_%i'%(i,j) for j in range(self.Z.shape[1])] for i in range(self.Z.shape[0])],[]) + GP._get_param_names(self)
def update_likelihood_approximation(self):
"""
Approximates a non-gaussian likelihood using Expectation Propagation
For a Gaussian (or direct: TODO) likelihood, no iteration is required:
this function does nothing
"""
if self.has_uncertain_inputs:
raise NotImplementedError, "EP approximation not implemented for uncertain inputs"
else:
self.likelihood.fit_DTC(self.Kmm,self.psi1)
#self.likelihood.fit_FITC(self.Kmm,self.psi1,self.psi0)
self._set_params(self._get_params()) # update the GP
def log_likelihood(self): def log_likelihood(self):
""" Compute the (lower bound on the) log marginal likelihood """ """ Compute the (lower bound on the) log marginal likelihood """
sf2 = self.scale_factor**2 sf2 = self.scale_factor**2
if self.likelihood.is_heteroscedastic: if self.likelihood.is_heteroscedastic:
A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y) A = -0.5*self.N*self.D*np.log(2.*np.pi) +0.5*np.sum(np.log(self.likelihood.precision)) -0.5*np.sum(self.V*self.likelihood.Y)
B = -0.5*self.D*(np.sum(self.likelihood.precision.flatten()*self.psi0) - np.trace(self.A)*sf2)
else: else:
A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT A = -0.5*self.N*self.D*(np.log(2.*np.pi) - np.log(self.likelihood.precision)) -0.5*self.likelihood.precision*self.likelihood.trYYT
B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2) B = -0.5*self.D*(np.sum(self.likelihood.precision*self.psi0) - np.trace(self.A)*sf2)
C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2)) C = -0.5*self.D * (self.B_logdet + self.M*np.log(sf2))
D = +0.5*np.sum(self.psi1VVpsi1 * self.C) D = +0.5*np.sum(self.psi1VVpsi1 * self.C)
return A+B+C+D return A+B+C+D
@ -221,14 +240,3 @@ class sparse_GP(GP):
var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0) var = Kxx - np.sum(Kx*np.dot(self.Kmmi - self.C/self.scale_factor**2, Kx),0)
return mu,var[:,None] return mu,var[:,None]
def plot(self, *args, **kwargs):
"""
Plot the fitted model: just call the GP plot function and then add inducing inputs
"""
GP.plot(self,*args,**kwargs)
if self.Q==1:
if self.has_uncertain_inputs:
pb.errorbar(self.X[:,0], pb.ylim()[0]+np.zeros(self.N), xerr=2*np.sqrt(self.X_uncertainty.flatten()))
if self.Q==2:
pb.plot(self.Z[:,0],self.Z[:,1],'wo')

View file

@ -43,7 +43,7 @@ class sparse_GPLVM(sparse_GP_regression, GPLVM):
def dL_dX(self): def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X) dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0,self.X)
dL_dX += self.kern.dK_dX(self.dL_dpsi1,self.X,self.Z) dL_dX += self.kern.dK_dX(self.dL_dpsi1.T,self.X,self.Z)
return dL_dX return dL_dX

View file

@ -0,0 +1,47 @@
# Copyright (c) 2012, Nicolo Fusi
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class GPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_linear_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.GPLVM(Y, Q, kernel = k)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -0,0 +1,48 @@
# Copyright (c) 2012, Nicolo Fusi, James Hensman
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import unittest
import numpy as np
import GPy
class sparse_GPLVMTests(unittest.TestCase):
def test_bias_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.bias(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
@unittest.skip('linear kernels do not have dKdiag_dX')
def test_linear_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.linear(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
def test_rbf_kern(self):
N, M, Q, D = 10, 3, 2, 4
X = np.random.rand(N, Q)
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
K = k.K(X)
Y = np.random.multivariate_normal(np.zeros(N),K,D).T
k = GPy.kern.rbf(Q) + GPy.kern.white(Q, 0.00001)
m = GPy.models.sparse_GPLVM(Y, Q, kernel = k, M=M)
m.ensure_default_constraints()
m.randomize()
self.assertTrue(m.checkgrad())
if __name__ == "__main__":
print "Running unit tests, please be (very) patient..."
unittest.main()

View file

@ -157,13 +157,28 @@ class GradientTests(unittest.TestCase):
def test_GP_EP_probit(self): def test_GP_EP_probit(self):
N = 20 N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None] X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.repeat(-1,N/2)])[:,None] Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
kernel = GPy.kern.rbf(1) kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit() distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution) likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.GP(X, likelihood, kernel) m = GPy.models.GP(X, likelihood, kernel)
m.ensure_default_constraints() m.ensure_default_constraints()
self.assertTrue(m.EPEM) m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
#self.assertTrue(m.EPEM)
def test_sparse_EP_DTC_probit(self):
N = 20
X = np.hstack([np.random.normal(5,2,N/2),np.random.normal(10,2,N/2)])[:,None]
Y = np.hstack([np.ones(N/2),np.zeros(N/2)])[:,None]
Z = np.linspace(0,15,4)[:,None]
kernel = GPy.kern.rbf(1)
distribution = GPy.likelihoods.likelihood_functions.probit()
likelihood = GPy.likelihoods.EP(Y, distribution)
m = GPy.models.sparse_GP(X, likelihood, kernel,Z)
m.ensure_default_constraints()
m.update_likelihood_approximation()
self.assertTrue(m.checkgrad())
@unittest.skip("FITC will be broken for a while") @unittest.skip("FITC will be broken for a while")
def test_generalized_FITC(self): def test_generalized_FITC(self):

View file

@ -11,7 +11,7 @@ import re
import pdb import pdb
import cPickle import cPickle
import types import types
import scipy.lib.lapack.flapack #import scipy.lib.lapack.flapack
import scipy as sp import scipy as sp
def mdot(*args): def mdot(*args):
@ -101,7 +101,7 @@ def chol_inv(L):
""" """
return linalg.flapack.dtrtri(L, lower = True)[0] return linalg.lapack.flapack.dtrtri(L, lower = True)[0]
def multiple_pdinv(A): def multiple_pdinv(A):
@ -118,7 +118,7 @@ def multiple_pdinv(A):
N = A.shape[-1] N = A.shape[-1]
chols = [jitchol(A[:,:,i]) for i in range(N)] chols = [jitchol(A[:,:,i]) for i in range(N)]
halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols] halflogdets = [np.sum(np.log(np.diag(L[0]))) for L in chols]
invs = [linalg.flapack.dpotri(L[0],True)[0] for L in chols] invs = [linalg.lapack.flapack.dpotri(L[0],True)[0] for L in chols]
invs = [np.triu(I)+np.triu(I,1).T for I in invs] invs = [np.triu(I)+np.triu(I,1).T for I in invs]
return np.dstack(invs),np.array(halflogdets) return np.dstack(invs),np.array(halflogdets)

View file

@ -0,0 +1,17 @@
***************************
List of implemented kernels
***************************
The :math:`\checkmark` symbol represents the functions that have been implemented for each kernel.
.. |tick|
.. |tick| image:: tick.png
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======
NAME get/set K Kdiag dK_dtheta dKdiag_dtheta dK_dX dKdiag_dX psi0 psi1 psi2
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======
rbf \\checkmark y
====== =========== === ======= =========== =============== ======= =========== ====== ====== =======

View file

@ -2,7 +2,7 @@
Gaussian process regression tutorial Gaussian process regression tutorial
************************************* *************************************
We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. We will see in this tutorial the basics for building a 1 dimensional and a 2 dimensional Gaussian process regression model, also known as a kriging model. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_GP_regression.py.
We first import the libraries we will need: :: We first import the libraries we will need: ::

View file

@ -0,0 +1,60 @@
*************************************
Interacting with models
*************************************
The GPy model class has a set of features which are designed to make it simple to explore the parameter space of the model. By default, the scipy optimisers are used to fit GPy models (via model.optimize()), for which we provide mechanisms for 'free' optimisation: GPy can ensure that naturally positive parameters (such as variances) remain positive. But these mechanisms are much more powerful than simple reparameterisation, as we shall see.
All of the examples included in GPy return an instance of a model class. We'll use GPy.examples.?? as an example::
import pylab as pb
pb.ion()
import GPy
m = GPy.examples.??
Examining the model using print
===============================
To see the current state of the model parameters, and the model's (marginal) likelihood just print the model::
print m
?? output
Getting the model's likelihood and gradients
===========================================
foobar
Setting and fetching parameters by name
=======================================
foobar
Constraining and optimising the model
=====================================
A simple task in GPy is to ensure that the models' variances remain positive during optimisation. the models class has a function called constrain_positive(), which accepts a regex string as above. To constrain the models' variance to be positive::
m.constrain_positive('variance')
print m
Now we see that the variance of the model is constrained to be postive. GPy handles the effective change of gradients: see how m.objective_gradients has changed approriately
For convenience, we also provide a catch all function which ensures that anything which appears to require positivity is constrianed appropriately::
m.ensure_default_constraints()
Fixing parameters
=================
Tying Parameters
================
Bounding parameters
===================
Further Reading
===============
All of the mechansiams for dealing with parameters are baked right into GPy.core.model, from which all of the classes in GPy.models inherrit. To learn how to construct your own model, you might want to read ??link?? creating_new_models.
By deafult, GPy uses the tnc optimizer (from scipy.optimize.tnc). To use other optimisers, and to control the setting of those optimisers, as well as other funky features like automated restarts and diagnostics, you can read the optimization tutorial ??link??.

View file

@ -2,6 +2,7 @@
**************************** ****************************
tutorial : A kernel overview tutorial : A kernel overview
**************************** ****************************
The aim of this tutorial is to give a better understanding of the kernel objects in GPy and to list the ones that are already implemented. The code shown in this tutorial can be found without the comments at GPy/examples/tuto_kernel_overview.py.
First we import the libraries we will need :: First we import the libraries we will need ::
@ -38,7 +39,7 @@ return::
Implemented kernels Implemented kernels
=================== ===================
Many kernels are already implemented in GPy. Here is a summary of most of them: Many kernels are already implemented in GPy. A comprehensive list can be found `here <kernel_implementation.html>`_ . The following figure gives a summary of most of them:
.. figure:: Figures/tuto_kern_overview_allkern.png .. figure:: Figures/tuto_kern_overview_allkern.png
:align: center :align: center