bugfix merge!

James Hensman 2014-08-12 11:58:28 +01:00
commit 4139397dd3
37 changed files with 2415 additions and 1975 deletions

View file

@@ -22,15 +22,44 @@ def extract_properties_to_index(index, props):
 class ParameterIndexOperations(object):
-    '''
-    Index operations for storing param index _properties
-    This class enables index with slices retrieved from object.__getitem__ calls.
-    Adding an index will add the selected indexes by the slice of an indexarray
-    indexing a shape shaped array to the flattened index array. Remove will
-    remove the selected slice indices from the flattened array.
-    You can give an offset to set an offset for the given indices in the
-    index array, for multi-param handling.
-    '''
+    """
+    This object wraps a dictionary, whose keys are _operations_ that we'd like
+    to apply to a parameter array, and whose values are np integer arrays which
+    index the parameter array appropriately.
+
+    A model instance will contain one instance of this class for each thing
+    that needs indexing (i.e. constraints, ties and priors). Parameters within
+    the model contain instances of the ParameterIndexOperationsView class,
+    which can map from a 'local' index (starting at 0) to this global index.
+
+    Here's an illustration:
+
+    #=======================================================================
+    model : 0 1 2 3 4 5 6 7 8 9
+    key1:           4 5
+    key2:                 7 8
+
+    param1:     0 1 2 3 4 5
+    key1:           2 3
+    key2:                 5
+
+    param2:           0 1 2 3 4
+    key1:             0
+    key2:                 2 3
+    #=======================================================================
+
+    The views of this global index have a subset of the keys in this global
+    (model) index.
+
+    Adding a new key (e.g. a constraint) to a view will cause the view to pass
+    the new key to the global index, along with the local index and an offset.
+    This global index then stores the key and the appropriate global index
+    (which can be seen by the view).
+
+    See also:
+    ParameterIndexOperationsView
+    """
     _offset = 0
     def __init__(self, constraints=None):
         self._properties = IntArrayDict()
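A minimal stand-alone sketch (not GPy's actual implementation) of the global-index idea described in the docstring above: keys are operations such as constraints, values are integer positions in the flat parameter array, and a view contributes its local indices shifted by an offset. The names global_index and add_indices are illustrative only.

    import numpy as np

    global_index = {}   # plays the role of the ParameterIndexOperations dictionary

    def add_indices(key, local_idx, offset):
        # a view maps its local indices (starting at 0) into the global array
        idx = np.asarray(local_idx, dtype=int) + offset
        current = global_index.get(key, np.empty(0, dtype=int))
        global_index[key] = np.union1d(current, idx)

    add_indices('key1', [2, 3], offset=2)   # a parameter whose block starts at global position 2
    add_indices('key2', [5], offset=2)
    print(global_index)                      # {'key1': array([4, 5]), 'key2': array([7])}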

View file

@@ -0,0 +1,91 @@
# Copyright (c) 2014, James Hensman, Max Zwiessele
# Licensed under the BSD 3-clause license (see LICENSE.txt)

import numpy as np
from parameterized import Parameterized
from param import Param

class Remapping(Parameterized):
    def mapping(self):
        """
        The return value of this function gives the values which the re-mapped
        parameters should take. Implement in sub-classes.
        """
        raise NotImplementedError

    def callback(self):
        raise NotImplementedError

    def __str__(self):
        return self.name

    def parameters_changed(self):
        #ensure all our parameters have the correct value, as specified by our mapping
        index = self._highest_parent_.constraints[self]
        self._highest_parent_.param_array[index] = self.mapping()
        [p.notify_observers(which=self) for p in self.tied_parameters]

class Fix(Remapping):
    pass

class Tie(Remapping):
    def __init__(self, value, name):
        super(Tie, self).__init__(name)
        self.tied_parameters = []
        self.value = Param('val', value)
        self.add_parameter(self.value)

    def add_tied_parameter(self, p):
        self.tied_parameters.append(p)
        p.add_observer(self, self.callback)
        self.parameters_changed()

    def callback(self, param=None, which=None):
        """
        This gets called whenever any of the tied parameters changes. We spend
        considerable effort working out what has changed and to what value.
        Then we store that value in self.value, and broadcast it everywhere
        with parameters_changed.
        """
        if which is self: return
        index = self._highest_parent_.constraints[self]
        if len(index)==0:
            return # nothing to tie together, this tie exists without any tied parameters
        self.collate_gradient()
        vals = self._highest_parent_.param_array[index]
        uvals = np.unique(vals)
        if len(uvals)==1:
            #all of the tied things are at the same value
            if np.all(self.value==uvals[0]):
                return # do not make any changes if the tied part has not changed!
            self.value[...] = uvals[0]
        elif len(uvals)==2:
            #only *one* of the tied things has changed. it must be different to self.value
            newval = uvals[uvals != self.value*1]
            self.value[...] = newval
        else:
            #more than one of the tied things changed. panic.
            raise ValueError, "something is wrong with the tying"

    def parameters_changed(self):
        #ensure all our parameters have the correct value, as specified by our mapping
        index = self._highest_parent_.constraints[self]
        if np.all(self._highest_parent_.param_array[index]==self.value):
            return # avoid triggering the update loop multiple times
        self._highest_parent_.param_array[index] = self.mapping()
        [p.notify_observers(which=self) for p in self.tied_parameters]
        self.collate_gradient()

    def mapping(self):
        return self.value

    def collate_gradient(self):
        index = self._highest_parent_.constraints[self]
        self.value.gradient = np.sum(self._highest_parent_.gradient[index])
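A small illustration (not part of the commit) of the update rule in Tie.callback above: given the current values of the tied parameters and the previously stored tie value, the single changed value is recovered with np.unique.

    import numpy as np

    def resolve_tied_value(vals, current):
        uvals = np.unique(vals)
        if len(uvals) == 1:        # all tied parameters already agree
            return uvals[0]
        elif len(uvals) == 2:      # exactly one parameter moved away from `current`
            return uvals[uvals != current][0]
        raise ValueError("more than one tied parameter changed at once")

    print(resolve_tied_value(np.array([3., 3., 7.]), current=3.))   # -> 7.0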

View file

@@ -195,6 +195,9 @@ class Logistic(Transformation):
         self.lower, self.upper = float(lower), float(upper)
         self.difference = self.upper - self.lower
     def f(self, x):
+        if (x<-300.).any():
+            x = x.copy()
+            x[x<-300.] = -300.
         return self.lower + self.difference / (1. + np.exp(-x))
     def finv(self, f):
         return np.log(np.clip(f - self.lower, 1e-10, np.inf) / np.clip(self.upper - f, 1e-10, np.inf))
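For context (not part of the patch): for very negative x, np.exp(-x) overflows float64 (np.exp(710) is already inf), which is what the new clamp guards against; the threshold -300 is the value chosen in the diff above.

    import numpy as np

    x = np.array([-1000., 0., 5.])
    print(1. / (1. + np.exp(-x)))        # overflow warning; first entry computed via inf
    xc = x.copy()
    xc[xc < -300.] = -300.
    print(1. / (1. + np.exp(-xc)))       # finite everywhere: [~5.1e-131, 0.5, 0.993...]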

View file

@@ -34,36 +34,38 @@ class NormalPrior(VariationalPrior):
         variational_posterior.variance.gradient -= (1. - (1. / (variational_posterior.variance))) * 0.5
 class SpikeAndSlabPrior(VariationalPrior):
-    def __init__(self, pi, variance = 1.0, name='SpikeAndSlabPrior', **kw):
+    def __init__(self, pi=None, learnPi=False, variance = 1.0, name='SpikeAndSlabPrior', **kw):
         super(VariationalPrior, self).__init__(name=name, **kw)
+        assert variance==1.0, "Not Implemented!"
         self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
         self.variance = Param('variance',variance)
-        self.add_parameters(self.pi)
-        self.group_spike_prob = False
+        self.learnPi = learnPi
+        if learnPi:
+            self.add_parameters(self.pi)
     def KL_divergence(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         gamma = variational_posterior.binary_prob
-        var_mean = np.square(mu)
-        var_S = (S - np.log(S))
+        var_mean = np.square(mu)/self.variance
+        var_S = (S/self.variance - np.log(S))
         var_gamma = (gamma*np.log(gamma/self.pi)).sum()+((1-gamma)*np.log((1-gamma)/(1-self.pi))).sum()
-        return var_gamma+ 0.5 * (gamma* (var_mean + var_S -1)).sum()
+        return var_gamma+ (gamma* (np.log(self.variance)-1. +var_mean + var_S)).sum()/2.
     def update_gradients_KL(self, variational_posterior):
         mu = variational_posterior.mean
         S = variational_posterior.variance
         gamma = variational_posterior.binary_prob
-        if self.group_spike_prob:
-            gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
-            gamma.gradient -= gamma_grad.mean(axis=0)
-        else:
-            gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
-        mu.gradient -= gamma*mu
-        S.gradient -= (1. - (1. / (S))) * gamma /2.
-        self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
+        gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+((np.square(mu)+S)/self.variance-np.log(S)+np.log(self.variance)-1.)/2.
+        mu.gradient -= gamma*mu/self.variance
+        S.gradient -= (1./self.variance - 1./S) * gamma /2.
+        if self.learnPi:
+            if len(self.pi)==1:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum()
+            elif len(self.pi.shape)==1:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
+            else:
+                self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi))
 class VariationalPosterior(Parameterized):
     def __init__(self, means=None, variances=None, name='latent space', *a, **kw):
@@ -155,6 +157,9 @@ class SpikeAndSlabPosterior(VariationalPosterior):
             n.parameters[dc['mean']._parent_index_] = dc['mean']
             n.parameters[dc['variance']._parent_index_] = dc['variance']
             n.parameters[dc['binary_prob']._parent_index_] = dc['binary_prob']
+            n._gradient_array_ = None
+            oversize = self.size - self.mean.size - self.variance.size
+            n.size = n.mean.size + n.variance.size + oversize
             n.ndim = n.mean.ndim
             n.shape = n.mean.shape
             n.num_data = n.mean.shape[0]
@@ -163,7 +168,7 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         else:
             return super(VariationalPrior, self).__getitem__(s)
-    def plot(self, *args):
+    def plot(self, *args, **kwargs):
         """
         Plot latent space X in 1D:
@@ -172,4 +177,4 @@ class SpikeAndSlabPosterior(VariationalPosterior):
         import sys
         assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
         from ...plotting.matplot_dep import variational_plots
-        return variational_plots.plot_SpikeSlab(self,*args)
+        return variational_plots.plot_SpikeSlab(self,*args, **kwargs)
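For reference, the expression the new KL_divergence computes (read off from the code above, with gamma the spike probabilities, pi the prior inclusion probability and sigma^2 = self.variance the slab variance; the sums run over all entries):

    KL = \sum \Big[ \gamma \log\frac{\gamma}{\pi} + (1-\gamma)\log\frac{1-\gamma}{1-\pi} \Big]
         + \frac{1}{2} \sum \gamma \Big[ \frac{\mu^2 + S}{\sigma^2} - \log S + \log\sigma^2 - 1 \Big]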

View file

@@ -118,5 +118,3 @@ class SparseGP(GP):
         psi2 = kern.psi2(self.Z, Xnew)
         var = Kxx - np.sum(np.sum(psi2 * Kmmi_LmiBLmi[None, :, :], 1), 1)
         return mu, var

View file

@@ -183,6 +183,33 @@ def bgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40,
         plt.close(fig)
     return m
def ssgplvm_oil(optimize=True, verbose=1, plot=True, N=200, Q=7, num_inducing=40, max_iters=1000, **k):
import GPy
from matplotlib import pyplot as plt
from ..util.misc import param_to_array
import numpy as np
_np.random.seed(0)
data = GPy.util.datasets.oil()
kernel = GPy.kern.RBF(Q, 1., 1./_np.random.uniform(0,1,(Q,)), ARD=True)# + GPy.kern.Bias(Q, _np.exp(-2))
Y = data['X'][:N]
m = GPy.models.SSGPLVM(Y, Q, kernel=kernel, num_inducing=num_inducing, **k)
m.data_labels = data['Y'][:N].argmax(axis=1)
if optimize:
m.optimize('bfgs', messages=verbose, max_iters=max_iters, gtol=.05)
if plot:
fig, (latent_axes, sense_axes) = plt.subplots(1, 2)
m.plot_latent(ax=latent_axes, labels=m.data_labels)
data_show = GPy.plotting.matplot_dep.visualize.vector_show((m.Y[0,:]))
lvm_visualizer = GPy.plotting.matplot_dep.visualize.lvm_dimselect(param_to_array(m.X.mean)[0:1,:], # @UnusedVariable
m, data_show, latent_axes=latent_axes, sense_axes=sense_axes, labels=m.data_labels)
raw_input('Press enter to finish')
plt.close(fig)
return m
 def _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim=False):
     _np.random.seed(1234)
@@ -288,6 +315,31 @@ def bgplvm_simulation(optimize=True, verbose=1,
         m.kern.plot_ARD('BGPLVM Simulation ARD Parameters')
     return m
def ssgplvm_simulation(optimize=True, verbose=1,
plot=True, plot_sim=False,
max_iters=2e4, useGPU=False
):
from GPy import kern
from GPy.models import SSGPLVM
D1, D2, D3, N, num_inducing, Q = 13, 5, 8, 45, 3, 9
_, _, Ylist = _simulate_sincos(D1, D2, D3, N, num_inducing, Q, plot_sim)
Y = Ylist[0]
k = kern.Linear(Q, ARD=True, useGPU=useGPU)# + kern.white(Q, _np.exp(-2)) # + kern.bias(Q)
#k = kern.RBF(Q, ARD=True, lengthscale=10.)
m = SSGPLVM(Y, Q, init="pca", num_inducing=num_inducing, kernel=k)
m.X.variance[:] = _np.random.uniform(0,.01,m.X.shape)
m.likelihood.variance = .1
if optimize:
print "Optimizing model:"
m.optimize('scg', messages=verbose, max_iters=max_iters,
gtol=.05)
if plot:
m.X.plot("SSGPLVM Latent Space 1D")
m.kern.plot_ARD('SSGPLVM Simulation ARD Parameters')
return m
 def bgplvm_simulation_missing_data(optimize=True, verbose=1,
                                    plot=True, plot_sim=False,
                                    max_iters=2e4,

View file

@@ -153,9 +153,14 @@ class Posterior(object):
         $$
         """
         if self._woodbury_inv is None:
-            self._woodbury_inv, _ = dpotri(self.woodbury_chol, lower=1)
-            #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
-            symmetrify(self._woodbury_inv)
+            if self._woodbury_chol is not None:
+                self._woodbury_inv, _ = dpotri(self._woodbury_chol, lower=1)
+                #self._woodbury_inv, _ = dpotrs(self.woodbury_chol, np.eye(self.woodbury_chol.shape[0]), lower=1)
+                symmetrify(self._woodbury_inv)
+            elif self._covariance is not None:
+                B = self._K - self._covariance
+                tmp, _ = dpotrs(self.K_chol, B)
+                self._woodbury_inv, _ = dpotrs(self.K_chol, tmp.T)
         return self._woodbury_inv
     @property
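A small numerical check (not from the patch) of what the new elif branch computes: with B = K - cov, the two triangular solves against the Cholesky of K amount to W = K^{-1}(K - cov)K^{-1}, i.e. the Woodbury inverse satisfying cov = K - K W K.

    import numpy as np

    K = np.array([[2.0, 0.5], [0.5, 1.0]])
    cov = np.array([[1.0, 0.2], [0.2, 0.8]])          # illustrative posterior covariance
    B = K - cov
    W = np.linalg.solve(K, np.linalg.solve(K, B).T)   # same algebra as the two dpotrs calls
    print(np.allclose(K - K.dot(W).dot(K), cov))      # -> True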

View file

@@ -64,20 +64,9 @@ class VarDTC(LatentFunctionInference):
         return Y * prec # TODO cache this, and make it effective
     def inference(self, kern, X, Z, likelihood, Y, Y_metadata=None):
-        if isinstance(X, VariationalPosterior):
-            uncertain_inputs = True
-            psi0 = kern.psi0(Z, X)
-            psi1 = kern.psi1(Z, X)
-            psi2 = kern.psi2(Z, X)
-        else:
-            uncertain_inputs = False
-            psi0 = kern.Kdiag(X)
-            psi1 = kern.K(X, Z)
-            psi2 = None
-        #see whether we're using variational uncertain inputs
         _, output_dim = Y.shape
+        uncertain_inputs = isinstance(X, VariationalPosterior)
         #see whether we've got a different noise variance for each datum
         beta = 1./np.fmax(likelihood.gaussian_variance(Y_metadata), 1e-6)
@@ -98,23 +87,21 @@ class VarDTC(LatentFunctionInference):
         diag.add(Kmm, self.const_jitter)
         Lm = jitchol(Kmm)
-        # The rather complex computations of A
+        # The rather complex computations of A, and the psi stats
         if uncertain_inputs:
+            psi0 = kern.psi0(Z, X)
+            psi1 = kern.psi1(Z, X)
             if het_noise:
-                psi2_beta = psi2 * (beta.flatten().reshape(num_data, 1, 1)).sum(0)
+                psi2_beta = np.sum([kern.psi2(Z,X[i:i+1,:]) * beta_i for i,beta_i in enumerate(beta)],0)
             else:
-                psi2_beta = psi2.sum(0) * beta
+                psi2_beta = kern.psi2(Z,X) * beta
-            #if 0:
-            #    evals, evecs = linalg.eigh(psi2_beta)
-            #    clipped_evals = np.clip(evals, 0., 1e6) # TODO: make clipping configurable
-            #    if not np.array_equal(evals, clipped_evals):
-            #        pass # print evals
-            #    tmp = evecs * np.sqrt(clipped_evals)
-            #    tmp = tmp.T
-            # no backsubstitution because of bound explosion on tr(A) if not...
             LmInv = dtrtri(Lm)
             A = LmInv.dot(psi2_beta.dot(LmInv.T))
         else:
+            psi0 = kern.Kdiag(X)
+            psi1 = kern.K(X, Z)
+            psi2 = None
             if het_noise:
                 tmp = psi1 * (np.sqrt(beta.reshape(num_data, 1)))
             else:
@@ -151,7 +138,7 @@ class VarDTC(LatentFunctionInference):
         log_marginal = _compute_log_marginal_likelihood(likelihood, num_data, output_dim, beta, het_noise,
                                                         psi0, A, LB, trYYT, data_fit, VVT_factor)
-        #put the gradients in the right places
+        #noise derivatives
         dL_dR = _compute_dL_dR(likelihood,
                                het_noise, uncertain_inputs, LB,
                                _LBi_Lmi_psi1Vf, DBi_plus_BiPBi, Lm, A,
@@ -160,6 +147,7 @@ class VarDTC(LatentFunctionInference):
         dL_dthetaL = likelihood.exact_inference_gradients(dL_dR,Y_metadata)
+        #put the gradients in the right places
         if uncertain_inputs:
             grad_dict = {'dL_dKmm': dL_dKmm,
                          'dL_dpsi0':dL_dpsi0,
@@ -403,10 +391,7 @@ def _compute_dL_dpsi(num_inducing, num_data, output_dim, beta, Lm, VVT_factor, C
         dL_dpsi2 = None
     else:
         dL_dpsi2 = beta * dL_dpsi2_beta
-    if uncertain_inputs:
-        # repeat for each of the N psi_2 matrices
-        dL_dpsi2 = np.repeat(dL_dpsi2[None, :, :], num_data, axis=0)
-    else:
+    if not uncertain_inputs:
         # subsume back into psi1 (==Kmn)
         dL_dpsi1 += 2.*np.dot(psi1, dL_dpsi2)
         dL_dpsi2 = None
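An aside (not from the patch): the heteroscedastic psi2_beta computed above is just a beta-weighted sum of per-datum psi2 matrices, so with a full (N, M, M) psi2 array the list comprehension and an einsum agree.

    import numpy as np

    psi2 = np.random.randn(4, 3, 3)     # per-datum psi2 blocks, shape (N, M, M)
    beta = np.random.rand(4)            # per-datum noise precision
    a = np.sum([psi2[i] * b for i, b in enumerate(beta)], 0)
    b_ = np.einsum('n,nmo->mo', beta, psi2)
    print(np.allclose(a, b_))           # -> True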

View file

@@ -16,7 +16,7 @@ try:
     import scikits.cuda.linalg as culinalg
     import pycuda.gpuarray as gpuarray
     from scikits.cuda import cublas
-    from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod
+    from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod, traceDot
 except:
     pass
@ -66,18 +66,13 @@ class VarDTC_GPU(LatentFunctionInference):
'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'), 'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'),
'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN 'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN
'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN 'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN
'psi2_t_gpu' :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'),
# inference_minibatch # inference_minibatch
'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), 'dL_dpsi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'),
'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), 'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), 'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
'psi2p_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), 'psi2p_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
} }
self.gpuCache['ones_gpu'].fill(1.0) self.gpuCache['ones_gpu'].fill(1.0)
@@ -126,6 +121,89 @@ class VarDTC_GPU(LatentFunctionInference):
         else:
             return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs, het_noise):
num_inducing, input_dim = Z.shape[0], Z.shape[1]
num_data, output_dim = Y.shape
trYYT = self._trYYT
psi1Y_gpu = self.gpuCache['psi1Y_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
beta_gpu = self.gpuCache['beta_gpu']
YT_gpu = self.gpuCache['YT_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu']
beta_gpu.fill(beta)
betaYT_gpu.fill(0.)
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
YRY_full = trYYT*beta
if kern.useGPU:
psi1Y_gpu.fill(0.)
psi2_gpu.fill(0.)
psi0_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
ndata = n_end - n_start
X_slice = X[n_start:n_end]
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1p_gpu = kern.psi1(Z, X_slice)
psi2p_gpu = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice)
psi1p_gpu = kern.K(X_slice, Z)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
psi0_full += psi0.sum()
if uncertain_inputs:
sum_axis(psi2_gpu,psi2p_gpu,1,1)
else:
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
psi0_full *= beta
if uncertain_inputs:
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
else:
psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2_full += kern.psi2(Z, X_slice)*b
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi2_full += np.dot(psi1.T,psi1)*b
psi0_full += psi0.sum()*b
psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
if not het_noise:
YRY_full = trYYT*beta
psi1Y_gpu.set(psi1Y_full)
psi2_gpu.set(psi2_full)
return psi0_full, YRY_full
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
@@ -137,6 +215,12 @@ class VarDTC_GPU(LatentFunctionInference):
         num_inducing, input_dim = Z.shape[0], Z.shape[1]
         num_data, output_dim = Y.shape
+        #see whether we've got a different noise variance for each datum
+        beta = 1./np.fmax(likelihood.variance, 1e-6)
+        het_noise = beta.size > 1
+        if het_noise:
+            self.batchsize=0
         self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)
         if isinstance(X, VariationalPosterior):
@ -144,123 +228,10 @@ class VarDTC_GPU(LatentFunctionInference):
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1
trYYT = self._trYYT
psi1Y_gpu = self.gpuCache['psi1Y_gpu'] psi1Y_gpu = self.gpuCache['psi1Y_gpu']
psi2_gpu = self.gpuCache['psi2_gpu'] psi2_gpu = self.gpuCache['psi2_gpu']
beta_gpu = self.gpuCache['beta_gpu']
YT_gpu = self.gpuCache['YT_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu']
psi2_t_gpu = self.gpuCache['psi2_t_gpu']
if het_noise: psi0_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs, het_noise)
beta_gpu.set(np.asfortranarray(beta))
mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size)
YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
else:
beta_gpu.fill(beta)
betaYT_gpu.fill(0.)
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
YRY_full = trYYT*beta
if kern.useGPU:
psi1Y_gpu.fill(0.)
psi2_gpu.fill(0.)
psi0_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
ndata = n_end - n_start
X_slice = X[n_start:n_end]
beta_gpu_slice = beta_gpu[n_start:n_end]
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
if ndata==self.batchsize:
psi2_t_gpu_slice = psi2_t_gpu
else:
psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata]
if uncertain_inputs:
psi0p_gpu = kern.psi0(Z, X_slice)
psi1p_gpu = kern.psi1(Z, X_slice)
psi2p_gpu = kern.psi2(Z, X_slice)
else:
psi0p_gpu = kern.Kdiag(X_slice)
psi1p_gpu = kern.K(X_slice, Z)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
if het_noise:
psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
else:
psi0_full += gpuarray.sum(psi0p_gpu).get()
if uncertain_inputs:
if het_noise:
mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
else:
sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
else:
if het_noise:
psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata]
mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
else:
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
if not het_noise:
psi0_full *= beta
if uncertain_inputs:
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
else:
psi2_full = np.zeros((num_inducing,num_inducing),order='F')
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
psi0_full = 0
# YRY_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
if het_noise:
beta_slice = beta[n_start:n_end]
psi0_full += (beta_slice*psi0).sum()
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
# YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else:
psi0_full += psi0.sum()
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
if uncertain_inputs:
if het_noise:
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
else:
psi2_full += psi2.sum(axis=0)
else:
if het_noise:
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
else:
psi2_full += tdot(psi1.T)
if not het_noise:
psi0_full *= beta
psi1Y_full *= beta
psi2_full *= beta
# YRY_full = trYYT*beta
psi1Y_gpu.set(psi1Y_full)
psi2_gpu.set(psi2_full)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components
@@ -373,6 +344,16 @@ class VarDTC_GPU(LatentFunctionInference):
         post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())
+        #======================================================================
+        # Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
+        #======================================================================
+        if not het_noise:
+            dL_dthetaL = (YRY_full + output_dim*psi0_full - num_data*output_dim)/-2.
+            dL_dthetaL += cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2_gpu.gpudata,1)
+            dL_dthetaL += cublas.cublasDdot(self.cublas_handle,v_gpu.size, v_gpu.gpudata,1,psi1Y_gpu.gpudata,1)
+            self.midRes['dL_dthetaL'] = -beta*dL_dthetaL
         return logL, dL_dKmm_gpu.get(), post
     def inference_minibatch(self, kern, X, Z, likelihood, Y):
@ -404,26 +385,26 @@ class VarDTC_GPU(LatentFunctionInference):
nSlice = n_end-n_start nSlice = n_end-n_start
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if het_noise:
beta = beta[n_start] # nSlice==1
if kern.useGPU: if kern.useGPU:
if uncertain_inputs: if not uncertain_inputs:
psi0p_gpu = kern.psi0(Z, X_slice)
psi1p_gpu = kern.psi1(Z, X_slice)
psi2p_gpu = kern.psi2(Z, X_slice)
else:
psi0p_gpu = kern.Kdiag(X_slice) psi0p_gpu = kern.Kdiag(X_slice)
psi1p_gpu = kern.K(X_slice, Z) psi1p_gpu = kern.K(X_slice, Z)
psi2p_gpu = self.gpuCache['psi2p_gpu'] psi2p_gpu = self.gpuCache['psi2p_gpu']
if psi2p_gpu.shape[0] > nSlice: elif het_noise:
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) psi0p_gpu = kern.psi0(Z, X_slice)
else: psi1p_gpu = kern.psi1(Z, X_slice)
if uncertain_inputs: psi2p_gpu = kern.psi2(Z, X_slice)
elif not uncertain_inputs or het_noise:
if not uncertain_inputs:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
elif het_noise:
psi0 = kern.psi0(Z, X_slice) psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice) psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice) psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi0p_gpu = self.gpuCache['psi0p_gpu'] psi0p_gpu = self.gpuCache['psi0p_gpu']
psi1p_gpu = self.gpuCache['psi1p_gpu'] psi1p_gpu = self.gpuCache['psi1p_gpu']
@ -431,91 +412,46 @@ class VarDTC_GPU(LatentFunctionInference):
if psi0p_gpu.shape[0] > nSlice: if psi0p_gpu.shape[0] > nSlice:
psi0p_gpu = psi0p_gpu[:nSlice] psi0p_gpu = psi0p_gpu[:nSlice]
psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
psi0p_gpu.set(np.asfortranarray(psi0)) psi0p_gpu.set(np.asfortranarray(psi0))
psi1p_gpu.set(np.asfortranarray(psi1)) psi1p_gpu.set(np.asfortranarray(psi1))
if uncertain_inputs: if uncertain_inputs:
psi2p_gpu.set(np.asfortranarray(psi2)) psi2p_gpu.set(np.asfortranarray(psi2))
#======================================================================
# Prepare gpu memory
#======================================================================
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
v_gpu = self.gpuCache['v_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu']
beta_gpu = self.gpuCache['beta_gpu']
dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dthetaL_gpu = self.gpuCache['dL_dthetaL_gpu']
psi2R_gpu = self.gpuCache['psi2_t_gpu'][:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
betapsi1_gpu = self.gpuCache['betapsi1_gpu']
thetaL_t_gpu = self.gpuCache['thetaL_t_gpu']
betaYT2_gpu = self.gpuCache['betaYT2_gpu']
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
beta_gpu_slice = beta_gpu[n_start:n_end]
# Adjust to the batch size
if dL_dpsi0_gpu.shape[0] > nSlice:
betaYT2_gpu = betaYT2_gpu[:,:nSlice]
dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
dL_dpsi2_gpu = dL_dpsi2_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
dL_dthetaL_gpu = dL_dthetaL_gpu.ravel()[:nSlice]
psi2R_gpu = psi2R_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
thetaL_t_gpu = thetaL_t_gpu.ravel()[:nSlice]
betapsi1_gpu = betapsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
mul_bcast(betapsi1_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
v_gpu = self.gpuCache['v_gpu']
dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
betaYT_gpu = self.gpuCache['betaYT_gpu']
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
dL_dpsi0_gpu.fill(0.) # Adjust to the batch size
cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1) if dL_dpsi0_gpu.shape[0] > nSlice:
dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
dL_dpsi0_gpu.fill(-output_dim *beta/2.)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
if uncertain_inputs: if uncertain_inputs:
outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size) cublas.cublasDcopy(self.cublas_handle, dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata, 1, dL_dpsi2_gpu.gpudata, 1)
cublas.cublasDscal(self.cublas_handle, dL_dpsi2_gpu.size, beta, dL_dpsi2_gpu.gpudata, 1)
else: else:
cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice) cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, beta, psi1p_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice)
#====================================================================== #======================================================================
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
if het_noise:
if not uncertain_inputs: betaY = betaYT_gpu_slice.get()
join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0p_gpu.get())-output_dim*beta)/2.
dL_dthetaL += -beta*beta*cublas.cublasDdot(self.cublas_handle,dL_dpsi2R_gpu.size, dL_dpsi2R_gpu.gpudata,1,psi2p_gpu.gpudata,1)
mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice) dL_dthetaL += -beta*(betaY*np.dot(psi1p_gpu.get(),v_gpu.get())).sum(axis=-1)
dL_dthetaL_gpu.fill(0.)
cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1)
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size)
cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1)
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size)
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
thetaL_t_gpu.fill(0.)
sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim)
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size)
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
if kern.useGPU: if kern.useGPU:
dL_dpsi0 = dL_dpsi0_gpu dL_dpsi0 = dL_dpsi0_gpu
@ -528,10 +464,11 @@ class VarDTC_GPU(LatentFunctionInference):
dL_dpsi2 = dL_dpsi2_gpu dL_dpsi2 = dL_dpsi2_gpu
else: else:
dL_dpsi2 = dL_dpsi2_gpu.get() dL_dpsi2 = dL_dpsi2_gpu.get()
if het_noise: if not het_noise:
dL_dthetaL = dL_dthetaL_gpu.get() if isEnd:
else: dL_dthetaL = self.midRes['dL_dthetaL']
dL_dthetaL = gpuarray.sum(dL_dthetaL_gpu).get() else:
dL_dthetaL = 0.
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0, grad_dict = {'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1, 'dL_dpsi1':dL_dpsi1,
@ -541,6 +478,6 @@ class VarDTC_GPU(LatentFunctionInference):
grad_dict = {'dL_dKdiag':dL_dpsi0, grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict

View file

@@ -10,6 +10,11 @@ from ...util.misc import param_to_array
 from . import LatentFunctionInference
 log_2_pi = np.log(2*np.pi)
+try:
+    from mpi4py import MPI
+except:
+    pass
 class VarDTC_minibatch(LatentFunctionInference):
     """
     An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
@@ -21,22 +26,39 @@ class VarDTC_minibatch(LatentFunctionInference):
     """
     const_jitter = 1e-6
-    def __init__(self, batchsize, limit=1):
+    def __init__(self, batchsize=None, limit=1, mpi_comm=None):
         self.batchsize = batchsize
+        self.mpi_comm = mpi_comm
+        self.limit = limit
         # Cache functions
         from ...util.caching import Cacher
         self.get_trYYT = Cacher(self._get_trYYT, limit)
         self.get_YYTfactor = Cacher(self._get_YYTfactor, limit)
         self.midRes = {}
         self.batch_pos = 0 # the starting position of the current mini-batch
+        self.Y_speedup = False # Replace Y with the cholesky factor of YY.T, but the posterior inference will be wrong
+    def __getstate__(self):
+        # has to be overridden, as Cacher objects cannot be pickled.
+        return self.batchsize, self.limit, self.Y_speedup
+    def __setstate__(self, state):
+        # has to be overridden, as Cacher objects cannot be pickled.
+        self.batchsize, self.limit, self.Y_speedup = state
+        self.mpi_comm = None
+        self.midRes = {}
+        self.batch_pos = 0
+        from ...util.caching import Cacher
+        self.get_trYYT = Cacher(self._get_trYYT, self.limit)
+        self.get_YYTfactor = Cacher(self._get_YYTfactor, self.limit)
     def set_limit(self, limit):
         self.get_trYYT.limit = limit
         self.get_YYTfactor.limit = limit
     def _get_trYYT(self, Y):
         return param_to_array(np.sum(np.square(Y)))
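A generic illustration (not from the patch) of the pattern used by __getstate__/__setstate__ above: unpicklable members (here the Cacher objects) are dropped from the pickled state and rebuilt when the object is restored.

    import pickle

    class HasCache(object):
        def __init__(self, limit=3):
            self.limit = limit
            self.cache = lambda x: x        # stands in for a Cacher; lambdas cannot be pickled
        def __getstate__(self):
            return self.limit               # only picklable state goes into the pickle
        def __setstate__(self, state):
            self.limit = state
            self.cache = lambda x: x        # rebuild the unpicklable member on load

    obj = pickle.loads(pickle.dumps(HasCache(5)))
    print(obj.limit)                        # -> 5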
@@ -51,88 +73,103 @@ class VarDTC_minibatch(LatentFunctionInference):
             return param_to_array(Y)
         else:
             return jitchol(tdot(Y))
def gatherPsiStat(self, kern, X, Z, Y, beta, uncertain_inputs):
het_noise = beta.size > 1
trYYT = self.get_trYYT(Y)
if self.Y_speedup and not het_noise:
Y = self.get_YYTfactor(Y)
num_inducing = Z.shape[0]
num_data, output_dim = Y.shape
if self.batchsize == None:
self.batchsize = num_data
psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0.
YRY_full = 0.
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
if (n_end-n_start)==num_data:
Y_slice = Y
X_slice = X
else:
Y_slice = Y[n_start:n_end]
X_slice = X[n_start:n_end]
if het_noise:
b = beta[n_start]
YRY_full += np.inner(Y_slice, Y_slice)*b
else:
b = beta
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2_full += kern.psi2(Z, X_slice)*b
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi2_full += np.dot(psi1.T,psi1)*b
psi0_full += psi0.sum()*b
psi1Y_full += np.dot(Y_slice.T,psi1)*b # DxM
if not het_noise:
YRY_full = trYYT*beta
if self.mpi_comm != None:
psi0_all = np.array(psi0_full)
psi1Y_all = psi1Y_full.copy()
psi2_all = psi2_full.copy()
YRY_all = np.array(YRY_full)
self.mpi_comm.Allreduce([psi0_full, MPI.DOUBLE], [psi0_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi1Y_full, MPI.DOUBLE], [psi1Y_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([psi2_full, MPI.DOUBLE], [psi2_all, MPI.DOUBLE])
self.mpi_comm.Allreduce([YRY_full, MPI.DOUBLE], [YRY_all, MPI.DOUBLE])
return psi0_all, psi1Y_all, psi2_all, YRY_all
return psi0_full, psi1Y_full, psi2_full, YRY_full
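A quick sanity check (not from the patch) of the idea behind gatherPsiStat: accumulating the statistics over mini-batches gives the same totals as a single full pass, which is what makes the batched loop above equivalent to the monolithic computation.

    import numpy as np

    X = np.random.randn(10, 2)
    full = X.T.dot(X)                                                  # analogous to psi2_full in one pass
    batched = sum(X[i:i+3].T.dot(X[i:i+3]) for i in range(0, 10, 3))
    print(np.allclose(full, batched))                                  # -> True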
def inference_likelihood(self, kern, X, Z, likelihood, Y): def inference_likelihood(self, kern, X, Z, likelihood, Y):
""" """
The first phase of inference: The first phase of inference:
Compute: log-likelihood, dL_dKmm Compute: log-likelihood, dL_dKmm
Cached intermediate results: Kmm, KmmInv, Cached intermediate results: Kmm, KmmInv,
""" """
num_inducing = Z.shape[0] num_data, output_dim = Y.shape
num_data, output_dim = Y.shape if self.mpi_comm != None:
num_data_all = np.array(num_data,dtype=np.int32)
self.mpi_comm.Allreduce([np.int32(num_data), MPI.INT], [num_data_all, MPI.INT])
num_data = num_data_all
if isinstance(X, VariationalPosterior): if isinstance(X, VariationalPosterior):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
if het_noise: if het_noise:
self.batchsize = 1 self.batchsize = 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y
trYYT = self.get_trYYT(Y)
psi2_full = np.zeros((num_inducing,num_inducing))
psi1Y_full = np.zeros((output_dim,num_inducing)) # DxM
psi0_full = 0
YRY_full = 0
for n_start in xrange(0,num_data,self.batchsize):
n_end = min(self.batchsize+n_start, num_data)
Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end]
if uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z)
psi2 = None
if het_noise:
beta_slice = beta[n_start:n_end]
psi0_full += (beta_slice*psi0).sum()
psi1Y_full += np.dot(beta_slice*Y_slice.T,psi1) # DxM
YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
else:
psi0_full += psi0.sum()
psi1Y_full += np.dot(Y_slice.T,psi1) # DxM
if uncertain_inputs:
if het_noise:
psi2_full += beta_slice*psi2
else:
psi2_full += psi2.sum(0)
else:
if het_noise:
psi2_full += beta_slice*np.outer(psi1,psi1)
else:
psi2_full += np.einsum('nm,jk->mk',psi1,psi1)
if not het_noise:
psi0_full *= beta
psi1Y_full *= beta
psi2_full *= beta
YRY_full = trYYT*beta
psi0_full, psi1Y_full, psi2_full, YRY_full = self.gatherPsiStat(kern, X, Z, Y, beta, uncertain_inputs)
#====================================================================== #======================================================================
# Compute Common Components # Compute Common Components
#====================================================================== #======================================================================
self.psi1Y = psi1Y_full
Kmm = kern.K(Z).copy() Kmm = kern.K(Z).copy()
diag.add(Kmm, self.const_jitter) diag.add(Kmm, self.const_jitter)
Lm = jitchol(Kmm) Lm = jitchol(Kmm)
Lambda = Kmm+psi2_full Lambda = Kmm+psi2_full
LL = jitchol(Lambda) LL = jitchol(Lambda)
b,_ = dtrtrs(LL, psi1Y_full.T) b,_ = dtrtrs(LL, psi1Y_full.T)
@ -140,18 +177,18 @@ class VarDTC_minibatch(LatentFunctionInference):
v,_ = dtrtrs(LL.T,b,lower=False) v,_ = dtrtrs(LL.T,b,lower=False)
vvt = np.einsum('md,od->mo',v,v) vvt = np.einsum('md,od->mo',v,v)
LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
# Cache intermediate results # Cache intermediate results
self.midRes['dL_dpsi2R'] = dL_dpsi2R self.midRes['dL_dpsi2R'] = dL_dpsi2R
self.midRes['v'] = v self.midRes['v'] = v
#====================================================================== #======================================================================
# Compute log-likelihood # Compute log-likelihood
#====================================================================== #======================================================================
@ -159,33 +196,36 @@ class VarDTC_minibatch(LatentFunctionInference):
logL_R = -np.log(beta).sum() logL_R = -np.log(beta).sum()
else: else:
logL_R = -num_data*np.log(beta) logL_R = -num_data*np.log(beta)
logL = ( logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
-(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.
-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
)
#====================================================================== #======================================================================
# Compute dL_dKmm # Compute dL_dKmm
#====================================================================== #======================================================================
dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
#====================================================================== #======================================================================
# Compute the Posterior distribution of inducing points p(u|Y) # Compute the Posterior distribution of inducing points p(u|Y)
#====================================================================== #======================================================================
# phi_u_mean = np.dot(Kmm,v) if not self.Y_speedup or het_noise:
# LLInvKmm,_ = dtrtrs(LL,Kmm) post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm)
# # phi_u_var = np.einsum('ma,mb->ab',LLInvKmm,LLInvKmm) else:
# phi_u_var = Kmm - np.dot(LLInvKmm.T,LLInvKmm) post = None
post = Posterior(woodbury_inv=KmmInvPsi2P, woodbury_vector=v, K=Kmm, mean=None, cov=None, K_chol=Lm) #======================================================================
# Compute dL_dthetaL for uncertain input and non-heteroscedastic noise
#======================================================================
if not het_noise:
dL_dthetaL = (YRY_full*beta + beta*output_dim*psi0_full - num_data*output_dim*beta)/2. - beta*(dL_dpsi2R*psi2_full).sum() - beta*(v.T*psi1Y_full).sum()
self.midRes['dL_dthetaL'] = dL_dthetaL
return logL, dL_dKmm, post return logL, dL_dKmm, post
def inference_minibatch(self, kern, X, Z, likelihood, Y): def inference_minibatch(self, kern, X, Z, likelihood, Y):
""" """
The second phase of inference: Computing the derivatives over a minibatch of Y The second phase of inference: Computing the derivatives over a minibatch of Y
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
return a flag showing whether it reached the end of Y (isEnd) return a flag showing whether it reached the end of Y (isEnd)
""" """
@ -196,14 +236,17 @@ class VarDTC_minibatch(LatentFunctionInference):
uncertain_inputs = True uncertain_inputs = True
else: else:
uncertain_inputs = False uncertain_inputs = False
#see whether we've got a different noise variance for each datum #see whether we've got a different noise variance for each datum
beta = 1./np.fmax(likelihood.variance, 1e-6) beta = 1./np.fmax(likelihood.variance, 1e-6)
het_noise = beta.size > 1 het_noise = beta.size > 1
# VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency! # VVT_factor is a matrix such that tdot(VVT_factor) = VVT...this is for efficiency!
#self.YYTfactor = beta*self.get_YYTfactor(Y) #self.YYTfactor = beta*self.get_YYTfactor(Y)
YYT_factor = Y if self.Y_speedup and not het_noise:
YYT_factor = self.get_YYTfactor(Y)
else:
YYT_factor = Y
n_start = self.batch_pos n_start = self.batch_pos
n_end = min(self.batchsize+n_start, num_data) n_end = min(self.batchsize+n_start, num_data)
if n_end==num_data: if n_end==num_data:
@ -212,66 +255,64 @@ class VarDTC_minibatch(LatentFunctionInference):
else: else:
isEnd = False isEnd = False
self.batch_pos = n_end self.batch_pos = n_end
num_slice = n_end-n_start
Y_slice = YYT_factor[n_start:n_end] Y_slice = YYT_factor[n_start:n_end]
X_slice = X[n_start:n_end] X_slice = X[n_start:n_end]
if uncertain_inputs: if not uncertain_inputs:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
else:
psi0 = kern.Kdiag(X_slice) psi0 = kern.Kdiag(X_slice)
psi1 = kern.K(X_slice, Z) psi1 = kern.K(X_slice, Z)
psi2 = None psi2 = None
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
elif het_noise:
psi0 = kern.psi0(Z, X_slice)
psi1 = kern.psi1(Z, X_slice)
psi2 = kern.psi2(Z, X_slice)
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
if het_noise: if het_noise:
beta = beta[n_start] # assuming batchsize==1 beta = beta[n_start] # assuming batchsize==1
betaY = beta*Y_slice betaY = beta*Y_slice
betapsi1 = np.einsum('n,nm->nm',beta,psi1)
#====================================================================== #======================================================================
# Load Intermediate Results # Load Intermediate Results
#====================================================================== #======================================================================
dL_dpsi2R = self.midRes['dL_dpsi2R'] dL_dpsi2R = self.midRes['dL_dpsi2R']
v = self.midRes['v'] v = self.midRes['v']
#====================================================================== #======================================================================
# Compute dL_dpsi # Compute dL_dpsi
#====================================================================== #======================================================================
dL_dpsi0 = -0.5 * output_dim * (beta * np.ones((n_end-n_start,))) dL_dpsi0 = -output_dim * (beta * np.ones((n_end-n_start,)))/2.
dL_dpsi1 = np.dot(betaY,v.T) dL_dpsi1 = np.dot(betaY,v.T)
if uncertain_inputs: if uncertain_inputs:
dL_dpsi2 = beta* dL_dpsi2R dL_dpsi2 = beta* dL_dpsi2R
else: else:
dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2. dL_dpsi1 += np.dot(betapsi1,dL_dpsi2R)*2.
dL_dpsi2 = None dL_dpsi2 = None
#====================================================================== #======================================================================
# Compute dL_dthetaL # Compute dL_dthetaL
#====================================================================== #======================================================================
if het_noise: if het_noise:
if uncertain_inputs: if uncertain_inputs:
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2) psiR = np.einsum('mo,mo->',dL_dpsi2R,psi2)
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R)
dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1) dL_dthetaL = ((np.square(betaY)).sum(axis=-1) + np.square(beta)*(output_dim*psi0)-output_dim*beta)/2. - np.square(beta)*psiR- (betaY*np.dot(betapsi1,v)).sum(axis=-1)
else: else:
if uncertain_inputs: if isEnd:
psiR = np.einsum('mo,nmo->',dL_dpsi2R,psi2) dL_dthetaL = self.midRes['dL_dthetaL']
else: else:
psiR = np.einsum('nm,no,mo->',psi1,psi1,dL_dpsi2R) dL_dthetaL = 0.
dL_dthetaL = ((np.square(betaY)).sum() + beta*beta*output_dim*(psi0.sum())-num_slice*output_dim*beta)/2. - beta*beta*psiR- (betaY*np.dot(betapsi1,v)).sum()
if uncertain_inputs: if uncertain_inputs:
grad_dict = {'dL_dpsi0':dL_dpsi0, grad_dict = {'dL_dpsi0':dL_dpsi0,
'dL_dpsi1':dL_dpsi1, 'dL_dpsi1':dL_dpsi1,
@ -281,71 +322,92 @@ class VarDTC_minibatch(LatentFunctionInference):
grad_dict = {'dL_dKdiag':dL_dpsi0, grad_dict = {'dL_dKdiag':dL_dpsi0,
'dL_dKnm':dL_dpsi1, 'dL_dKnm':dL_dpsi1,
'dL_dthetaL':dL_dthetaL} 'dL_dthetaL':dL_dthetaL}
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict
def update_gradients(model): def update_gradients(model, mpi_comm=None):
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) if mpi_comm == None:
Y = model.Y
X = model.X
else:
Y = model.Y_local
X = model.X[model.N_range[0]:model.N_range[1]]
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, X, model.Z, model.likelihood, Y)
het_noise = model.likelihood.variance.size > 1 het_noise = model.likelihood.variance.size > 1
if het_noise: if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],)) dL_dthetaL = np.empty((model.Y.shape[0],))
else: else:
dL_dthetaL = 0 dL_dthetaL = np.float64(0.)
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
kern_grad = model.kern.gradient.copy() kern_grad = model.kern.gradient.copy()
kern_grad[:] = 0.
#gradients w.r.t. Z model.Z.gradient = 0.
model.Z.gradient = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False isEnd = False
while not isEnd: while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, X, model.Z, model.likelihood, Y)
if isinstance(model.X, VariationalPosterior): if isinstance(model.X, VariationalPosterior):
X_slice = model.X[n_range[0]:n_range[1]] if (n_range[1]-n_range[0])==X.shape[0]:
X_slice = X
dL_dpsi1 = grad_dict['dL_dpsi1']#[None, :] elif mpi_comm ==None:
dL_dpsi2 = grad_dict['dL_dpsi2'][None, :, :] X_slice = model.X[n_range[0]:n_range[1]]
else:
X_slice = model.X[model.N_range[0]+n_range[0]:model.N_range[0]+n_range[1]]
#gradients w.r.t. kernel #gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=X_slice,Z=model.Z,dL_dpsi0=grad_dict['dL_dpsi0'],dL_dpsi1=dL_dpsi1,dL_dpsi2=dL_dpsi2) model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
kern_grad += model.kern.gradient kern_grad += model.kern.gradient
#gradients w.r.t. Z #gradients w.r.t. Z
model.Z.gradient += model.kern.gradients_Z_expectations( model.Z.gradient += model.kern.gradients_Z_expectations(
dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2,
Z=model.Z, variational_posterior=X_slice)
#gradients w.r.t. posterior parameters of X #gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations( X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
variational_posterior=X_slice, model.set_X_gradients(X_slice, X_grad)
Z=model.Z,
dL_dpsi0=grad_dict['dL_dpsi0'],
dL_dpsi1=dL_dpsi1,
dL_dpsi2=dL_dpsi2)
model.X.mean[n_range[0]:n_range[1]].gradient = X_grad[0]
model.X.variance[n_range[0]:n_range[1]].gradient = X_grad[1]
if het_noise: if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else: else:
dL_dthetaL += grad_dict['dL_dthetaL'] dL_dthetaL += grad_dict['dL_dthetaL']
#import ipdb;ipdb.set_trace()
model.grad_dict = grad_dict # Gather the gradients from multiple MPI nodes
if isinstance(model.X, VariationalPosterior): if mpi_comm != None:
# Update Log-likelihood if het_noise:
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) assert False, "Not implemented!"
# update for the KL divergence kern_grad_all = kern_grad.copy()
model.variational_prior.update_gradients_KL(model.X) Z_grad_all = model.Z.gradient.copy()
mpi_comm.Allreduce([kern_grad, MPI.DOUBLE], [kern_grad_all, MPI.DOUBLE])
mpi_comm.Allreduce([model.Z.gradient, MPI.DOUBLE], [Z_grad_all, MPI.DOUBLE])
kern_grad = kern_grad_all
model.Z.gradient = Z_grad_all
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
model.kern.gradient += kern_grad
# Set the gradients w.r.t. kernel #gradients w.r.t. Z
model.kern.gradient = kern_grad model.Z.gradient += model.kern.gradients_X(dL_dKmm, model.Z)
# Update Log-likelihood
KL_div = model.variational_prior.KL_divergence(X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(X)
if mpi_comm != None:
KL_div_all = np.array(KL_div)
mpi_comm.Allreduce([np.float64(KL_div), MPI.DOUBLE], [KL_div_all, MPI.DOUBLE])
KL_div = KL_div_all
[mpi_comm.Allgatherv([pp.copy(), MPI.DOUBLE], [pa, (model.N_list*pa.shape[-1], None), MPI.DOUBLE]) for pp,pa in zip(model.get_X_gradients(X),model.get_X_gradients(model.X))]
from ...models import SSGPLVM
if isinstance(model, SSGPLVM):
grad_pi = np.array(model.variational_prior.pi.gradient)
mpi_comm.Allreduce([grad_pi.copy(), MPI.DOUBLE], [model.variational_prior.pi.gradient, MPI.DOUBLE])
model._log_marginal_likelihood -= KL_div
# dL_dthetaL # dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL) model.likelihood.update_gradients(dL_dthetaL)
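The MPI branch above combines each node's partial kernel, inducing-point and KL gradients with Allreduce before they are written back into the model. A minimal sketch of that reduction pattern, assuming mpi4py is installed and the script is launched under mpirun; the array names are illustrative and not part of GPy's API:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
# illustrative gradient computed from this node's slice of the data
local_grad = np.arange(3, dtype=np.float64) * (comm.Get_rank() + 1)
global_grad = np.empty_like(local_grad)
# after the call every node holds the sum of all per-node gradients
comm.Allreduce([local_grad, MPI.DOUBLE], [global_grad, MPI.DOUBLE], op=MPI.SUM)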
View file
@@ -8,12 +8,13 @@ from _src.mlp import MLP
from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52 from _src.periodic import PeriodicExponential, PeriodicMatern32, PeriodicMatern52
from _src.independent_outputs import IndependentOutputs, Hierarchical from _src.independent_outputs import IndependentOutputs, Hierarchical
from _src.coregionalize import Coregionalize from _src.coregionalize import Coregionalize
from _src.ssrbf import SSRBF # TODO: ZD: did you remove this?
from _src.ODE_UY import ODE_UY from _src.ODE_UY import ODE_UY
from _src.ODE_UYC import ODE_UYC from _src.ODE_UYC import ODE_UYC
from _src.ODE_st import ODE_st from _src.ODE_st import ODE_st
from _src.ODE_t import ODE_t from _src.ODE_t import ODE_t
from _src.poly import Poly from _src.poly import Poly
from _src.trunclinear import TruncLinear,TruncLinear_inf
from _src.splitKern import SplitKern,DiffGenomeKern from _src.splitKern import SplitKern,DiffGenomeKern
# TODO: put this in an init file somewhere # TODO: put this in an init file somewhere
View file
@@ -63,13 +63,16 @@ class Add(CombinationKernel):
target = np.zeros(X.shape) target = np.zeros(X.shape)
[target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts] [target.__iadd__(p.gradients_X_diag(dL_dKdiag, X)) for p in self.parts]
return target return target
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi0(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts)) return reduce(np.add, (p.psi1(Z, variational_posterior) for p in self.parts))
@Cache_this(limit=2, force_kwargs=['which_parts'])
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts)) psi2 = reduce(np.add, (p.psi2(Z, variational_posterior) for p in self.parts))
#return psi2 #return psi2
@@ -88,17 +91,18 @@ class Add(CombinationKernel):
# rbf X bias # rbf X bias
#elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)): #elif isinstance(p1, (Bias, Fixed)) and isinstance(p2, (RBF, RBFInv)):
elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)): elif isinstance(p1, Bias) and isinstance(p2, (RBF, Linear)):
tmp = p2.psi1(Z, variational_posterior) tmp = p2.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p1.variance * (tmp[:, :, None] + tmp[:, None, :]) psi2 += p1.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
#elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)): #elif isinstance(p2, (Bias, Fixed)) and isinstance(p1, (RBF, RBFInv)):
elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)): elif isinstance(p2, Bias) and isinstance(p1, (RBF, Linear)):
tmp = p1.psi1(Z, variational_posterior) tmp = p1.psi1(Z, variational_posterior).sum(axis=0)
psi2 += p2.variance * (tmp[:, :, None] + tmp[:, None, :]) psi2 += p2.variance * (tmp[:,None]+tmp[None,:]) #(tmp[:, :, None] + tmp[:, None, :])
elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)): elif isinstance(p2, (RBF, Linear)) and isinstance(p1, (RBF, Linear)):
assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far" assert np.intersect1d(p1.active_dims, p2.active_dims).size == 0, "only non overlapping kernel dimensions allowed so far"
tmp1 = p1.psi1(Z, variational_posterior) tmp1 = p1.psi1(Z, variational_posterior)
tmp2 = p2.psi1(Z, variational_posterior) tmp2 = p2.psi1(Z, variational_posterior)
psi2 += (tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :]) psi2 += np.einsum('nm,no->mo',tmp1,tmp2)+np.einsum('nm,no->mo',tmp2,tmp1)
#(tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])
else: else:
raise NotImplementedError, "psi2 cannot be computed for this kernel" raise NotImplementedError, "psi2 cannot be computed for this kernel"
return psi2 return psi2
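The reworked cross terms accumulate psi2 as an M x M matrix summed over data points rather than the earlier N x M x M array; the einsum above is exactly the summed outer product of the two parts' psi1 statistics. A small check of that equivalence, with illustrative shapes:

import numpy as np
rng = np.random.RandomState(0)
N, M = 5, 3
tmp1 = rng.randn(N, M)   # stands in for psi1 of the first kernel part
tmp2 = rng.randn(N, M)   # stands in for psi1 of the second kernel part
old = ((tmp1[:, :, None] * tmp2[:, None, :]) + (tmp2[:, :, None] * tmp1[:, None, :])).sum(0)
new = np.einsum('nm,no->mo', tmp1, tmp2) + np.einsum('nm,no->mo', tmp2, tmp1)
assert np.allclose(old, new)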
View file
@@ -3,16 +3,13 @@
import numpy as np import numpy as np
from scipy import weave
from kern import Kern from kern import Kern
from ...util.linalg import tdot from ...util.linalg import tdot
from ...util.misc import param_to_array
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this from ...util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import linear_psi_comp
from ...util.config import * from ...util.config import *
from .psi_comp import PSICOMP_Linear
class Linear(Kern): class Linear(Kern):
""" """
@@ -53,7 +50,8 @@ class Linear(Kern):
self.variances = Param('variances', variances, Logexp()) self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances) self.add_parameter(self.variances)
self.psicomp = PSICOMP_Linear()
@Cache_this(limit=2) @Cache_this(limit=2)
def K(self, X, X2=None): def K(self, X, X2=None):
if self.ARD: if self.ARD:
@@ -95,243 +93,43 @@ class Linear(Kern):
def gradients_X(self, dL_dK, X, X2=None): def gradients_X(self, dL_dK, X, X2=None):
if X2 is None: if X2 is None:
return 2.*(((X[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) return np.einsum('mq,nm->nq',X*self.variances,dL_dK)+np.einsum('nq,nm->mq',X*self.variances,dL_dK)
else: else:
return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1) return (((X2[None,:, :] * self.variances)) * dL_dK[:, :, None]).sum(1)
def gradients_X_diag(self, dL_dKdiag, X): def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*X return 2.*self.variances*dL_dKdiag[:,None]*X
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances
#---------------------------------------# #---------------------------------------#
# PSI statistics # # PSI statistics #
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[0]
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
S = variational_posterior.variance
return np.einsum('q,nq,nq->n',self.variances,gamma,np.square(mu)+S)
# return (self.variances*gamma*(np.square(mu)+S)).sum(axis=1)
else:
return np.sum(self.variances * self._mu2S(variational_posterior), 1)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[1]
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
return np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
# return (self.variances*gamma*mu).sum(axis=1)
else:
return self.K(variational_posterior.mean, Z) #the variance, it does nothing
@Cache_this(limit=1) @Cache_this(limit=1)
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variances, Z, variational_posterior)[2]
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
S = variational_posterior.variance
mu2 = np.square(mu)
variances2 = np.square(self.variances)
tmp = np.einsum('nq,q,mq,nq->nm',gamma,self.variances,Z,mu)
return np.einsum('nq,q,mq,oq,nq->nmo',gamma,variances2,Z,Z,mu2+S)+\
np.einsum('nm,no->nmo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->nmo',np.square(gamma),variances2,Z,Z,mu2)
else:
ZA = Z * self.variances
ZAinner = self._ZAinner(variational_posterior, Z)
return np.dot(ZAinner, ZA.T)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): dL_dvar = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[0]
gamma = variational_posterior.binary_prob if self.ARD:
mu = variational_posterior.mean self.variances.gradient = dL_dvar
S = variational_posterior.variance
mu2S = np.square(mu)+S
_dpsi2_dvariance, _, _, _, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad = np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu) +\
np.einsum('nmo,nmoq->q',dL_dpsi2,_dpsi2_dvariance)
if self.ARD:
self.variances.gradient = grad
else:
self.variances.gradient = grad.sum()
else: else:
#psi1 self.variances.gradient = dL_dvar.sum()
self.update_gradients_full(dL_dpsi1, variational_posterior.mean, Z)
# psi0:
tmp = dL_dpsi0[:, None] * self._mu2S(variational_posterior)
if self.ARD: self.variances.gradient += tmp.sum(0)
else: self.variances.gradient += tmp.sum()
#psi2
if self.ARD:
tmp = dL_dpsi2[:, :, :, None] * (self._ZAinner(variational_posterior, Z)[:, :, None, :] * Z[None, None, :, :])
self.variances.gradient += 2.*tmp.sum(0).sum(0).sum(0)
else:
self.variances.gradient += 2.*np.sum(dL_dpsi2 * self.psi2(Z, variational_posterior))/self.variances
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[1]
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
S = variational_posterior.variance
_, _, _, _, _dpsi2_dZ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad = np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, self.variances,mu) +\
np.einsum('nmo,noq->mq',dL_dpsi2,_dpsi2_dZ)
return grad
else:
#psi1
grad = self.gradients_X(dL_dpsi1.T, Z, variational_posterior.mean)
#psi2
self._weave_dpsi2_dZ(dL_dpsi2, Z, variational_posterior, grad)
return grad
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variances, Z, variational_posterior)[2:]
gamma = variational_posterior.binary_prob
mu = variational_posterior.mean
S = variational_posterior.variance
mu2S = np.square(mu)+S
_, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _ = linear_psi_comp._psi2computations(self.variances, Z, mu, S, gamma)
grad_gamma = np.einsum('n,q,nq->nq',dL_dpsi0,self.variances,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,self.variances,Z,mu) +\
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dgamma)
grad_mu = np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*self.variances,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,self.variances,Z) +\
np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dmu)
grad_S = np.einsum('n,nq,q->nq',dL_dpsi0,gamma,self.variances) + np.einsum('nmo,nmoq->nq',dL_dpsi2,_dpsi2_dS)
return grad_mu, grad_S, grad_gamma
else:
grad_mu, grad_S = np.zeros(variational_posterior.mean.shape), np.zeros(variational_posterior.mean.shape)
# psi0
grad_mu += dL_dpsi0[:, None] * (2.0 * variational_posterior.mean * self.variances)
grad_S += dL_dpsi0[:, None] * self.variances
# psi1
grad_mu += (dL_dpsi1[:, :, None] * (Z * self.variances)).sum(1)
# psi2
self._weave_dpsi2_dmuS(dL_dpsi2, Z, variational_posterior, grad_mu, grad_S)
return grad_mu, grad_S
#--------------------------------------------------#
# Helpers for psi statistics #
#--------------------------------------------------#
def _weave_dpsi2_dmuS(self, dL_dpsi2, Z, vp, target_mu, target_S):
# Think N,num_inducing,num_inducing,input_dim
ZA = Z * self.variances
AZZA = ZA.T[:, None, :, None] * ZA[None, :, None, :]
AZZA = AZZA + AZZA.swapaxes(1, 2)
AZZA_2 = AZZA/2.
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(m,mm,q,qq,factor,tmp)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
#Using weave, we can exploit the symmetry of this problem:
code = """
int n, m, mm,q,qq;
double factor,tmp;
%s
for(n=0;n<N;n++){
for(m=0;m<num_inducing;m++){
for(mm=0;mm<=m;mm++){
//add in a factor of 2 for the off-diagonal terms (and then count them only once)
if(m==mm)
factor = dL_dpsi2(n,m,mm);
else
factor = 2.0*dL_dpsi2(n,m,mm);
for(q=0;q<input_dim;q++){
//take the dot product of mu[n,:] and AZZA[:,m,mm,q] TODO: blas!
tmp = 0.0;
for(qq=0;qq<input_dim;qq++){
tmp += mu(n,qq)*AZZA(qq,m,mm,q);
}
target_mu(n,q) += factor*tmp;
target_S(n,q) += factor*AZZA_2(q,m,mm,q);
}
}
}
}
""" % pragma_string
support_code = """
%s
#include <math.h>
""" % header_string
mu = vp.mean
N,num_inducing,input_dim,mu = mu.shape[0],Z.shape[0],mu.shape[1],param_to_array(mu)
weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','mu','AZZA','AZZA_2','target_mu','target_S','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
def _weave_dpsi2_dZ(self, dL_dpsi2, Z, vp, target):
AZA = self.variances*self._ZAinner(vp, Z)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(n,mm,q)'
header_string = '#include <omp.h>'
weave_options = {'headers' : ['<omp.h>'],
'extra_compile_args': ['-fopenmp -O3'],
'extra_link_args' : ['-lgomp'],
'libraries': ['gomp']}
else:
pragma_string = ''
header_string = ''
weave_options = {'extra_compile_args': ['-O3']}
code="""
int n,m,mm,q;
%s
for(m=0;m<num_inducing;m++){
for(q=0;q<input_dim;q++){
for(mm=0;mm<num_inducing;mm++){
for(n=0;n<N;n++){
target(m,q) += 2*dL_dpsi2(n,m,mm)*AZA(n,mm,q);
}
}
}
}
""" % pragma_string
support_code = """
%s
#include <math.h>
""" % header_string
N,num_inducing,input_dim = vp.mean.shape[0],Z.shape[0],vp.mean.shape[1]
mu = param_to_array(vp.mean)
weave.inline(code, support_code=support_code,
arg_names=['N','num_inducing','input_dim','AZA','target','dL_dpsi2'],
type_converters=weave.converters.blitz,**weave_options)
@Cache_this(limit=1, ignore_args=(0,))
def _mu2S(self, vp):
return np.square(vp.mean) + vp.variance
@Cache_this(limit=1)
def _ZAinner(self, vp, Z):
ZA = Z*self.variances
inner = (vp.mean[:, None, :] * vp.mean[:, :, None])
diag_indices = np.diag_indices(vp.mean.shape[1], 2)
inner[:, diag_indices[0], diag_indices[1]] += vp.variance
return np.dot(ZA, inner).swapaxes(0, 1) # NOTE: self.ZAinner \in [num_inducing x num_data x input_dim]!
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances
class LinearFull(Kern): class LinearFull(Kern):
def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'): def __init__(self, input_dim, rank, W=None, kappa=None, active_dims=None, name='linear_full'):
View file
@@ -1,2 +1,50 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt). # Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
from ....core.parameterization.parameter_core import Pickleable
from GPy.util.caching import Cache_this
from ....core.parameterization import variational
import rbf_psi_comp
import ssrbf_psi_comp
import sslinear_psi_comp
import linear_psi_comp
class PSICOMP_RBF(object):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psicomputations(variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return rbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
class PSICOMP_Linear(object):
@Cache_this(limit=2, ignore_args=(0,))
def psicomputations(self, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psicomputations(variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psicomputations(variance, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
@Cache_this(limit=2, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
if isinstance(variational_posterior, variational.NormalPosterior):
return linear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
elif isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return sslinear_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior)
else:
raise ValueError, "unknown distribution received for psi-statistics"
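Both dispatcher classes route the computation to the Gaussian or the spike-and-slab implementation according to the type of the variational posterior. A self-contained sketch of that dispatch pattern for psi0 of the linear kernel; the class and function names here are stand-ins, not GPy classes:

import numpy as np

class NormalQ(object):
    # stand-in posterior container (not GPy's NormalPosterior)
    def __init__(self, mean, variance):
        self.mean, self.variance = mean, variance

class SpikeSlabQ(NormalQ):
    # stand-in for a spike-and-slab posterior with binary probabilities
    def __init__(self, mean, variance, binary_prob):
        super(SpikeSlabQ, self).__init__(mean, variance)
        self.binary_prob = binary_prob

def psi0_linear(variance, q):
    # Gaussian q(X): psi0_n = sum_q var_q * (mu_nq**2 + S_nq)
    return np.einsum('q,nq->n', variance, np.square(q.mean) + q.variance)

def psi0_sslinear(variance, q):
    # spike-and-slab q(X): the same expectation weighted by gamma_nq
    return np.einsum('q,nq,nq->n', variance, q.binary_prob,
                     np.square(q.mean) + q.variance)

def psi0(variance, q):
    # dispatch on the posterior type, analogous to PSICOMP_Linear.psicomputations;
    # the more specific class is tested first because SpikeSlabQ subclasses NormalQ here
    if isinstance(q, SpikeSlabQ):
        return psi0_sslinear(variance, q)
    return psi0_linear(variance, q)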
View file
@@ -2,14 +2,47 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
The package for the Psi statistics computation of the linear kernel for SSGPLVM The package for the Psi statistics computation of the linear kernel for Bayesian GPLVM
""" """
import numpy as np import numpy as np
from GPy.util.caching import Cache_this
#@Cache_this(limit=1) def psicomputations(variance, Z, variational_posterior):
def _psi2computations(variance, Z, mu, S, gamma): """
Compute psi-statistics for the linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = np.einsum('q,nq->n',variance,np.square(mu)+S)
psi1 = np.einsum('q,mq,nq->nm',variance,Z,mu)
tmp = np.einsum('q,mq,nq->nm',variance,Z,mu)
psi2 = np.einsum('q,mq,oq,nq->mo',np.square(variance),Z,Z,S) + np.einsum('nm,no->mo',tmp,tmp)
return psi0, psi1, psi2
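Written out, the three statistics assembled above are, with sigma_q^2 the per-dimension variances (LaTeX transcription for reference):

\psi_0^{n} = \sum_q \sigma_q^2\,(\mu_{nq}^2 + S_{nq}), \qquad
\psi_1^{nm} = \sum_q \sigma_q^2\, z_{mq}\, \mu_{nq},

\psi_2^{mm'} = \sum_n \Big[ \sum_q \sigma_q^4\, z_{mq} z_{m'q} S_{nq}
  + \Big(\sum_q \sigma_q^2 z_{mq}\mu_{nq}\Big)\Big(\sum_q \sigma_q^2 z_{m'q}\mu_{nq}\Big) \Big]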
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
dL_dvar, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq->q',dL_dpsi0,mu2S) + np.einsum('nm,mq,nq->q',dL_dpsi1,Z,mu)
dL_dmu += np.einsum('n,q,nq->nq',dL_dpsi0,2.*variance,mu) + np.einsum('nm,q,mq->nq',dL_dpsi1,variance,Z)
dL_dS += np.einsum('n,q->nq',dL_dpsi0,variance)
dL_dZ += np.einsum('nm,q,nq->mq',dL_dpsi1, variance,mu)
return dL_dvar, dL_dZ, dL_dmu, dL_dS
def _psi2computations(dL_dpsi2, variance, Z, mu, S):
""" """
Z - MxQ Z - MxQ
mu - NxQ mu - NxQ
@@ -18,34 +51,24 @@ def _psi2computations(variance, Z, mu, S, gamma):
""" """
# here are the "statistics" for psi1 and psi2 # here are the "statistics" for psi1 and psi2
# Produced intermediate results: # Produced intermediate results:
# _psi2 NxMxM # _psi2_dvariance Q
# _psi2_dvariance NxMxMxQ # _psi2_dZ MxQ
# _psi2_dZ NxMxQ # _psi2_dmu NxQ
# _psi2_dgamma NxMxMxQ # _psi2_dS NxQ
# _psi2_dmu NxMxMxQ
# _psi2_dS NxMxMxQ
mu2 = np.square(mu)
gamma2 = np.square(gamma)
variance2 = np.square(variance) variance2 = np.square(variance)
mu2S = mu2+S # NxQ common_sum = np.einsum('q,mq,nq->nm',variance,Z,mu) # NxM
common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
_dpsi2_dvariance = np.einsum('nq,q,mq,oq->nmoq',2.*(gamma*mu2S-gamma2*mu2),variance,Z,Z)+\
np.einsum('nq,mq,nq,no->nmoq',gamma,Z,mu,common_sum)+\
np.einsum('nq,oq,nq,nm->nmoq',gamma,Z,mu,common_sum)
_dpsi2_dgamma = np.einsum('q,mq,oq,nq->nmoq',variance2,Z,Z,(mu2S-2.*gamma*mu2))+\
np.einsum('q,mq,nq,no->nmoq',variance,Z,mu,common_sum)+\
np.einsum('q,oq,nq,nm->nmoq',variance,Z,mu,common_sum)
_dpsi2_dmu = np.einsum('q,mq,oq,nq,nq->nmoq',variance2,Z,Z,mu,2.*(gamma-gamma2))+\
np.einsum('nq,q,mq,no->nmoq',gamma,variance,Z,common_sum)+\
np.einsum('nq,q,oq,nm->nmoq',gamma,variance,Z,common_sum)
_dpsi2_dS = np.einsum('nq,q,mq,oq->nmoq',gamma,variance2,Z,Z)
_dpsi2_dZ = 2.*(np.einsum('nq,q,mq,nq->nmq',gamma,variance2,Z,mu2S)+np.einsum('nq,q,nq,nm->nmq',gamma,variance,mu,common_sum)
-np.einsum('nq,q,mq,nq->nmq',gamma2,variance2,Z,mu2))
return _dpsi2_dvariance, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _dpsi2_dZ dL_dvar = np.einsum('mo,nq,q,mq,oq->q',dL_dpsi2,2.*S,variance,Z,Z)+\
np.einsum('mo,mq,nq,no->q',dL_dpsi2,Z,mu,common_sum)+\
np.einsum('mo,oq,nq,nm->q',dL_dpsi2,Z,mu,common_sum)
dL_dmu = np.einsum('mo,q,mq,no->nq',dL_dpsi2,variance,Z,common_sum)+\
np.einsum('mo,q,oq,nm->nq',dL_dpsi2,variance,Z,common_sum)
dL_dS = np.empty(S.shape)
dL_dS[:] = np.einsum('mo,q,mq,oq->q',dL_dpsi2,variance2,Z,Z)
dL_dZ = 2.*(np.einsum('om,q,mq,nq->oq',dL_dpsi2,variance2,Z,S)+np.einsum('om,q,nq,nm->oq',dL_dpsi2,variance,mu,common_sum))
return dL_dvar, dL_dmu, dL_dS, dL_dZ
View file
@@ -0,0 +1,162 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from GPy.util.caching import Cacher
def psicomputations(variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
psi0 = np.empty(mu.shape[0])
psi0[:] = variance
psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
psi2 = _psi2computations(variance, lengthscale, Z, mu, S).sum(axis=0)
return psi0, psi1, psi2
def __psi1computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results:
# _psi1 NxM
lengthscale2 = np.square(lengthscale)
# psi1
_psi1_logdenom = np.log(S/lengthscale2+1.).sum(axis=-1) # N
_psi1_log = (_psi1_logdenom[:,None]+np.einsum('nmq,nq->nm',np.square(mu[:,None,:]-Z[None,:,:]),1./(S+lengthscale2)))/(-2.)
_psi1 = variance*np.exp(_psi1_log)
return _psi1
def __psi2computations(variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi2
# Produced intermediate results:
# _psi2 MxM
lengthscale2 = np.square(lengthscale)
_psi2_logdenom = np.log(2.*S/lengthscale2+1.).sum(axis=-1)/(-2.) # N
_psi2_exp1 = (np.square(Z[:,None,:]-Z[None,:,:])/lengthscale2).sum(axis=-1)/(-4.) #MxM
Z_hat = (Z[:,None,:]+Z[None,:,:])/2. #MxMxQ
denom = 1./(2.*S+lengthscale2)
_psi2_exp2 = -(np.square(mu)*denom).sum(axis=-1)[:,None,None]+2.*np.einsum('nq,moq,nq->nmo',mu,Z_hat,denom)-np.einsum('moq,nq->nmo',np.square(Z_hat),denom)
_psi2 = variance*variance*np.exp(_psi2_logdenom[:,None,None]+_psi2_exp1[None,:,:]+_psi2_exp2)
return _psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
dvar_psi1, dl_psi1, dZ_psi1, dmu_psi1, dS_psi1 = _psi1compDer(dL_dpsi1, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dvar_psi2, dl_psi2, dZ_psi2, dmu_psi2, dS_psi2 = _psi2compDer(dL_dpsi2, variance, lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
dL_dvar = np.sum(dL_dpsi0) + dvar_psi1 + dvar_psi2
dL_dlengscale = dl_psi1 + dl_psi2
if not ARD:
dL_dlengscale = dL_dlengscale.sum()
dL_dmu = dmu_psi1 + dmu_psi2
dL_dS = dS_psi1 + dS_psi2
dL_dZ = dZ_psi1 + dZ_psi2
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S):
"""
dL_dpsi1 - NxM
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1
# Produced intermediate results: dL_dparams w.r.t. psi1
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
_psi1 = _psi1computations(variance, lengthscale, Z, mu, S)
Lpsi1 = dL_dpsi1*_psi1
Zmu = Z[None,:,:]-mu[:,None,:] # NxMxQ
denom = 1./(S+lengthscale2)
Zmu2_denom = np.square(Zmu)*denom[:,None,:] #NxMxQ
_dL_dvar = Lpsi1.sum()/variance
_dL_dmu = np.einsum('nm,nmq,nq->nq',Lpsi1,Zmu,denom)
_dL_dS = np.einsum('nm,nmq,nq->nq',Lpsi1,(Zmu2_denom-1.),denom)/2.
_dL_dZ = -np.einsum('nm,nmq,nq->mq',Lpsi1,Zmu,denom)
_dL_dl = np.einsum('nm,nmq,nq->q',Lpsi1,(Zmu2_denom+(S/lengthscale2)[:,None,:]),denom*lengthscale)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
dL_dpsi2 - MxM
"""
# here are the "statistics" for psi2
# Produced the derivatives w.r.t. psi2:
# _dL_dvariance 1
# _dL_dlengthscale Q
# _dL_dZ MxQ
# _dL_dgamma NxQ
# _dL_dmu NxQ
# _dL_dS NxQ
lengthscale2 = np.square(lengthscale)
denom = 1./(2*S+lengthscale2)
denom2 = np.square(denom)
_psi2 = _psi2computations(variance, lengthscale, Z, mu, S) # NxMxM
Lpsi2 = dL_dpsi2[None,:,:]*_psi2
Lpsi2sum = np.einsum('nmo->n',Lpsi2) #N
Lpsi2Z = np.einsum('nmo,oq->nq',Lpsi2,Z) #NxQ
Lpsi2Z2 = np.einsum('nmo,oq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Z2p = np.einsum('nmo,mq,oq->nq',Lpsi2,Z,Z) #NxQ
Lpsi2Zhat = Lpsi2Z
Lpsi2Zhat2 = (Lpsi2Z2+Lpsi2Z2p)/2
_dL_dvar = Lpsi2sum.sum()*2/variance
_dL_dmu = (-2*denom) * (mu*Lpsi2sum[:,None]-Lpsi2Zhat)
_dL_dS = (2*np.square(denom))*(np.square(mu)*Lpsi2sum[:,None]-2*mu*Lpsi2Zhat+Lpsi2Zhat2) - denom*Lpsi2sum[:,None]
_dL_dZ = -np.einsum('nmo,oq->oq',Lpsi2,Z)/lengthscale2+np.einsum('nmo,oq->mq',Lpsi2,Z)/lengthscale2+ \
2*np.einsum('nmo,nq,nq->mq',Lpsi2,mu,denom) - np.einsum('nmo,nq,mq->mq',Lpsi2,denom,Z) - np.einsum('nmo,oq,nq->mq',Lpsi2,Z,denom)
_dL_dl = 2*lengthscale* ((S/lengthscale2*denom+np.square(mu*denom))*Lpsi2sum[:,None]+(Lpsi2Z2-Lpsi2Z2p)/(2*np.square(lengthscale2))-
(2*mu*denom2)*Lpsi2Zhat+denom2*Lpsi2Zhat2).sum(axis=0)
return _dL_dvar, _dL_dl, _dL_dZ, _dL_dmu, _dL_dS
_psi1computations = Cacher(__psi1computations, limit=1)
_psi2computations = Cacher(__psi2computations, limit=1)
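In closed form, the two cached helpers above compute (with lengthscales ℓ_q, variance σ² and q(x_n) = N(μ_n, diag(S_n)); LaTeX transcription for reference):

\psi_1^{nm} = \sigma^2 \prod_q \left(\frac{S_{nq}}{\ell_q^2}+1\right)^{-1/2}
  \exp\!\left(-\frac{(\mu_{nq}-z_{mq})^2}{2(S_{nq}+\ell_q^2)}\right)

\psi_2^{mm'} = \sigma^4 \sum_n \prod_q \left(\frac{2S_{nq}}{\ell_q^2}+1\right)^{-1/2}
  \exp\!\left(-\frac{(z_{mq}-z_{m'q})^2}{4\ell_q^2}
              -\frac{(\mu_{nq}-\bar z_q)^2}{2S_{nq}+\ell_q^2}\right),
  \qquad \bar z_q = \tfrac{1}{2}(z_{mq}+z_{m'q})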
View file
@@ -0,0 +1,411 @@
"""
The module for psi-statistics for RBF kernel
"""
import numpy as np
from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
from ....util.linalg_gpu import sum_axis
except:
pass
gpu_code = """
// define THREADNUM
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
__device__ void reduce_sum(double* array, int array_size) {
int s;
if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi1 += (muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(-2.);
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
psi2_local[threadIdx.x] = 0;
for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu[IDX_NQ(n,q)]- (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
log_psi2_n += dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) + log_denom2[IDX_NQ(n,q)]/(-2.);
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi1, double *psi1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmu = Z[IDX_MQ(m,q)] - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
dmu_local += lpsi1*Zmu/denom;
dS_local += lpsi1*(Zmu2_denom-1.)/denom;
dl_local += lpsi1*(Zmu2_denom+Snq/lq)/denom;
g_local[threadIdx.x] = -lpsi1*Zmu/denom;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dL_dpsi2, double *psi2n, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double Snq,mu_nq;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double dZ = Z[IDX_MQ(m1,q)] - Z[IDX_MQ(m2,q)];
double muZhat = mu_nq - (Z[IDX_MQ(m1,q)] + Z[IDX_MQ(m2,q)])/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
dmu_local += lpsi2*muZhat/denom;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom;
dl_local += lpsi2*((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq));
g_local[threadIdx.x] += 2.*lpsi2*(muZhat/denom-dZ/(2*lq));
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_RBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPdPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPdPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_RBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
# derivatives
'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
# grad
'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
}
else:
assert N==self.gpuCache['mu_gpu'].shape[0]
assert M==self.gpuCache['Z_gpu'].shape[0]
assert Q==self.gpuCache['l_gpu'].shape[0]
def sync_params(self, lengthscale, Z, mu, S):
if len(lengthscale)==1:
self.gpuCache['l_gpu'].fill(lengthscale)
else:
self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
N,Q = self.gpuCache['S_gpu'].shape
# t=self.g_compDenom(self.gpuCache['log_denom1_gpu'],self.gpuCache['log_denom2_gpu'],self.gpuCache['l_gpu'],self.gpuCache['S_gpu'], np.int32(N), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_compDenom '+str(t)
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
def get_dimensions(self, Z, variational_posterior):
return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
@Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""
Z - MxQ
mu - NxQ
S - NxQ
"""
N,M,Q = self.get_dimensions(Z, variational_posterior)
self._initGPUCache(N,M,Q)
self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
psi0 = np.empty((N,))
psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
# t = self.g_psi1computations(psi1_gpu, log_denom1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1computations '+str(t)
# t = self.g_psi2computations(psi2_gpu, psi2n_gpu, log_denom2_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2computations '+str(t)
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else:
return psi0, psi1_gpu.get(), psi2_gpu.get()
@Cache_this(limit=1, ignore_args=(0,1,2,3))
def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
ARD = (len(lengthscale)!=1)
N,M,Q = self.get_dimensions(Z, variational_posterior)
psi1_gpu = self.gpuCache['psi1_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
dvar_gpu = self.gpuCache['dvar_gpu']
dl_gpu = self.gpuCache['dl_gpu']
dZ_gpu = self.gpuCache['dZ_gpu']
dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu']
if self.GPU_direct:
dL_dpsi1_gpu = dL_dpsi1
dL_dpsi2_gpu = dL_dpsi2
dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
else:
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
self.reset_derivative()
# t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi1compDer '+str(t)
# t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
# print 'g_psi2compDer '+str(t)
self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS
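Each CUDA block above accumulates its partial derivative sums into a trailing block axis (dmu_gpu and dS_gpu are N x Q x blocknum, dl_gpu is Q x blocknum), and sum_axis then collapses that axis on the device. Assuming that reading of sum_axis, the numpy equivalent is simply:

import numpy as np
N, Q, blocknum = 4, 2, 15
dmu_partials = np.random.randn(N, Q, blocknum)   # one partial result per CUDA block
dL_dmu = dmu_partials.sum(axis=-1)               # assumed behaviour of sum_axis on the GPU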
View file
@@ -0,0 +1,88 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the Psi statistics computation of the linear kernel for SSGPLVM
"""
import numpy as np
def psicomputations(variance, Z, variational_posterior):
"""
Compute psi-statistics for ss-linear kernel
"""
# here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results:
# psi0 N
# psi1 NxM
# psi2 MxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.einsum('q,nq,nq->n',variance,gamma,np.square(mu)+S)
psi1 = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu)
mu2 = np.square(mu)
variances2 = np.square(variance)
tmp = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu)
psi2 = np.einsum('nq,q,mq,oq,nq->mo',gamma,variances2,Z,Z,mu2+S)+\
np.einsum('nm,no->mo',tmp,tmp) - np.einsum('nq,q,mq,oq,nq->mo',np.square(gamma),variances2,Z,Z,mu2)
return psi0, psi1, psi2
def psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ = _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma)
# Compute for psi0 and psi1
mu2S = np.square(mu)+S
dL_dvar += np.einsum('n,nq,nq->q',dL_dpsi0,gamma,mu2S) + np.einsum('nm,nq,mq,nq->q',dL_dpsi1,gamma,Z,mu)
dL_dgamma += np.einsum('n,q,nq->nq',dL_dpsi0,variance,mu2S) + np.einsum('nm,q,mq,nq->nq',dL_dpsi1,variance,Z,mu)
dL_dmu += np.einsum('n,nq,q,nq->nq',dL_dpsi0,gamma,2.*variance,mu) + np.einsum('nm,nq,q,mq->nq',dL_dpsi1,gamma,variance,Z)
dL_dS += np.einsum('n,nq,q->nq',dL_dpsi0,gamma,variance)
dL_dZ += np.einsum('nm,nq,q,nq->mq',dL_dpsi1,gamma, variance,mu)
return dL_dvar, dL_dZ, dL_dmu, dL_dS, dL_dgamma
def _psi2computations(dL_dpsi2, variance, Z, mu, S, gamma):
"""
Z - MxQ
mu - NxQ
S - NxQ
gamma - NxQ
"""
# here are the "statistics" for psi1 and psi2
# Produced intermediate results:
# _psi2_dvariance Q
# _psi2_dZ MxQ
# _psi2_dgamma NxQ
# _psi2_dmu NxQ
# _psi2_dS NxQ
mu2 = np.square(mu)
gamma2 = np.square(gamma)
variance2 = np.square(variance)
mu2S = mu2+S # NxQ
common_sum = np.einsum('nq,q,mq,nq->nm',gamma,variance,Z,mu) # NxM
dL_dvar = np.einsum('mo,nq,q,mq,oq->q',dL_dpsi2,2.*(gamma*mu2S-gamma2*mu2),variance,Z,Z)+\
np.einsum('mo,nq,mq,nq,no->q',dL_dpsi2,gamma,Z,mu,common_sum)+\
np.einsum('mo,nq,oq,nq,nm->q',dL_dpsi2,gamma,Z,mu,common_sum)
dL_dgamma = np.einsum('mo,q,mq,oq,nq->nq',dL_dpsi2,variance2,Z,Z,(mu2S-2.*gamma*mu2))+\
np.einsum('mo,q,mq,nq,no->nq',dL_dpsi2,variance,Z,mu,common_sum)+\
np.einsum('mo,q,oq,nq,nm->nq',dL_dpsi2,variance,Z,mu,common_sum)
dL_dmu = np.einsum('mo,q,mq,oq,nq,nq->nq',dL_dpsi2,variance2,Z,Z,mu,2.*(gamma-gamma2))+\
np.einsum('mo,nq,q,mq,no->nq',dL_dpsi2,gamma,variance,Z,common_sum)+\
np.einsum('mo,nq,q,oq,nm->nq',dL_dpsi2,gamma,variance,Z,common_sum)
dL_dS = np.einsum('mo,nq,q,mq,oq->nq',dL_dpsi2,gamma,variance2,Z,Z)
dL_dZ = 2.*(np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma,variance2,Z,mu2S)+np.einsum('om,nq,q,nq,nm->oq',dL_dpsi2,gamma,variance,mu,common_sum)
-np.einsum('om,nq,q,mq,nq->oq',dL_dpsi2,gamma2,variance2,Z,mu2))
return dL_dvar, dL_dgamma, dL_dmu, dL_dS, dL_dZ
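Under the spike-and-slab posterior, the first two statistics computed above take the simple closed forms below, where γ_nq is the binary probability (LaTeX transcription for reference):

\psi_0^{n} = \sum_q \gamma_{nq}\,\sigma_q^2\,(\mu_{nq}^2 + S_{nq}), \qquad
\psi_1^{nm} = \sum_q \gamma_{nq}\,\sigma_q^2\, z_{mq}\, \mu_{nq}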
View file
@@ -6,10 +6,8 @@ The package for the psi statistics computation
""" """
import numpy as np import numpy as np
from GPy.util.caching import Cache_this
@Cache_this(limit=1) def psicomputations(variance, lengthscale, Z, variational_posterior):
def psicomputations(variance, lengthscale, Z, mu, S, gamma):
""" """
Z - MxQ Z - MxQ
mu - NxQ mu - NxQ
@@ -19,6 +17,9 @@ def psicomputations(variance, lengthscale, Z, mu, S, gamma):
# here are the "statistics" for psi0, psi1 and psi2 # here are the "statistics" for psi0, psi1 and psi2
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
psi0 = np.empty(mu.shape[0]) psi0 = np.empty(mu.shape[0])
psi0[:] = variance psi0[:] = variance
@@ -37,7 +38,6 @@ def _psi1computations(variance, lengthscale, Z, mu, S, gamma):
# Produced intermediate results: # Produced intermediate results:
# _psi1 NxM # _psi1 NxM
lengthscale2 = np.square(lengthscale) lengthscale2 = np.square(lengthscale)
# psi1 # psi1
@@ -147,11 +147,6 @@ def _psi1compDer(dL_dpsi1, variance, lengthscale, Z, mu, S, gamma):
_dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ _dL_dS = np.einsum('nm,nmq,nmq,nq,nmq->nq',dL_dpsi1,_psi1_q,_psi1_exp_dist_sq,_psi1_common,(_psi1_dist_sq-1.))/2. # NxQ
_dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z)) _dL_dZ = np.einsum('nm,nmq,nmq->mq',dL_dpsi1,_psi1_q, (- _psi1_common[:,None,:] * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z))
_dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z)) _dL_dlengthscale = lengthscale* np.einsum('nm,nmq,nmq->q',dL_dpsi1,_psi1_q,(_psi1_common[:,None,:]*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + (1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z))
# _dpsi1_dmu = _psi1_q * (_psi1_exp_dist_sq * _psi1_dist * _psi1_common) # NxMxQ
# _dpsi1_dS = _psi1_q * (_psi1_exp_dist_sq * _psi1_common * 0.5 * (_psi1_dist_sq - 1.)) # NxMxQ
# _dpsi1_dZ = _psi1_q * (- _psi1_common * _psi1_dist * _psi1_exp_dist_sq - (1-gamma[:,None,:])/lengthscale2*Z[None,:,:]*_psi1_exp_Z) # NxMxQ
# _dpsi1_dlengthscale = 2.*lengthscale*_psi1_q * (0.5*_psi1_common*(S[:,None,:]/lengthscale2+_psi1_dist_sq)*_psi1_exp_dist_sq + 0.5*(1-gamma[:,None,:])*np.square(Z[None,:,:]/lengthscale2)*_psi1_exp_Z) # NxMxQ
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
@@ -199,15 +194,6 @@ def _psi2compDer(dL_dpsi2, variance, lengthscale, Z, mu, S, gamma):
_dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq) _dL_dmu = -2.*np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q,_psi2_common,_psi2_mudist,_psi2_exp_dist_sq)
_dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq) _dL_dS = np.einsum('mo,nmoq,nq,nmoq,nmoq->nq',dL_dpsi2,_psi2_q, _psi2_common, (2.*_psi2_mudist_sq-1.), _psi2_exp_dist_sq)
_dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z)) _dL_dZ = 2.*np.einsum('mo,nmoq,nmoq->mq',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(-_psi2_Zdist*_psi2_denom[:,None,None,:]+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z))
# print _psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq #+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)
_dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z)) _dL_dlengthscale = 2.*lengthscale* np.einsum('mo,nmoq,nmoq->q',dL_dpsi2,_psi2_q,(_psi2_common[:,None,None,:]*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom[:,None,None,:]+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z))
# _dpsi2_dvariance = 2. * _psi2/variance # NxMxM
# _dpsi2_dgamma = _psi2_q * (_psi2_exp_dist_sq/_psi2_denom_sqrt - _psi2_exp_Z) # NxMxMxQ
# _dpsi2_dmu = _psi2_q * (-2.*_psi2_common*_psi2_mudist * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dS = _psi2_q * (_psi2_common * (2.*_psi2_mudist_sq - 1.) * _psi2_exp_dist_sq) # NxMxMxQ
# _dpsi2_dZ = 2.*_psi2_q * (_psi2_common*(-_psi2_Zdist*_psi2_denom+_psi2_mudist)*_psi2_exp_dist_sq - (1-gamma[:,None,None,:])*Z[:,None,:]/lengthscale2*_psi2_exp_Z) # NxMxMxQ
# _dpsi2_dlengthscale = 2.*lengthscale* _psi2_q * (_psi2_common*(S[:,None,None,:]/lengthscale2+_psi2_Zdist_sq*_psi2_denom+_psi2_mudist_sq)*_psi2_exp_dist_sq+(1-gamma[:,None,None,:])*_psi2_Z_sq_sum*0.5/lengthscale2*_psi2_exp_Z) # NxMxMxQ
return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma return _dL_dvariance, _dL_dlengthscale, _dL_dZ, _dL_dmu, _dL_dS, _dL_dgamma
View file
@@ -1,535 +1,474 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
""" """
The package for the psi statistics computation on GPU The module for psi-statistics for RBF kernel for Spike-and-Slab GPLVM
""" """
import numpy as np import numpy as np
from GPy.util.caching import Cache_this from ....util.caching import Cache_this
from . import PSICOMP_RBF
from ....util import gpu_init from ....util import gpu_init
try: try:
import pycuda.gpuarray as gpuarray import pycuda.gpuarray as gpuarray
from scikits.cuda import cublas from pycuda.compiler import SourceModule
from pycuda.reduction import ReductionKernel from ....util.linalg_gpu import sum_axis
from pycuda.elementwise import ElementwiseKernel
from ....util import linalg_gpu
# The kernel form computing psi1 het_noise
comp_psi1 = ElementwiseKernel(
"double *psi1, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"psi1[i] = comp_psi1_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
"comp_psi1",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_psi1_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
{
int n = idx%N;
int m = idx/N;
double psi1_exp=0;
for(int q=0;q<Q;q++){
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)] + muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
double exp2 = log1Gamma[IDX_NQ(n,q)] - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
psi1_exp += LOGEXPSUM(exp1,exp2);
}
return var*exp(psi1_exp);
}
""")
# The kernel form computing psi2 het_noise
comp_psi2 = ElementwiseKernel(
"double *psi2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
"psi2[i] = comp_psi2_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
"comp_psi2",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{
// psi2 (n,m1,m2)
int m2 = idx/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double psi2_exp=0;
for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
psi2_exp += LOGEXPSUM(exp1,exp2);
}
return var*var*exp(psi2_exp);
}
""")
# compute psidenom
comp_logpsidenom = ElementwiseKernel(
"double *out, double *S, double *l, double scale, int N",
"out[i] = comp_logpsidenom_element(S, l, scale, N, i)",
"comp_logpsidenom",
preamble="""
__device__ double comp_logpsidenom_element(double *S, double *l, double scale, int N, int idx)
{
int q = idx/N;
return log(scale*S[idx]/l[q]+1.0);
}
""")
# The kernel form computing psi1 het_noise
comp_dpsi1_dvar = ElementwiseKernel(
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"dpsi1_dvar[i] = comp_dpsi1_dvar_element(psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
"comp_dpsi1_dvar",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
{
int n = idx%N;
int m = idx/N;
double psi1_sum = 0;
for(int q=0;q<Q;q++){
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)])/2.0 + exp1_e;
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi1_q = LOGEXPSUM(exp1,exp2);
psi1_neq[IDX_NMQ(n,m,q)] = -psi1_q;
psi1exp1[IDX_NMQ(n,m,q)] = exp(exp1_e);
psi1exp2[IDX_MQ(m,q)] = exp(exp2_e);
psi1_sum += psi1_q;
}
for(int q=0;q<Q;q++) {
psi1_neq[IDX_NMQ(n,m,q)] = exp(psi1_neq[IDX_NMQ(n,m,q)]+psi1_sum);
}
return exp(psi1_sum);
}
""")
    # The kernel for computing the derivatives of psi1 (het_noise)
comp_psi1_der = ElementwiseKernel(
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi1_dl[i] = comp_psi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, psi1_neq, psi1exp1, psi1exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi1_der",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
__device__ double comp_psi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
int q = idx/(M*N);
int m = (idx%(M*N))/N;
int n = idx%N;
double neq = psi1_neq[IDX_NMQ(n,m,q)];
double gamma_c = gamma[IDX_NQ(n,q)];
double Z_c = Z[IDX_MQ(m,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi1exp1_c = psi1exp1[IDX_NMQ(n,m,q)];
double psi1exp2_c = psi1exp2[IDX_MQ(m,q)];
double denom = S_c/l_c+1.0;
double denom_sqrt = sqrt(denom);
double Zmu = Z_c-mu[IDX_NQ(n,q)];
double psi1_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c;
dpsi1_dgamma[IDX_NMQ(n,m,q)] = var*neq*(psi1exp1_c/denom_sqrt - psi1exp2_c);
dpsi1_dmu[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*Zmu*psi1exp1_c);
dpsi1_dS[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*(Zmu*Zmu/(S_c+l_c)-1.0)*psi1exp1_c)/2.0;
dpsi1_dZ[IDX_NMQ(n,m,q)] = var*neq*(-psi1_common*Zmu*psi1exp1_c-gamma1*Z_c/l_c*psi1exp2_c);
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)*l_sqrt_c;
}
""")
    # The kernel for computing the derivative of psi2 w.r.t. variance (het_noise)
comp_dpsi2_dvar = ElementwiseKernel(
"double *dpsi2_dvar, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
"dpsi2_dvar[i] = comp_dpsi2_dvar_element(psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
"comp_dpsi2_dvar",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{
// psi2 (n,m1,m2)
int m2 = idx/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double psi2_sum=0;
for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
double exp1_e = - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 +exp1_e;
double exp2_e = - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi2_q = LOGEXPSUM(exp1,exp2);
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = -psi2_q;
psi2exp1[IDX_NMMQ(n,m1,m2,q)] = exp(exp1_e);
psi2exp2[IDX_MMQ(m1,m2,q)] = exp(exp2_e);
psi2_sum += psi2_q;
}
for(int q=0;q<Q;q++) {
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = exp(psi2_neq[IDX_NMMQ(n,m1,m2,q)]+psi2_sum);
}
return 2*var*exp(psi2_sum);
}
""")
    # The kernel for computing the derivatives of psi2 (het_noise)
comp_psi2_der = ElementwiseKernel(
"double *dpsi2_dl, double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi2_dl[i] = comp_psi2_der_element(dpsi2_dmu, dpsi2_dS, dpsi2_dgamma, dpsi2_dZ, psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi2_der",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
__device__ double comp_psi2_der_element(double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
// dpsi2 (n,m1,m2,q)
int q = idx/(M*M*N);
int m2 = (idx%(M*M*N))/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double neq = psi2_neq[IDX_NMMQ(n,m1,m2,q)];
double gamma_c = gamma[IDX_NQ(n,q)];
double Z1_c = Z[IDX_MQ(m1,q)];
double Z2_c = Z[IDX_MQ(m2,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi2exp1_c = psi2exp1[IDX_NMMQ(n,m1,m2,q)];
double psi2exp2_c = psi2exp2[IDX_MMQ(m1,m2,q)];
double dZ = Z2_c - Z1_c;
double muZ = mu[IDX_NQ(n,q)] - (Z1_c+Z2_c)/2.0;
double Z2 = Z1_c*Z1_c+Z2_c*Z2_c;
double denom = 2.0*S_c/l_c+1.0;
double denom_sqrt = sqrt(denom);
double psi2_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c;
double var2 = var*var;
dpsi2_dgamma[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2exp1_c/denom_sqrt - psi2exp2_c);
dpsi2_dmu[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(-2.0*psi2_common*muZ*psi2exp1_c);
dpsi2_dS[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(2.0*muZ*muZ/(2.0*S_c+l_c)-1.0)*psi2exp1_c);
dpsi2_dZ[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(dZ*denom/-2.0+muZ)*psi2exp1_c-gamma1*Z2_c/l_c*psi2exp2_c)*2.0;
return var2*neq*(psi2_common*(S_c/l_c+dZ*dZ*denom/(4.0*l_c)+muZ*muZ/(2.0*S_c+l_c))*psi2exp1_c+gamma1*Z2/(2.0*l_c)*psi2exp2_c)*l_sqrt_c*2.0;
}
""")
except:
    pass
class PSICOMP_SSRBF(object):
    def __init__(self):
        assert gpu_init.initSuccess, "GPU initialization failed!"
        self.cublas_handle = gpu_init.cublas_handle
        self.gpuCache = None
        self.gpuCacheAll = None

gpu_code = """
    // define THREADNUM
    #define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
    #define IDX_NMM(n,m1,m2) ((m2*M+m1)*N+n)
    #define IDX_NQ(n,q) (q*N+n)
#define IDX_NM(n,m) (m*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define IDX_MM(m1,m2) (m2*M+m1)
#define IDX_NQB(n,q,b) ((b*Q+q)*N+n)
#define IDX_QB(q,b) (b*Q+q)
// Divide data evenly
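    // Worker pidx gets either floor(total_data/psize) or floor(total_data/psize)+1 consecutive items;
    // the first (total_data % psize) workers take the larger share, e.g. 10 items over 3 workers -> [0,4), [4,7), [7,10).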
__device__ void divide_data(int total_data, int psize, int pidx, int *start, int *end) {
int residue = (total_data)%psize;
if(pidx<residue) {
int size = total_data/psize+1;
*start = size*pidx;
*end = *start+size;
} else {
int size = total_data/psize;
*start = size*pidx+residue;
*end = *start+size;
}
}
    def _initGPUCache(self, N, M, Q):
        if self.gpuCache!=None and self.gpuCache['mu_gpu'].shape[0] == N:
            return

    __device__ void reduce_sum(double* array, int array_size) {
        int s;
        if(array_size >= blockDim.x) {
for(int i=blockDim.x+threadIdx.x; i<array_size; i+= blockDim.x) {
array[threadIdx.x] += array[i];
}
array_size = blockDim.x;
}
__syncthreads();
for(int i=1; i<=array_size;i*=2) {s=i;}
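        // s is now the largest power of two <= array_size; fold the overhang past s into the front, then tree-reduce by halving s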
if(threadIdx.x < array_size-s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
for(s=s/2;s>=1;s=s/2) {
if(threadIdx.x < s) {array[threadIdx.x] += array[s+threadIdx.x];}
__syncthreads();
}
}
__global__ void compDenom(double *log_denom1, double *log_denom2, double *log_gamma, double*log_gamma1, double *gamma, double *l, double *S, int N, int Q)
{
int n_start, n_end;
divide_data(N, gridDim.x, blockIdx.x, &n_start, &n_end);
        if self.gpuCacheAll!=None and self.gpuCacheAll['mu_gpu'].shape[0]<N: # Too small cache -> reallocate
            self._releaseMemory()

        for(int i=n_start*Q+threadIdx.x; i<n_end*Q; i+=blockDim.x) {
            int n=i/Q;
int q=i%Q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double gnq = gamma[IDX_NQ(n,q)];
log_denom1[IDX_NQ(n,q)] = log(Snq/lq+1.);
log_denom2[IDX_NQ(n,q)] = log(2.*Snq/lq+1.);
log_gamma[IDX_NQ(n,q)] = log(gnq);
log_gamma1[IDX_NQ(n,q)] = log(1.-gnq);
}
}
__global__ void psi1computations(double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int m_start, m_end;
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
for(int m=m_start; m<m_end; m++) {
for(int n=threadIdx.x; n<N; n+= blockDim.x) {
double log_psi1 = 0;
for(int q=0;q<Q;q++) {
double Zmq = Z[IDX_MQ(m,q)];
double muZ = mu[IDX_NQ(n,q)]-Zmq;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = log_gamma[IDX_NQ(n,q)]-(muZ*muZ/(Snq+lq)+log_denom1[IDX_NQ(n,q)])/(2.);
double exp2 = log_gamma1[IDX_NQ(n,q)]-Zmq*Zmq/(2.*lq);
log_psi1 += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
psi1[IDX_NM(n,m)] = var*exp(log_psi1);
}
}
}
__global__ void psi2computations(double *psi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, int N, int M, int Q)
{
int psi2_idx_start, psi2_idx_end;
__shared__ double psi2_local[THREADNUM];
divide_data((M+1)*M/2, gridDim.x, blockIdx.x, &psi2_idx_start, &psi2_idx_end);
for(int psi2_idx=psi2_idx_start; psi2_idx<psi2_idx_end; psi2_idx++) {
int m1 = int((sqrt(8.*psi2_idx+1.)-1.)/2.);
int m2 = psi2_idx - (m1+1)*m1/2;
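            // psi2 is symmetric in (m1,m2), so only the (M+1)*M/2 lower-triangle pairs are enumerated;
            // the two lines above invert the triangular index psi2_idx into (m1,m2) with m2 <= m1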
        if self.gpuCacheAll == None:
            self.gpuCacheAll = {

            psi2_local[threadIdx.x] = 0;
            for(int n=threadIdx.x;n<N;n+=blockDim.x) {
double log_psi2_n = 0;
for(int q=0;q<Q;q++) {
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double muZhat = mu[IDX_NQ(n,q)]- (Zm1q+Zm2q)/2.;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double Snq = S[IDX_NQ(n,q)];
double lq = l[q]*l[q];
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_denom2[IDX_NQ(n,q)]/2. + log_gamma[IDX_NQ(n,q)];
double exp2 = log_gamma1[IDX_NQ(n,q)] - Z2/(2.*lq);
log_psi2_n += (exp1>exp2)?exp1+log1p(exp(exp2-exp1)):exp2+log1p(exp(exp1-exp2));
}
double exp_psi2_n = exp(log_psi2_n);
psi2n[IDX_NMM(n,m1,m2)] = var*var*exp_psi2_n;
if(m1!=m2) { psi2n[IDX_NMM(n,m2,m1)] = var*var*exp_psi2_n;}
psi2_local[threadIdx.x] += exp_psi2_n;
}
__syncthreads();
reduce_sum(psi2_local, THREADNUM);
if(threadIdx.x==0) {
psi2[IDX_MM(m1,m2)] = var*var*psi2_local[0];
if(m1!=m2) { psi2[IDX_MM(m2,m1)] = var*var*psi2_local[0]; }
}
__syncthreads();
}
}
__global__ void psi1compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi1, double *psi1, double *log_denom1, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom1[IDX_NQ(n,q)];}
for(int m=m_start; m<m_end; m++) {
if(n<N) {
double lpsi1 = psi1[IDX_NM(n,m)]*dL_dpsi1[IDX_NM(n,m)];
if(q==0) {dvar_local += lpsi1;}
double Zmq = Z[IDX_MQ(m,q)];
double Zmu = Zmq - mu_nq;
double denom = Snq+lq;
double Zmu2_denom = Zmu*Zmu/denom;
double exp1 = log_gnq-(Zmu*Zmu/(Snq+lq)+log_de)/(2.);
double exp2 = log_gnq1-Zmq*Zmq/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
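                        // both exponentials are rescaled by the larger one; d_exp1/exp_sum and d_exp2/exp_sum are
                        // the relative weights of the slab (gamma) and spike (1-gamma) terms for this (n,m,q)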
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi1*Zmu*d_exp1/(denom*exp_sum);
dS_local += lpsi1*(Zmu2_denom-1.)*d_exp1/(denom*exp_sum);
dgamma_local += lpsi1*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi1*((Zmu2_denom+Snq/lq)/denom*d_exp1+Zmq*Zmq/(lq*lq)*d_exp2)/(2.*exp_sum);
g_local[threadIdx.x] = lpsi1*(-Zmu/denom*d_exp1-Zmq/lq*d_exp2)/exp_sum;
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local/2.;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]/var;}
}
__global__ void psi2compDer(double *dvar, double *dl, double *dZ, double *dmu, double *dS, double *dgamma, double *dL_dpsi2, double *psi2n, double *log_denom2, double *log_gamma, double*log_gamma1, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q)
{
int m_start, m_end;
__shared__ double g_local[THREADNUM];
divide_data(M, gridDim.x, blockIdx.x, &m_start, &m_end);
int P = int(ceil(double(N)/THREADNUM));
double dvar_local = 0;
for(int q=0;q<Q;q++) {
double lq_sqrt = l[q];
double lq = lq_sqrt*lq_sqrt;
double dl_local = 0;
for(int p=0;p<P;p++) {
int n = p*THREADNUM + threadIdx.x;
double dmu_local = 0;
double dS_local = 0;
double dgamma_local = 0;
double Snq,mu_nq,gnq,log_gnq,log_gnq1,log_de;
if(n<N) {Snq = S[IDX_NQ(n,q)]; mu_nq=mu[IDX_NQ(n,q)]; gnq = gamma[IDX_NQ(n,q)];
log_gnq = log_gamma[IDX_NQ(n,q)]; log_gnq1 = log_gamma1[IDX_NQ(n,q)];
log_de = log_denom2[IDX_NQ(n,q)];}
for(int m1=m_start; m1<m_end; m1++) {
g_local[threadIdx.x] = 0;
for(int m2=0;m2<M;m2++) {
if(n<N) {
double lpsi2 = psi2n[IDX_NMM(n,m1,m2)]*dL_dpsi2[IDX_MM(m1,m2)];
if(q==0) {dvar_local += lpsi2;}
double Zm1q = Z[IDX_MQ(m1,q)];
double Zm2q = Z[IDX_MQ(m2,q)];
double dZ = Zm1q - Zm2q;
double Z2 = Zm1q*Zm1q+Zm2q*Zm2q;
double muZhat = mu_nq - (Zm1q + Zm2q)/2.;
double denom = 2.*Snq+lq;
double muZhat2_denom = muZhat*muZhat/denom;
double exp1 = dZ*dZ/(-4.*lq)-muZhat*muZhat/(2.*Snq+lq) - log_de/2. + log_gnq;
double exp2 = log_gnq1 - Z2/(2.*lq);
double d_exp1,d_exp2;
if(exp1>exp2) {
d_exp1 = 1.;
d_exp2 = exp(exp2-exp1);
} else {
d_exp1 = exp(exp1-exp2);
d_exp2 = 1.;
}
double exp_sum = d_exp1+d_exp2;
dmu_local += lpsi2*muZhat/denom*d_exp1/exp_sum;
dS_local += lpsi2*(2.*muZhat2_denom-1.)/denom*d_exp1/exp_sum;
dgamma_local += lpsi2*(d_exp1/gnq-d_exp2/(1.-gnq))/exp_sum;
dl_local += lpsi2*(((Snq/lq+muZhat2_denom)/denom+dZ*dZ/(4.*lq*lq))*d_exp1+Z2/(2.*lq*lq)*d_exp2)/exp_sum;
g_local[threadIdx.x] += 2.*lpsi2*((muZhat/denom-dZ/(2*lq))*d_exp1-Zm1q/lq*d_exp2)/exp_sum;
}
}
__syncthreads();
reduce_sum(g_local, p<P-1?THREADNUM:N-(P-1)*THREADNUM);
if(threadIdx.x==0) {dZ[IDX_MQ(m1,q)] += g_local[0];}
}
if(n<N) {
dmu[IDX_NQB(n,q,blockIdx.x)] += -2.*dmu_local;
dS[IDX_NQB(n,q,blockIdx.x)] += dS_local;
dgamma[IDX_NQB(n,q,blockIdx.x)] += dgamma_local;
}
__threadfence_block();
}
g_local[threadIdx.x] = dl_local*2.*lq_sqrt;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dl[IDX_QB(q,blockIdx.x)] += g_local[0];}
}
g_local[threadIdx.x] = dvar_local;
__syncthreads();
reduce_sum(g_local, THREADNUM);
if(threadIdx.x==0) {dvar[blockIdx.x] += g_local[0]*2/var;}
}
"""
class PSICOMP_SSRBF_GPU(PSICOMP_RBF):
def __init__(self, threadnum=128, blocknum=15, GPU_direct=False):
self.GPU_direct = GPU_direct
self.gpuCache = None
self.threadnum = threadnum
self.blocknum = blocknum
module = SourceModule("#define THREADNUM "+str(self.threadnum)+"\n"+gpu_code)
self.g_psi1computations = module.get_function('psi1computations')
self.g_psi1computations.prepare('PPPPdPPPPiii')
self.g_psi2computations = module.get_function('psi2computations')
self.g_psi2computations.prepare('PPPPPdPPPPiii')
self.g_psi1compDer = module.get_function('psi1compDer')
self.g_psi1compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_psi2compDer = module.get_function('psi2compDer')
self.g_psi2compDer.prepare('PPPPPPPPPPPdPPPPPiii')
self.g_compDenom = module.get_function('compDenom')
self.g_compDenom.prepare('PPPPPPPii')
def __deepcopy__(self, memo):
s = PSICOMP_SSRBF_GPU(threadnum=self.threadnum, blocknum=self.blocknum, GPU_direct=self.GPU_direct)
memo[id(self)] = s
return s
def _initGPUCache(self, N, M, Q):
if self.gpuCache == None:
self.gpuCache = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'), 'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'), 'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), 'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logGamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log1Gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logpsi1denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'), 'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'), 'psi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
# derivatives psi1 'psi2n_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'dL_dpsi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'dL_dpsi2_gpu' :gpuarray.empty((M,M),np.float64,order='F'),
'psi1exp2_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'log_denom1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'dpsi1_dvar_gpu' :gpuarray.empty((N,M),np.float64, order='F'), 'log_denom2_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'dpsi1_dl_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'log_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'dpsi1_dZ_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'log_gamma1_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'dpsi1_dgamma_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), # derivatives
'dpsi1_dmu_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'dvar_gpu' :gpuarray.empty((self.blocknum,),np.float64, order='F'),
'dpsi1_dS_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), 'dl_gpu' :gpuarray.empty((Q,self.blocknum),np.float64, order='F'),
# derivatives psi2 'dZ_gpu' :gpuarray.empty((M,Q),np.float64, order='F'),
'psi2_neq_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'dmu_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'psi2exp1_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'dS_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'psi2exp2_gpu' :gpuarray.empty((M,M,Q),np.float64, order='F'), 'dgamma_gpu' :gpuarray.empty((N,Q,self.blocknum),np.float64, order='F'),
'dpsi2_dvar_gpu' :gpuarray.empty((N,M,M),np.float64, order='F'), # grad
'dpsi2_dl_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'grad_l_gpu' :gpuarray.empty((Q,),np.float64, order='F'),
'dpsi2_dZ_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'grad_mu_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'dpsi2_dgamma_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'grad_S_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'dpsi2_dmu_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), 'grad_gamma_gpu' :gpuarray.empty((N,Q,),np.float64, order='F'),
'dpsi2_dS_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
# gradients
'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
} }
self.gpuCache = self.gpuCacheAll
elif self.gpuCacheAll['mu_gpu'].shape[0]==N:
self.gpuCache = self.gpuCacheAll
else: else:
# remap to a smaller cache assert N==self.gpuCache['mu_gpu'].shape[0]
self.gpuCache = self.gpuCacheAll.copy() assert M==self.gpuCache['Z_gpu'].shape[0]
Nlist=['mu_gpu','S_gpu','gamma_gpu','logGamma_gpu','log1Gamma_gpu','logpsi1denom_gpu','logpsi2denom_gpu','psi0_gpu','psi1_gpu','psi2_gpu', assert Q==self.gpuCache['l_gpu'].shape[0]
'psi1_neq_gpu','psi1exp1_gpu','psi1exp2_gpu','dpsi1_dvar_gpu','dpsi1_dl_gpu','dpsi1_dZ_gpu','dpsi1_dgamma_gpu','dpsi1_dmu_gpu',
'dpsi1_dS_gpu','psi2_neq_gpu','psi2exp1_gpu','dpsi2_dvar_gpu','dpsi2_dl_gpu','dpsi2_dZ_gpu','dpsi2_dgamma_gpu','dpsi2_dmu_gpu','dpsi2_dS_gpu','grad_mu_gpu','grad_S_gpu','grad_gamma_gpu',]
oldN = self.gpuCacheAll['mu_gpu'].shape[0]
for v in Nlist:
u = self.gpuCacheAll[v]
self.gpuCache[v] = u.ravel()[:u.size/oldN*N].reshape(*((N,)+u.shape[1:]))
def _releaseMemory(self): def sync_params(self, lengthscale, Z, mu, S, gamma):
if self.gpuCacheAll!=None: if len(lengthscale)==1:
[v.gpudata.free() for v in self.gpuCacheAll.values()] self.gpuCache['l_gpu'].fill(lengthscale)
self.gpuCacheAll = None else:
self.gpuCache = None self.gpuCache['l_gpu'].set(np.asfortranarray(lengthscale))
self.gpuCache['Z_gpu'].set(np.asfortranarray(Z))
self.gpuCache['mu_gpu'].set(np.asfortranarray(mu))
self.gpuCache['S_gpu'].set(np.asfortranarray(S))
self.gpuCache['gamma_gpu'].set(np.asfortranarray(gamma))
N,Q = self.gpuCache['S_gpu'].shape
self.g_compDenom.prepared_call((self.blocknum,1),(self.threadnum,1,1), self.gpuCache['log_denom1_gpu'].gpudata,self.gpuCache['log_denom2_gpu'].gpudata,self.gpuCache['log_gamma_gpu'].gpudata,self.gpuCache['log_gamma1_gpu'].gpudata,self.gpuCache['gamma_gpu'].gpudata,self.gpuCache['l_gpu'].gpudata,self.gpuCache['S_gpu'].gpudata, np.int32(N), np.int32(Q))
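        # compDenom precomputes log(S/l^2+1), log(2*S/l^2+1), log(gamma) and log(1-gamma) once per
        # parameter sync; the psi1/psi2 kernels and their gradient kernels all reuse these buffers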
def reset_derivative(self):
self.gpuCache['dvar_gpu'].fill(0.)
self.gpuCache['dl_gpu'].fill(0.)
self.gpuCache['dZ_gpu'].fill(0.)
self.gpuCache['dmu_gpu'].fill(0.)
self.gpuCache['dS_gpu'].fill(0.)
self.gpuCache['dgamma_gpu'].fill(0.)
self.gpuCache['grad_l_gpu'].fill(0.)
self.gpuCache['grad_mu_gpu'].fill(0.)
self.gpuCache['grad_S_gpu'].fill(0.)
self.gpuCache['grad_gamma_gpu'].fill(0.)
def estimateMemoryOccupation(self, N, M, Q): def get_dimensions(self, Z, variational_posterior):
""" return variational_posterior.mean.shape[0], Z.shape[0], Z.shape[1]
Estimate the best batch size.
N - the number of total datapoints
M - the number of inducing points
Q - the number of hidden (input) dimensions
return: the constant memory size, the memory occupation of batchsize=1
unit: GB
"""
return (2.*Q+2.*M*Q+M*M*Q)*8./1024./1024./1024., (1.+2.*M+10.*Q+2.*M*M+8.*M*Q+7.*M*M*Q)*8./1024./1024./1024.
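        # e.g. M=100 inducing points and Q=10 latent dimensions give roughly
        # (1+200+100+20000+8000+700000)*8 bytes ~ 5.6MB per data point, so a batch size can be
        # chosen as (free GPU memory - constant part) / per-point cost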
@Cache_this(limit=1,ignore_args=(0,)) @Cache_this(limit=1, ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma): def psicomputations(self, variance, lengthscale, Z, variational_posterior):
"""Compute Psi statitsitcs""" """
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1: Z - MxQ
ARD = True mu - NxQ
else: S - NxQ
ARD = False """
N,M,Q = self.get_dimensions(Z, variational_posterior)
N = mu.shape[0]
M = Z.shape[0]
Q = mu.shape[1]
self._initGPUCache(N,M,Q) self._initGPUCache(N,M,Q)
l_gpu = self.gpuCache['l_gpu'] self.sync_params(lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu']
logGamma_gpu = self.gpuCache['logGamma_gpu']
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
psi0_gpu = self.gpuCache['psi0_gpu']
psi1_gpu = self.gpuCache['psi1_gpu'] psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu'] psi2_gpu = self.gpuCache['psi2_gpu']
psi2n_gpu = self.gpuCache['psi2n_gpu']
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
if ARD: psi0 = np.empty((N,))
l_gpu.set(np.asfortranarray(lengthscale**2)) psi0[:] = variance
self.g_psi1computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2computations.prepared_call((self.blocknum,1),(self.threadnum,1,1),psi2_gpu.gpudata, psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata, np.int32(N), np.int32(M), np.int32(Q))
if self.GPU_direct:
return psi0, psi1_gpu, psi2_gpu
else: else:
l_gpu.fill(lengthscale*lengthscale) return psi0, psi1_gpu.get(), psi2_gpu.get()
Z_gpu.set(np.asfortranarray(Z))
mu_gpu.set(np.asfortranarray(mu)) @Cache_this(limit=1, ignore_args=(0,1,2,3))
S_gpu.set(np.asfortranarray(S)) def psiDerivativecomputations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
gamma_gpu.set(np.asfortranarray(gamma)) ARD = (len(lengthscale)!=1)
linalg_gpu.log(gamma_gpu,logGamma_gpu)
linalg_gpu.logOne(gamma_gpu,log1Gamma_gpu)
comp_logpsidenom(logpsi1denom_gpu, S_gpu,l_gpu,1.0,N)
comp_logpsidenom(logpsi2denom_gpu, S_gpu,l_gpu,2.0,N)
psi0_gpu.fill(variance) N,M,Q = self.get_dimensions(Z, variational_posterior)
comp_psi1(psi1_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q) psi1_gpu = self.gpuCache['psi1_gpu']
comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q) psi2n_gpu = self.gpuCache['psi2n_gpu']
return psi0_gpu, psi1_gpu, psi2_gpu
@Cache_this(limit=1,ignore_args=(0,))
def _psiDercomputations(self, variance, lengthscale, Z, mu, S, gamma):
"""Compute the derivatives w.r.t. Psi statistics"""
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
self._initGPUCache(N,M,Q)
l_gpu = self.gpuCache['l_gpu'] l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu'] Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu'] mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu'] S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu'] gamma_gpu = self.gpuCache['gamma_gpu']
logGamma_gpu = self.gpuCache['logGamma_gpu'] dvar_gpu = self.gpuCache['dvar_gpu']
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu'] dl_gpu = self.gpuCache['dl_gpu']
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu'] dZ_gpu = self.gpuCache['dZ_gpu']
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu'] dmu_gpu = self.gpuCache['dmu_gpu']
dS_gpu = self.gpuCache['dS_gpu']
psi1_neq_gpu = self.gpuCache['psi1_neq_gpu'] dgamma_gpu = self.gpuCache['dgamma_gpu']
psi1exp1_gpu = self.gpuCache['psi1exp1_gpu']
psi1exp2_gpu = self.gpuCache['psi1exp2_gpu']
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
psi2_neq_gpu = self.gpuCache['psi2_neq_gpu']
psi2exp1_gpu = self.gpuCache['psi2exp1_gpu']
psi2exp2_gpu = self.gpuCache['psi2exp2_gpu']
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
#==========================================================================================================
# Assuming the l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, logGamma_gpu, log1Gamma_gpu,
        # logpsi1denom_gpu, logpsi2denom_gpu have been synchronized.
#==========================================================================================================
# psi1 derivatives
comp_dpsi1_dvar(dpsi1_dvar_gpu, psi1_neq_gpu, psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
# psi2 derivatives
comp_dpsi2_dvar(dpsi2_dvar_gpu, psi2_neq_gpu, psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
ARD = True
else:
ARD = False
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu'] grad_l_gpu = self.gpuCache['grad_l_gpu']
# variance
variance.gradient = gpuarray.sum(dL_dpsi0).get() \
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1)
        # lengthscale
if ARD:
grad_l_gpu.fill(0.)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
linalg_gpu.sum_axis(grad_l_gpu, psi1_comb_gpu, 1, N*M)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
linalg_gpu.sum_axis(grad_l_gpu, psi2_comb_gpu, 1, N*M*M)
lengthscale.gradient = grad_l_gpu.get()
else:
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
lengthscale.gradient = gpuarray.sum(psi1_comb_gpu).get() + gpuarray.sum(psi2_comb_gpu).get()
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
grad_Z_gpu = self.gpuCache['grad_Z_gpu']
grad_Z_gpu.fill(0.)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dZ_gpu, dL_dpsi1.size)
linalg_gpu.sum_axis(grad_Z_gpu, psi1_comb_gpu, 1, N)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dZ_gpu, dL_dpsi2.size)
linalg_gpu.sum_axis(grad_Z_gpu, psi2_comb_gpu, 1, N*M)
return grad_Z_gpu.get()
def gradients_qX_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
grad_mu_gpu = self.gpuCache['grad_mu_gpu'] grad_mu_gpu = self.gpuCache['grad_mu_gpu']
grad_S_gpu = self.gpuCache['grad_S_gpu'] grad_S_gpu = self.gpuCache['grad_S_gpu']
grad_gamma_gpu = self.gpuCache['grad_gamma_gpu'] grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
log_denom1_gpu = self.gpuCache['log_denom1_gpu']
log_denom2_gpu = self.gpuCache['log_denom2_gpu']
log_gamma_gpu = self.gpuCache['log_gamma_gpu']
log_gamma1_gpu = self.gpuCache['log_gamma1_gpu']
# mu gradients if self.GPU_direct:
grad_mu_gpu.fill(0.) dL_dpsi1_gpu = dL_dpsi1
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dmu_gpu, dL_dpsi1.size) dL_dpsi2_gpu = dL_dpsi2
linalg_gpu.sum_axis(grad_mu_gpu, psi1_comb_gpu, N, M) dL_dpsi0_sum = gpuarray.sum(dL_dpsi0).get()
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dmu_gpu, dL_dpsi2.size) else:
linalg_gpu.sum_axis(grad_mu_gpu, psi2_comb_gpu, N, M*M) dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
dL_dpsi1_gpu.set(np.asfortranarray(dL_dpsi1))
dL_dpsi2_gpu.set(np.asfortranarray(dL_dpsi2))
dL_dpsi0_sum = dL_dpsi0.sum()
# S gradients self.reset_derivative()
grad_S_gpu.fill(0.) # t=self.g_psi1compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi1_gpu,psi1_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dS_gpu, dL_dpsi1.size) # print 'g_psi1compDer '+str(t)
linalg_gpu.sum_axis(grad_S_gpu, psi1_comb_gpu, N, M) # t=self.g_psi2compDer(dvar_gpu,dl_gpu,dZ_gpu,dmu_gpu,dS_gpu,dL_dpsi2_gpu,psi2n_gpu, np.float64(variance),l_gpu,Z_gpu,mu_gpu,S_gpu, np.int32(N), np.int32(M), np.int32(Q), block=(self.threadnum,1,1), grid=(self.blocknum,1),time_kernel=True)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dS_gpu, dL_dpsi2.size) # print 'g_psi2compDer '+str(t)
linalg_gpu.sum_axis(grad_S_gpu, psi2_comb_gpu, N, M*M) self.g_psi1compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi1_gpu.gpudata,psi1_gpu.gpudata, log_denom1_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
self.g_psi2compDer.prepared_call((self.blocknum,1),(self.threadnum,1,1),dvar_gpu.gpudata,dl_gpu.gpudata,dZ_gpu.gpudata,dmu_gpu.gpudata,dS_gpu.gpudata,dgamma_gpu.gpudata,dL_dpsi2_gpu.gpudata,psi2n_gpu.gpudata, log_denom2_gpu.gpudata, log_gamma_gpu.gpudata, log_gamma1_gpu.gpudata, np.float64(variance),l_gpu.gpudata,Z_gpu.gpudata,mu_gpu.gpudata,S_gpu.gpudata,gamma_gpu.gpudata,np.int32(N), np.int32(M), np.int32(Q))
dL_dvar = dL_dpsi0_sum + gpuarray.sum(dvar_gpu).get()
sum_axis(grad_mu_gpu,dmu_gpu,N*Q,self.blocknum)
dL_dmu = grad_mu_gpu.get()
sum_axis(grad_S_gpu,dS_gpu,N*Q,self.blocknum)
dL_dS = grad_S_gpu.get()
sum_axis(grad_gamma_gpu,dgamma_gpu,N*Q,self.blocknum)
dL_dgamma = grad_gamma_gpu.get()
dL_dZ = dZ_gpu.get()
if ARD:
sum_axis(grad_l_gpu,dl_gpu,Q,self.blocknum)
dL_dlengscale = grad_l_gpu.get()
else:
dL_dlengscale = gpuarray.sum(dl_gpu).get()
return dL_dvar, dL_dlengscale, dL_dZ, dL_dmu, dL_dS, dL_dgamma
# gamma gradients
grad_gamma_gpu.fill(0.)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dgamma_gpu, dL_dpsi1.size)
linalg_gpu.sum_axis(grad_gamma_gpu, psi1_comb_gpu, N, M)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dgamma_gpu, dL_dpsi2.size)
linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()
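For reference, the spike-and-slab psi1 statistic that the psi1computations kernel evaluates in log space can also be written directly in NumPy. The following is a plain-CPU sketch intended only for cross-checking the GPU result; the function name and the shape conventions in the comments are illustrative and not part of this commit.

import numpy as np

def psi1_reference(variance, lengthscale, Z, mu, S, gamma):
    # psi1[n,m] = variance * prod_q [ gamma*exp(-(mu-Z)^2/(2*(S+l^2)))/sqrt(S/l^2+1) + (1-gamma)*exp(-Z^2/(2*l^2)) ]
    # with Z: MxQ, mu/S/gamma: NxQ and lengthscale a length-Q array (or a scalar).
    l2 = np.square(lengthscale)
    denom = S[:, None, :] / l2 + 1.                                               # N x 1 x Q
    slab = gamma[:, None, :] * np.exp(-0.5 * np.square(mu[:, None, :] - Z[None, :, :])
                                      / (S[:, None, :] + l2)) / np.sqrt(denom)    # N x M x Q
    spike = (1. - gamma[:, None, :]) * np.exp(-0.5 * np.square(Z[None, :, :]) / l2)
    return variance * np.prod(slab + spike, axis=-1)                              # N x M

The CUDA kernel accumulates the same per-dimension factors as a sum of log-sum-exp terms, which avoids underflow when Q is large.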

View file

@@ -3,13 +3,9 @@
import numpy as np
from scipy import weave
from ...util.misc import param_to_array
from stationary import Stationary
from GPy.util.caching import Cache_this
from ...core.parameterization import variational
from psi_comp import ssrbf_psi_comp
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
from ...util.config import *

import numpy as np
from stationary import Stationary
from psi_comp import PSICOMP_RBF
from psi_comp.rbf_psi_gpucomp import PSICOMP_RBF_GPU
from ...util.config import *

class RBF(Stationary):
@@ -26,10 +22,11 @@ class RBF(Stationary):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
self.weave_options = {} self.weave_options = {}
self.group_spike_prob = False self.group_spike_prob = False
self.psicomp = PSICOMP_RBF()
if self.useGPU: if self.useGPU:
self.psicomp = PSICOMP_SSRBF() self.psicomp = PSICOMP_RBF_GPU()
else:
self.psicomp = PSICOMP_RBF()
def K_of_r(self, r): def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2) return self.variance * np.exp(-0.5 * r**2)
@@ -37,6 +34,15 @@ class RBF(Stationary):
def dK_dr(self, r): def dK_dr(self, r):
return -r*self.K_of_r(r) return -r*self.K_of_r(r)
def __getstate__(self):
dc = super(RBF, self).__getstate__()
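        # when pickling, store a plain (CPU) PSICOMP_RBF in place of the GPU psi-computation object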
if self.useGPU:
dc['psicomp'] = PSICOMP_RBF()
return dc
def __setstate__(self, state):
return super(RBF, self).__setstate__(state)
def spectrum(self, omega): def spectrum(self, omega):
assert self.input_dim == 1 #TODO: higher dim spectra? assert self.input_dim == 1 #TODO: higher dim spectra?
return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2) return self.variance*np.sqrt(2*np.pi)*self.lengthscale*np.exp(-self.lengthscale*2*omega**2/2)
@@ -46,331 +52,22 @@ class RBF(Stationary):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[0]
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[1]
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior)[2]
if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
return ssrbf_psi_comp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM dL_dvar, dL_dlengscale = self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[:2]
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): self.variance.gradient = dL_dvar
if self.useGPU: self.lengthscale.gradient = dL_dlengscale
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
# dL_dvar, dL_dlengscale, dL_dZ, dL_dgamma, dL_dmu, dL_dS = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
dL_dvar, dL_dlengscale, _, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
self.variance.gradient = dL_dvar
self.lengthscale.gradient = dL_dlengscale
# _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #contributions from psi0:
# self.variance.gradient = np.sum(dL_dpsi0)
#
# #from psi1
# self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
# if self.ARD:
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
# else:
# self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#
# #from psi2
# self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
# if self.ARD:
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
# else:
# self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale**2
if l2.size != self.input_dim:
l2 = l2*np.ones(self.input_dim)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
self.lengthscale.gradient = 0.
#from psi1
denom, _, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
d_length = psi1[:,:,None] * ((dist_sq - 1.)/(self.lengthscale*denom) +1./self.lengthscale)
dpsi1_dlength = d_length * dL_dpsi1[:, :, None]
if self.ARD:
self.lengthscale.gradient += dpsi1_dlength.sum(0).sum(0)
else:
self.lengthscale.gradient += dpsi1_dlength.sum()
self.variance.gradient += np.sum(dL_dpsi1 * psi1) / self.variance
#from psi2
S = variational_posterior.variance
_, Zdist_sq, _, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
if not self.ARD:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2).sum()
else:
self.lengthscale.gradient += self._weave_psi2_lengthscale_grads(dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2)
self.variance.gradient += 2.*np.sum(dL_dpsi2 * psi2)/self.variance
else:
            raise ValueError, "unknown distribution received for psi-statistics"
def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[2]
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _, dL_dZ, _, _, _ = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
return dL_dZ
# _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #psi1
# grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#
# #psi2
# grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
#
# return grad
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
grad = np.einsum('ij,ij,ijk,ijk->jk', dL_dpsi1, psi1, dist, -1./(denom*l2))
#psi2
Zdist, Zdist_sq, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
term1 = Zdist / l2 # M, M, Q
S = variational_posterior.variance
term2 = mudist / (2.*S[:,None,None,:] + l2) # N, M, M, Q
grad += 2.*np.einsum('ijk,ijk,ijkl->kl', dL_dpsi2, psi2, term1[None,:,:,:] + term2)
return grad
else:
            raise ValueError, "unknown distribution received for psi-statistics"
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM return self.psicomp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)[3:]
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
if self.useGPU:
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _, _, dL_dmu, dL_dS, dL_dgamma = ssrbf_psi_comp.psiDerivativecomputations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
return dL_dmu, dL_dS, dL_dgamma
# ndata = variational_posterior.mean.shape[0]
#
# _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
# _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#
# #psi1
# grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
# grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
# grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#
# #psi2
# grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
# grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
# grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
#
# if self.group_spike_prob:
# grad_gamma[:] = grad_gamma.mean(axis=0)
#
# return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2
#psi1
denom, dist, dist_sq, psi1 = self._psi1computations(Z, variational_posterior)
tmp = psi1[:, :, None] / l2 / denom
grad_mu = np.sum(dL_dpsi1[:, :, None] * tmp * dist, 1)
grad_S = np.sum(dL_dpsi1[:, :, None] * 0.5 * tmp * (dist_sq - 1), 1)
#psi2
_, _, mudist, mudist_sq, psi2 = self._psi2computations(Z, variational_posterior)
S = variational_posterior.variance
tmp = psi2[:, :, :, None] / (2.*S[:,None,None,:] + l2)
grad_mu += -2.*np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2, tmp , mudist)
grad_S += np.einsum('ijk,ijkl,ijkl->il', dL_dpsi2 , tmp , (2.*mudist_sq - 1))
else:
            raise ValueError, "unknown distribution received for psi-statistics"
return grad_mu, grad_S
#---------------------------------------#
# Precomputations #
#---------------------------------------#
@Cache_this(limit=1)
def _psi1computations(self, Z, vp):
mu, S = vp.mean, vp.variance
l2 = self.lengthscale **2
denom = S[:, None, :] / l2 + 1. # N,1,Q
dist = Z[None, :, :] - mu[:, None, :] # N,M,Q
dist_sq = np.square(dist) / l2 / denom # N,M,Q
exponent = -0.5 * np.sum(dist_sq + np.log(denom), -1)#N,M
psi1 = self.variance * np.exp(exponent) # N,M
return denom, dist, dist_sq, psi1
@Cache_this(limit=1, ignore_args=(0,))
def _Z_distances(self, Z):
Zhat = 0.5 * (Z[:, None, :] + Z[None, :, :]) # M,M,Q
Zdist = 0.5 * (Z[:, None, :] - Z[None, :, :]) # M,M,Q
return Zhat, Zdist
@Cache_this(limit=1)
def _psi2computations(self, Z, vp):
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for private(tmp, exponent_tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
mu, S = vp.mean, vp.variance
N, Q = mu.shape
M = Z.shape[0]
#compute required distances
Zhat, Zdist = self._Z_distances(Z)
Zdist_sq = np.square(Zdist / self.lengthscale) # M,M,Q
#allocate memory for the things we want to compute
mudist = np.empty((N, M, M, Q))
mudist_sq = np.empty((N, M, M, Q))
psi2 = np.empty((N, M, M))
l2 = self.lengthscale **2
denom = (2.*S[:,None,None,:] / l2) + 1. # N,Q
half_log_denom = 0.5 * np.log(denom[:,0,0,:])
denom_l2 = denom[:,0,0,:]*l2
variance_sq = float(np.square(self.variance))
code = """
double tmp, exponent_tmp;
%s
for (int n=0; n<N; n++)
{
for (int m=0; m<M; m++)
{
for (int mm=0; mm<(m+1); mm++)
{
exponent_tmp = 0.0;
for (int q=0; q<Q; q++)
{
//compute mudist
tmp = mu(n,q) - Zhat(m,mm,q);
mudist(n,m,mm,q) = tmp;
mudist(n,mm,m,q) = tmp;
//now mudist_sq
tmp = tmp*tmp/denom_l2(n,q);
mudist_sq(n,m,mm,q) = tmp;
mudist_sq(n,mm,m,q) = tmp;
//now exponent
tmp = -Zdist_sq(m,mm,q) - tmp - half_log_denom(n,q);
exponent_tmp += tmp;
}
//compute psi2 by exponentiating
psi2(n,m,mm) = variance_sq * exp(exponent_tmp);
psi2(n,mm,m) = psi2(n,m,mm);
}
}
}
""" % pragma_string
support_code = """
%s
#include <math.h>
""" % header_string
mu = param_to_array(mu)
weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['N', 'M', 'Q', 'mu', 'Zhat', 'mudist_sq', 'mudist', 'denom_l2', 'Zdist_sq', 'half_log_denom', 'psi2', 'variance_sq'],
type_converters=weave.converters.blitz, **self.weave_options)
return Zdist, Zdist_sq, mudist, mudist_sq, psi2
def _weave_psi2_lengthscale_grads(self, dL_dpsi2, psi2, Zdist_sq, S, mudist_sq, l2):
#here's the einsum equivalent, it's ~3 times slower
#return 2.*np.einsum( 'ijk,ijk,ijkl,il->l', dL_dpsi2, psi2, Zdist_sq * (2.*S[:,None,None,:]/l2 + 1.) + mudist_sq + S[:, None, None, :] / l2, 1./(2.*S + l2))*self.lengthscale
result = np.zeros(self.input_dim)
if config.getboolean('parallel', 'openmp'):
pragma_string = '#pragma omp parallel for reduction(+:tmp)'
header_string = '#include <omp.h>'
libraries = ['gomp']
else:
pragma_string = ''
header_string = ''
libraries = []
code = """
double tmp;
for(int q=0; q<Q; q++)
{
tmp = 0.0;
%s
for(int n=0; n<N; n++)
{
for(int m=0; m<M; m++)
{
//diag terms
tmp += dL_dpsi2(n,m,m) * psi2(n,m,m) * (Zdist_sq(m,m,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,m,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
//off-diag terms
for(int mm=0; mm<m; mm++)
{
tmp += 2.0 * dL_dpsi2(n,m,mm) * psi2(n,m,mm) * (Zdist_sq(m,mm,q) * (2.0*S(n,q)/l2(q) + 1.0) + mudist_sq(n,m,mm,q) + S(n,q)/l2(q)) / (2.0*S(n,q) + l2(q)) ;
}
}
}
result(q) = tmp;
}
""" % pragma_string
support_code = """
%s
#include <math.h>
""" % header_string
N,Q = S.shape
M = psi2.shape[-1]
S = param_to_array(S)
weave.inline(code, support_code=support_code, libraries=libraries,
arg_names=['psi2', 'dL_dpsi2', 'N', 'M', 'Q', 'mudist_sq', 'l2', 'Zdist_sq', 'S', 'result'],
type_converters=weave.converters.blitz, **self.weave_options)
return 2.*result*self.lengthscale

View file

@@ -1,139 +0,0 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from kern import Kern
import numpy as np
from ...util.linalg import tdot
from ...util.config import *
from stationary import Stationary
from psi_comp import ssrbf_psi_comp
class SSRBF(Stationary):
"""
Radial Basis Function kernel, aka squared-exponential, exponentiated quadratic or Gaussian kernel
for Spike-and-Slab GPLVM
.. math::
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) \ \ \ \ \ \\text{ where } r^2 = \sum_{i=1}^d \\frac{ (x_i-x^\prime_i)^2}{\ell_i^2}
where \ell_i is the lengthscale, \sigma^2 the variance and d the dimensionality of the input.
:param input_dim: the number of input dimensions
:type input_dim: int
:param variance: the variance of the kernel
:type variance: float
:param lengthscale: the vector of lengthscale of the kernel
:type lengthscale: array or list of the appropriate size (or float if there is only one lengthscale parameter)
:param ARD: Auto Relevance Determination. If equal to "False", the kernel is isotropic (ie. one single lengthscale parameter \ell), otherwise there is one lengthscale parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
.. Note: this object implements both the ARD and 'spherical' version of the function
"""
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=True, active_dims=None, name='SSRBF'):
assert ARD==True, "Not Implemented!"
super(SSRBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2)
def dK_dr(self, r):
return -r*self.K_of_r(r)
def parameters_changed(self):
pass
def Kdiag(self, X):
ret = np.empty(X.shape[0])
ret[:] = self.variance
return ret
#---------------------------------------#
# PSI statistics #
#---------------------------------------#
def psi0(self, Z, variational_posterior):
ret = np.empty(variational_posterior.mean.shape[0])
ret[:] = self.variance
return ret
def psi1(self, Z, variational_posterior):
_psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return _psi1
def psi2(self, Z, variational_posterior):
_psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
return _psi2
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
ndata = variational_posterior.mean.shape[0]
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma
#---------------------------------------#
# Precomputations #
#---------------------------------------#
#@cache_this(1)
def _K_computations(self, X, X2):
"""
K(X,X2) - X is NxQ
Q -> input dimension (self.input_dim)
"""
if X2 is None:
self._X2 = None
X = X / self.lengthscale
Xsquare = np.sum(np.square(X), axis=1)
self._K_dist2 = -2.*tdot(X) + (Xsquare[:, None] + Xsquare[None, :])
else:
self._X2 = X2.copy()
X = X / self.lengthscale
X2 = X2 / self.lengthscale
self._K_dist2 = -2.*np.dot(X, X2.T) + (np.sum(np.square(X), axis=1)[:, None] + np.sum(np.square(X2), axis=1)[None, :])
self._K_dvar = np.exp(-0.5 * self._K_dist2)
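As a quick illustration of the ARD RBF form documented above, a minimal numpy sketch (standalone, the function name is only illustrative) that evaluates k(r) = \sigma^2 exp(-r^2/2) with one lengthscale per input dimension, mirroring K_of_r and _K_computations:
import numpy as np
def ard_rbf(X, X2, variance, lengthscale):
    # rescale each column by its lengthscale, then build the squared distances r^2
    Xs, X2s = X / lengthscale, X2 / lengthscale
    r2 = np.sum(np.square(Xs), 1)[:, None] + np.sum(np.square(X2s), 1)[None, :] \
         - 2. * np.dot(Xs, X2s.T)
    return variance * np.exp(-0.5 * r2)
X = np.random.randn(5, 3)
K = ard_rbf(X, X, variance=1.0, lengthscale=np.array([1., 2., 0.5]))
print(K.shape)   # (5, 5); the diagonal equals the variance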

View file

@ -6,7 +6,6 @@ from kern import Kern
import numpy as np import numpy as np
from ...core.parameterization import Param from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp from ...core.parameterization.transformations import Logexp
import numpy as np
class Static(Kern): class Static(Kern):
def __init__(self, input_dim, variance, active_dims, name): def __init__(self, input_dim, variance, active_dims, name):
@ -39,7 +38,7 @@ class Static(Kern):
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
K = self.K(variational_posterior.mean, Z) K = self.K(variational_posterior.mean, Z)
return K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inheriting classes return np.einsum('ij,ik->jk',K,K) #K[:,:,None]*K[:,None,:] # NB. more efficient implementations on inheriting classes
class White(Static): class White(Static):
@ -53,7 +52,7 @@ class White(Static):
return np.zeros((X.shape[0], X2.shape[0])) return np.zeros((X.shape[0], X2.shape[0]))
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_full(self, dL_dK, X, X2=None): def update_gradients_full(self, dL_dK, X, X2=None):
self.variance.gradient = np.trace(dL_dK) self.variance.gradient = np.trace(dL_dK)
@ -82,12 +81,12 @@ class Bias(Static):
self.variance.gradient = dL_dKdiag.sum() self.variance.gradient = dL_dKdiag.sum()
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
ret = np.empty((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) ret = np.empty((Z.shape[0], Z.shape[0]), dtype=np.float64)
ret[:] = self.variance**2 ret[:] = self.variance*self.variance*variational_posterior.shape[0]
return ret return ret
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum() self.variance.gradient = dL_dpsi0.sum() + dL_dpsi1.sum() + 2.*self.variance*dL_dpsi2.sum()*variational_posterior.shape[0]
class Fixed(Static): class Fixed(Static):
def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'): def __init__(self, input_dim, covariance_matrix, variance=1., active_dims=None, name='fixed'):
@ -97,7 +96,7 @@ class Fixed(Static):
:param variance: the variance of the kernel :param variance: the variance of the kernel
:type variance: float :type variance: float
""" """
super(Bias, self).__init__(input_dim, variance, active_dims, name) super(Fixed, self).__init__(input_dim, variance, active_dims, name)
self.fixed_K = covariance_matrix self.fixed_K = covariance_matrix
def K(self, X, X2): def K(self, X, X2):
return self.variance * self.fixed_K return self.variance * self.fixed_K
@ -112,7 +111,7 @@ class Fixed(Static):
self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K) self.variance.gradient = np.einsum('i,i', dL_dKdiag, self.fixed_K)
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
return np.zeros((variational_posterior.shape[0], Z.shape[0], Z.shape[0]), dtype=np.float64) return np.zeros((Z.shape[0], Z.shape[0]), dtype=np.float64)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
self.variance.gradient = dL_dpsi0.sum() self.variance.gradient = dL_dpsi0.sum()

View file

@ -79,7 +79,6 @@ class Stationary(Kern):
#a convenience function, so we can cache dK_dr #a convenience function, so we can cache dK_dr
return self.dK_dr(self._scaled_dist(X, X2)) return self.dK_dr(self._scaled_dist(X, X2))
@Cache_this(limit=5, ignore_args=(0,))
def _unscaled_dist(self, X, X2=None): def _unscaled_dist(self, X, X2=None):
""" """
Compute the Euclidean distance between each row of X and X2, or between Compute the Euclidean distance between each row of X and X2, or between
@ -169,11 +168,18 @@ class Stationary(Kern):
#the lower memory way with a loop #the lower memory way with a loop
tmp = invdist*dL_dr tmp = invdist*dL_dr
if X2 is None:
tmp *= 2.
X2 = X
ret = np.empty(X.shape, dtype=np.float64) ret = np.empty(X.shape, dtype=np.float64)
<<<<<<< HEAD
[np.sum(tmp*(X[:,q:q+1]-X2[:,q:q+1]), axis=1, out=ret[:,q]) for q in xrange(self.input_dim)] [np.sum(tmp*(X[:,q:q+1]-X2[:,q:q+1]), axis=1, out=ret[:,q]) for q in xrange(self.input_dim)]
=======
if X2 is None:
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
ret2 = np.empty(X.shape, dtype=np.float64)
[np.einsum('ij,ji->j', tmp, X[:,q][:,None]-X[:,q][None,:], out=ret2[:,q]) for q in xrange(self.input_dim)]
ret += ret2
else:
[np.einsum('ij,ij->i', tmp, X[:,q][:,None]-X2[:,q][None,:], out=ret[:,q]) for q in xrange(self.input_dim)]
>>>>>>> 1061bf52482aa3bf6769db810c955d5fbf51ceae
ret /= self.lengthscale**2 ret /= self.lengthscale**2
return ret return ret

View file

@ -0,0 +1,208 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np
from scipy import weave
from kern import Kern
from ...util.linalg import tdot
from ...util.misc import param_to_array
from ...core.parameterization import Param
from ...core.parameterization.transformations import Logexp
from ...util.caching import Cache_this
from ...core.parameterization import variational
from ...util.config import *
class TruncLinear(Kern):
"""
Truncated Linear kernel
.. math::
k(x,y) = \sum_{i=1}^{\text{input\_dim}} \sigma^2_i \max(0, (x_i - \delta_i)(y_i - \delta_i))
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, variances=None, delta=None, ARD=False, active_dims=None, name='linear'):
super(TruncLinear, self).__init__(input_dim, active_dims, name)
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
delta = np.asarray(delta)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
delta = np.zeros(1)
else:
if variances is not None:
variances = np.asarray(variances)
delta = np.asarray(delta)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
delta = np.zeros(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.delta = Param('delta', delta)
self.add_parameter(self.variances)
self.add_parameter(self.delta)
@Cache_this(limit=2)
def K(self, X, X2=None):
XX = self.variances*self._product(X, X2)
return XX.sum(axis=-1)
@Cache_this(limit=2)
def _product(self, X, X2=None):
if X2 is None:
X2 = X
XX = np.einsum('nq,mq->nmq',X-self.delta,X2-self.delta)
XX[XX<0] = 0
return XX
def Kdiag(self, X):
return (self.variances*np.square(X-self.delta)).sum(axis=-1)
def update_gradients_full(self, dL_dK, X, X2=None):
dK_dvar = self._product(X, X2)
if X2 is None:
X2=X
dK_ddelta = self.variances*(2*self.delta-X[:,None,:]-X2[None,:,:])*(dK_dvar>0)
if self.ARD:
self.variances.gradient[:] = np.einsum('nmq,nm->q',dK_dvar,dL_dK)
self.delta.gradient[:] = np.einsum('nmq,nm->q',dK_ddelta,dL_dK)
else:
self.variances.gradient[:] = np.einsum('nmq,nm->',dK_dvar,dL_dK)
self.delta.gradient[:] = np.einsum('nmq,nm->',dK_ddelta,dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
if self.ARD:
self.variances.gradient[:] = np.einsum('nq,n->q',np.square(X-self.delta),dL_dKdiag)
self.delta.gradient[:] = np.einsum('nq,n->q',2*self.variances*(self.delta-X),dL_dKdiag)
else:
self.variances.gradient[:] = np.einsum('nq,n->',np.square(X-self.delta),dL_dKdiag)
self.delta.gradient[:] = np.einsum('nq,n->',2*self.variances*(self.delta-X),dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
XX = self._product(X, X2)
if X2 is None:
Xp = (self.variances*(X-self.delta))*(XX>0)
else:
Xp = (self.variances*(X2-self.delta))*(XX>0)
if X2 is None:
return np.einsum('nmq,nm->nq',Xp,dL_dK)+np.einsum('mnq,nm->mq',Xp,dL_dK)
else:
return np.einsum('nmq,nm->nq',Xp,dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*(X-self.delta)
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances
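A small numpy sketch of the truncated linear covariance documented above (standalone; names are only illustrative): the per-dimension product of the shifted inputs is clipped at zero and then weighted by the variances, exactly as _product and K do:
import numpy as np
def trunc_linear(X, X2, variances, delta):
    # pairwise per-dimension products of the shifted inputs, clipped at zero
    XX = np.einsum('nq,mq->nmq', X - delta, X2 - delta)
    XX[XX < 0] = 0.
    return (variances * XX).sum(axis=-1)
X = np.random.randn(4, 2)
K = trunc_linear(X, X, variances=np.ones(2), delta=np.zeros(2))
print(K.shape)   # (4, 4)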
class TruncLinear_inf(Kern):
"""
Truncated Linear kernel
.. math::
k(x,y) = \sum_{i=1}^{\text{input\_dim}} \sigma^2_i \max(0, x_i y_i - \sigma_q)
:param input_dim: the number of input dimensions
:type input_dim: int
:param variances: the vector of variances :math:`\sigma^2_i`
:type variances: array or list of the appropriate size (or float if there
is only one variance parameter)
:param ARD: Auto Relevance Determination. If False, the kernel has only one
variance parameter \sigma^2, otherwise there is one variance
parameter per dimension.
:type ARD: Boolean
:rtype: kernel object
"""
def __init__(self, input_dim, interval, variances=None, ARD=False, active_dims=None, name='linear'):
super(TruncLinear_inf, self).__init__(input_dim, active_dims, name)
self.interval = interval
self.ARD = ARD
if not ARD:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == 1, "Only one variance needed for non-ARD kernel"
else:
variances = np.ones(1)
else:
if variances is not None:
variances = np.asarray(variances)
assert variances.size == self.input_dim, "bad number of variances, need one ARD variance per input_dim"
else:
variances = np.ones(self.input_dim)
self.variances = Param('variances', variances, Logexp())
self.add_parameter(self.variances)
# @Cache_this(limit=2)
def K(self, X, X2=None):
tmp = self._product(X, X2)
return (self.variances*tmp).sum(axis=-1)
# @Cache_this(limit=2)
def _product(self, X, X2=None):
if X2 is None:
X2 = X
X_X2 = X[:,None,:]-X2[None,:,:]
tmp = np.abs(X_X2**3)/6+np.einsum('nq,mq->nmq',X,X2)*(self.interval[1]-self.interval[0]) \
-(X[:,None,:]+X2[None,:,:])*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])/2+(self.interval[1]**3-self.interval[0]**3)/3.
return tmp
def Kdiag(self, X):
tmp = np.square(X)*(self.interval[1]-self.interval[0]) \
-X*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])+(self.interval[1]**3-self.interval[0]**3)/3
return (self.variances*tmp).sum(axis=-1)
def update_gradients_full(self, dL_dK, X, X2=None):
dK_dvar = self._product(X, X2)
if self.ARD:
self.variances.gradient[:] = np.einsum('nmq,nm->q',dK_dvar,dL_dK)
else:
self.variances.gradient[:] = np.einsum('nmq,nm->',dK_dvar,dL_dK)
def update_gradients_diag(self, dL_dKdiag, X):
tmp = np.square(X)*(self.interval[1]-self.interval[0]) \
-X*(self.interval[1]*self.interval[1]-self.interval[0]*self.interval[0])+(self.interval[1]**3-self.interval[0]**3)/3
if self.ARD:
self.variances.gradient[:] = np.einsum('nq,n->q',tmp,dL_dKdiag)
else:
self.variances.gradient[:] = np.einsum('nq,n->',tmp,dL_dKdiag)
def gradients_X(self, dL_dK, X, X2=None):
XX = self._product(X, X2)
if X2 is None:
Xp = (self.variances*(X-self.delta))*(XX>0)
else:
Xp = (self.variances*(X2-self.delta))*(XX>0)
if X2 is None:
return np.einsum('nmq,nm->nq',Xp,dL_dK)+np.einsum('mnq,nm->mq',Xp,dL_dK)
else:
return np.einsum('nmq,nm->nq',Xp,dL_dK)
def gradients_X_diag(self, dL_dKdiag, X):
return 2.*self.variances*dL_dKdiag[:,None]*(X-self.delta)
def input_sensitivity(self):
return np.ones(self.input_dim) * self.variances

View file

@ -17,3 +17,4 @@ from ss_gplvm import SSGPLVM
from gp_coregionalized_regression import GPCoregionalizedRegression from gp_coregionalized_regression import GPCoregionalizedRegression
from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression from sparse_gp_coregionalized_regression import SparseGPCoregionalizedRegression
from gp_heteroscedastic_regression import GPHeteroscedasticRegression from gp_heteroscedastic_regression import GPHeteroscedasticRegression
from ss_mrd import SSMRD

View file

@ -8,7 +8,7 @@ from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..inference.optimization import SCG
from ..util import linalg from ..util import linalg
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
import logging import logging
@ -25,7 +25,9 @@ class BayesianGPLVM(SparseGP):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='bayesian gplvm', mpi_comm=None, **kwargs):
self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False
self.logger = logging.getLogger(self.__class__.__name__) self.logger = logging.getLogger(self.__class__.__name__)
if X == None: if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
@ -61,22 +63,42 @@ class BayesianGPLVM(SparseGP):
from ..inference.latent_function_inference.var_dtc import VarDTCMissingData from ..inference.latent_function_inference.var_dtc import VarDTCMissingData
self.logger.debug("creating inference_method with var_dtc missing data") self.logger.debug("creating inference_method with var_dtc missing data")
inference_method = VarDTCMissingData(inan=inan) inference_method = VarDTCMissingData(inan=inan)
elif mpi_comm is not None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
else: else:
from ..inference.latent_function_inference.var_dtc import VarDTC from ..inference.latent_function_inference.var_dtc import VarDTC
self.logger.debug("creating inference_method var_dtc") self.logger.debug("creating inference_method var_dtc")
inference_method = VarDTC() inference_method = VarDTC()
if isinstance(inference_method,VarDTC_minibatch):
inference_method.mpi_comm = mpi_comm
if kernel.useGPU and isinstance(inference_method, VarDTC_GPU):
kernel.psicomp.GPU_direct = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.logger.info("Adding X as parameter") self.logger.info("Adding X as parameter")
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
if mpi_comm != None:
from ..util.mpi import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)
self.Y_local = self.Y[N_start:N_end]
print 'MPI RANK: '+str(self.mpi_comm.rank)+' with datasize: '+str(self.N_range)
mpi_comm.Bcast(self.param_array, root=0)
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient = X_grad X.mean.gradient, X.variance.gradient = X_grad
def get_X_gradients(self, X):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU): if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
update_gradients(self) update_gradients(self, mpi_comm=self.mpi_comm)
return return
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
@ -188,6 +210,50 @@ class BayesianGPLVM(SparseGP):
from ..plotting.matplot_dep import dim_reduction_plots from ..plotting.matplot_dep import dim_reduction_plots
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def __getstate__(self):
dc = super(BayesianGPLVM, self).__getstate__()
dc['mpi_comm'] = None
if self.mpi_comm != None:
del dc['N_range']
del dc['N_list']
del dc['Y_local']
return dc
def __setstate__(self, state):
return super(BayesianGPLVM, self).__setstate__(state)
#=====================================================
# The MPI parallelization
# - can move to model at some point
#=====================================================
def _set_params_transformed(self, p):
if self.mpi_comm != None:
if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
self.mpi_comm.Bcast(np.int32(1),root=0)
self.mpi_comm.Bcast(p, root=0)
super(BayesianGPLVM, self)._set_params_transformed(p)
def optimize(self, optimizer=None, start=None, **kwargs):
self.__IN_OPTIMIZATION__ = True
if self.mpi_comm==None:
super(BayesianGPLVM, self).optimize(optimizer,start,**kwargs)
elif self.mpi_comm.rank==0:
super(BayesianGPLVM, self).optimize(optimizer,start,**kwargs)
self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
x = self._get_params_transformed().copy()
flag = np.empty(1,dtype=np.int32)
while True:
self.mpi_comm.Bcast(flag,root=0)
if flag==1:
self._set_params_transformed(x)
elif flag==-1:
break
else:
self.__IN_OPTIMIZATION__ = False
raise Exception("Unrecognizable flag for synchronization!")
self.__IN_OPTIMIZATION__ = False
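The optimize/_set_params_transformed pair above uses a simple broadcast protocol: rank 0 runs the optimiser and, before each parameter update, broadcasts a flag (1 = new parameters follow, -1 = stop), while the other ranks spin on Bcast and apply whatever parameters they receive. A stripped-down sketch of the same idea (toy gradient step, requires mpi4py; not GPy code):
import numpy as np
from mpi4py import MPI
comm = MPI.COMM_WORLD
params = np.zeros(3)
flag = np.empty(1, dtype=np.int32)
if comm.rank == 0:
    for _ in range(5):
        params -= 0.1 * 2. * params                        # pretend optimiser step
        comm.Bcast(np.array([1], dtype=np.int32), root=0)  # flag: new parameters follow
        comm.Bcast(params, root=0)
    comm.Bcast(np.array([-1], dtype=np.int32), root=0)     # flag: optimisation finished
else:
    while True:
        comm.Bcast(flag, root=0)
        if flag == -1:
            break
        comm.Bcast(params, root=0)                         # keep the local copy in sync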
def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, input_dim, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):

View file

@ -11,7 +11,7 @@ from GPy.models.gplvm import GPLVM
# from ..core import model # from ..core import model
# from ..util.linalg import pdinv, PCA # from ..util.linalg import pdinv, PCA
class SparseGPLVM(SparseGPRegression, GPLVM): class SparseGPLVM(SparseGPRegression):
""" """
Sparse Gaussian Process Latent Variable Model Sparse Gaussian Process Latent Variable Model
@ -23,40 +23,25 @@ class SparseGPLVM(SparseGPRegression, GPLVM):
:type init: 'PCA'|'random' :type init: 'PCA'|'random'
""" """
def __init__(self, Y, input_dim, kernel=None, init='PCA', num_inducing=10): def __init__(self, Y, input_dim, X=None, kernel=None, init='PCA', num_inducing=10):
X = self.initialise_latent(init, input_dim, Y) if X is None:
from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y)
SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing) SparseGPRegression.__init__(self, X, Y, kernel=kernel, num_inducing=num_inducing)
self.ensure_default_constraints()
def _get_param_names(self): def parameters_changed(self):
return (sum([['X_%i_%i' % (n, q) for q in range(self.input_dim)] for n in range(self.num_data)], []) super(SparseGPLVM, self).parameters_changed()
+ SparseGPRegression._get_param_names(self)) self.X.gradient = self.kern.gradients_X(self.grad_dict['dL_dKnm'], self.X, self.Z)
def _get_params(self): def plot_latent(self, labels=None, which_indices=None,
return np.hstack((self.X.flatten(), SparseGPRegression._get_params(self))) resolution=50, ax=None, marker='o', s=40,
fignum=None, plot_inducing=True, legend=True,
plot_limits=None,
aspect='auto', updates=False, predict_kwargs={}, imshow_kwargs={}):
assert "matplotlib" in sys.modules, "matplotlib package has not been imported."
from ..plotting.matplot_dep import dim_reduction_plots
def _set_params(self, x): return dim_reduction_plots.plot_latent(self, labels, which_indices,
self.X = x[:self.X.size].reshape(self.num_data, self.input_dim).copy() resolution, ax, marker, s,
SparseGPRegression._set_params(self, x[self.X.size:]) fignum, plot_inducing, legend,
plot_limits, aspect, updates, predict_kwargs, imshow_kwargs)
def log_likelihood(self):
return SparseGPRegression.log_likelihood(self)
def dL_dX(self):
dL_dX = self.kern.dKdiag_dX(self.dL_dpsi0, self.X)
dL_dX += self.kern.gradients_X(self.dL_dpsi1, self.X, self.Z)
return dL_dX
def _log_likelihood_gradients(self):
return np.hstack((self.dL_dX().flatten(), SparseGPRegression._log_likelihood_gradients(self)))
def plot(self):
GPLVM.plot(self)
# passing Z without a small amount of jitter will induce the white kernel where we don't want it!
mu, var, upper, lower = SparseGPRegression.predict(self, self.Z + np.random.randn(*self.Z.shape) * 0.0001)
pb.plot(mu[:, 0] , mu[:, 1], 'ko')
def plot_latent(self, *args, **kwargs):
input_1, input_2 = GPLVM.plot_latent(*args, **kwargs)
pb.plot(m.Z[:, input_1], m.Z[:, input_2], '^w')

View file

@ -2,18 +2,14 @@
# Licensed under the BSD 3-clause license (see LICENSE.txt) # Licensed under the BSD 3-clause license (see LICENSE.txt)
import numpy as np import numpy as np
import itertools
from matplotlib import pyplot
from ..core.sparse_gp import SparseGP from ..core.sparse_gp import SparseGP
from .. import kern from .. import kern
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from ..inference.optimization import SCG
from ..util import linalg
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch from ..inference.latent_function_inference.var_dtc_parallel import update_gradients, VarDTC_minibatch
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
from ..kern._src.psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF_GPU
class SSGPLVM(SparseGP): class SSGPLVM(SparseGP):
""" """
@ -28,8 +24,12 @@ class SSGPLVM(SparseGP):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike_and_Slab GPLVM', group_spike=False, mpi_comm=None, **kwargs):
self.mpi_comm = mpi_comm
self.__IN_OPTIMIZATION__ = False
self.group_spike = group_spike
if X == None: if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X, fracs = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
@ -41,46 +41,60 @@ class SSGPLVM(SparseGP):
if X_variance is None: # The variance of the variational approximation (S) if X_variance is None: # The variance of the variational approximation (S)
X_variance = np.random.uniform(0,.1,X.shape) X_variance = np.random.uniform(0,.1,X.shape)
gamma = np.empty_like(X, order='F') # The posterior probabilities of the binary variable in the variational approximation gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) gamma[:] = 0.5 + 0.1 * np.random.randn(X.shape[0], input_dim)
gamma[gamma>1.-1e-9] = 1.-1e-9
if group_spike: gamma[gamma<1e-9] = 1e-9
gamma[:] = gamma.mean(axis=0)
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
pi = np.empty((input_dim))
pi[:] = 0.5
if likelihood is None: if likelihood is None:
likelihood = Gaussian() likelihood = Gaussian()
if kernel is None: if kernel is None:
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
if kernel.useGPU:
pi = np.empty((input_dim)) kernel.psicomp = PSICOMP_SSRBF_GPU()
pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b if inference_method is None:
inference_method = VarDTC_minibatch(mpi_comm=mpi_comm)
self.variational_prior = SpikeAndSlabPrior(pi=pi,learnPi=True) # the prior probability of the latent binary variable b
X = np.asfortranarray(X)
X_variance = np.asfortranarray(X_variance)
gamma = np.asfortranarray(gamma)
X = SpikeAndSlabPosterior(X, X_variance, gamma) X = SpikeAndSlabPosterior(X, X_variance, gamma)
if group_spike:
kernel.group_spike_prob = True
self.variational_prior.group_spike_prob = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.add_parameter(self.variational_prior) self.add_parameter(self.variational_prior)
if mpi_comm != None:
from ..util.mpi import divide_data
N_start, N_end, N_list = divide_data(Y.shape[0], mpi_comm)
self.N_range = (N_start, N_end)
self.N_list = np.array(N_list)
self.Y_local = self.Y[N_start:N_end]
print 'MPI RANK: '+str(self.mpi_comm.rank)+' with datasize: '+str(self.N_range)
mpi_comm.Bcast(self.param_array, root=0)
if self.group_spike:
[self.X.gamma[:,i].tie('tieGamma'+str(i)) for i in xrange(self.X.gamma.shape[1])] # Tie columns together
def set_X_gradients(self, X, X_grad): def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form.""" """Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
def get_X_gradients(self, X):
"""Get the gradients of the posterior distribution of X in its specific form."""
return X.mean.gradient, X.variance.gradient, X.binary_prob.gradient
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch): if isinstance(self.inference_method, VarDTC_GPU) or isinstance(self.inference_method, VarDTC_minibatch):
update_gradients(self) update_gradients(self, mpi_comm=self.mpi_comm)
return return
super(SSGPLVM, self).parameters_changed() super(SSGPLVM, self).parameters_changed()
@ -104,235 +118,47 @@ class SSGPLVM(SparseGP):
return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs) return dim_reduction_plots.plot_latent(self, plot_inducing=plot_inducing, *args, **kwargs)
def do_test_latents(self, Y): def __getstate__(self):
""" dc = super(SSGPLVM, self).__getstate__()
Compute the latent representation for a set of new points Y dc['mpi_comm'] = None
if self.mpi_comm != None:
Notes: del dc['N_range']
This will only work with a univariate Gaussian likelihood (for now) del dc['N_list']
""" del dc['Y_local']
assert not self.likelihood.is_heteroscedastic return dc
N_test = Y.shape[0]
input_dim = self.Z.shape[1] def __setstate__(self, state):
means = np.zeros((N_test, input_dim)) return super(SSGPLVM, self).__setstate__(state)
covars = np.zeros((N_test, input_dim))
#=====================================================
dpsi0 = -0.5 * self.output_dim * self.likelihood.precision # The MPI parallelization
dpsi2 = self.dL_dpsi2[0][None, :, :] # TODO: this may change if we ignore het. likelihoods # - can move to model at some point
V = self.likelihood.precision * Y #=====================================================
#compute CPsi1V def _set_params_transformed(self, p):
if self.Cpsi1V is None: if self.mpi_comm != None:
psi1V = np.dot(self.psi1.T, self.likelihood.V) if self.__IN_OPTIMIZATION__ and self.mpi_comm.rank==0:
tmp, _ = linalg.dtrtrs(self._Lm, np.asfortranarray(psi1V), lower=1, trans=0) self.mpi_comm.Bcast(np.int32(1),root=0)
tmp, _ = linalg.dpotrs(self.LB, tmp, lower=1) self.mpi_comm.Bcast(p, root=0)
self.Cpsi1V, _ = linalg.dtrtrs(self._Lm, tmp, lower=1, trans=1) super(SSGPLVM, self)._set_params_transformed(p)
dpsi1 = np.dot(self.Cpsi1V, V.T) def optimize(self, optimizer=None, start=None, **kwargs):
self.__IN_OPTIMIZATION__ = True
start = np.zeros(self.input_dim * 2) if self.mpi_comm==None:
super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
for n, dpsi1_n in enumerate(dpsi1.T[:, :, None]): elif self.mpi_comm.rank==0:
args = (self.kern, self.Z, dpsi0, dpsi1_n.T, dpsi2) super(SSGPLVM, self).optimize(optimizer,start,**kwargs)
xopt, fopt, neval, status = SCG(f=latent_cost, gradf=latent_grad, x=start, optargs=args, display=False) self.mpi_comm.Bcast(np.int32(-1),root=0)
elif self.mpi_comm.rank>0:
mu, log_S = xopt.reshape(2, 1, -1) x = self._get_params_transformed().copy()
means[n] = mu[0].copy() flag = np.empty(1,dtype=np.int32)
covars[n] = np.exp(log_S[0]).copy() while True:
self.mpi_comm.Bcast(flag,root=0)
return means, covars if flag==1:
self._set_params_transformed(x)
def dmu_dX(self, Xnew): elif flag==-1:
""" break
Calculate the gradient of the prediction at Xnew w.r.t Xnew. else:
""" self.__IN_OPTIMIZATION__ = False
dmu_dX = np.zeros_like(Xnew) raise Exception("Unrecognizable flag for synchronization!")
for i in range(self.Z.shape[0]): self.__IN_OPTIMIZATION__ = False
dmu_dX += self.kern.dK_dX(self.Cpsi1Vf[i:i + 1, :], Xnew, self.Z[i:i + 1, :])
return dmu_dX
def dmu_dXnew(self, Xnew):
"""
Individual gradient of prediction at Xnew w.r.t. each sample in Xnew
"""
dK_dX = np.zeros((Xnew.shape[0], self.num_inducing))
ones = np.ones((1, 1))
for i in range(self.Z.shape[0]):
dK_dX[:, i] = self.kern.dK_dX(ones, Xnew, self.Z[i:i + 1, :]).sum(-1)
return np.dot(dK_dX, self.Cpsi1Vf)
def plot_steepest_gradient_map(self, fignum=None, ax=None, which_indices=None, labels=None, data_labels=None, data_marker='o', data_s=40, resolution=20, aspect='auto', updates=False, ** kwargs):
input_1, input_2 = significant_dims = most_significant_input_dimensions(self, which_indices)
X = np.zeros((resolution ** 2, self.input_dim))
indices = np.r_[:X.shape[0]]
if labels is None:
labels = range(self.output_dim)
def plot_function(x):
X[:, significant_dims] = x
dmu_dX = self.dmu_dXnew(X)
argmax = np.argmax(dmu_dX, 1)
return dmu_dX[indices, argmax], np.array(labels)[argmax]
if ax is None:
fig = pyplot.figure(num=fignum)
ax = fig.add_subplot(111)
if data_labels is None:
data_labels = np.ones(self.num_data)
ulabels = []
for lab in data_labels:
if not lab in ulabels:
ulabels.append(lab)
marker = itertools.cycle(list(data_marker))
from GPy.util import Tango
for i, ul in enumerate(ulabels):
if type(ul) is np.string_:
this_label = ul
elif type(ul) is np.int64:
this_label = 'class %i' % ul
else:
this_label = 'class %i' % i
m = marker.next()
index = np.nonzero(data_labels == ul)[0]
x = self.X[index, input_1]
y = self.X[index, input_2]
ax.scatter(x, y, marker=m, s=data_s, color=Tango.nextMedium(), label=this_label)
ax.set_xlabel('latent dimension %i' % input_1)
ax.set_ylabel('latent dimension %i' % input_2)
from matplotlib.cm import get_cmap
from GPy.util.latent_space_visualizations.controllers.imshow_controller import ImAnnotateController
if not 'cmap' in kwargs.keys():
kwargs.update(cmap=get_cmap('jet'))
controller = ImAnnotateController(ax,
plot_function,
tuple(self.X.min(0)[:, significant_dims]) + tuple(self.X.max(0)[:, significant_dims]),
resolution=resolution,
aspect=aspect,
**kwargs)
ax.legend()
ax.figure.tight_layout()
if updates:
pyplot.show()
clear = raw_input('Enter to continue')
if clear.lower() in 'yes' or clear == '':
controller.deactivate()
return controller.view
def plot_X_1d(self, fignum=None, ax=None, colors=None):
"""
Plot latent space X in 1D:
- if fig is given, create input_dim subplots in fig and plot in these
- if ax is given plot input_dim 1D latent space plots of X into each `axis`
- if neither fig nor ax is given create a figure with fignum and plot in there
colors:
colors of different latent space dimensions input_dim
"""
import pylab
if ax is None:
fig = pylab.figure(num=fignum, figsize=(8, min(12, (2 * self.X.shape[1]))))
if colors is None:
colors = pylab.gca()._get_lines.color_cycle
pylab.clf()
else:
colors = iter(colors)
plots = []
x = np.arange(self.X.shape[0])
for i in range(self.X.shape[1]):
if ax is None:
a = fig.add_subplot(self.X.shape[1], 1, i + 1)
elif isinstance(ax, (tuple, list)):
a = ax[i]
else:
raise ValueError("Need one ax per latent dimnesion input_dim")
a.plot(self.X, c='k', alpha=.3)
plots.extend(a.plot(x, self.X.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x,
self.X.T[i] - 2 * np.sqrt(self.X_variance.T[i]),
self.X.T[i] + 2 * np.sqrt(self.X_variance.T[i]),
facecolor=plots[-1].get_color(),
alpha=.3)
a.legend(borderaxespad=0.)
a.set_xlim(x.min(), x.max())
if i < self.X.shape[1] - 1:
a.set_xticklabels('')
pylab.draw()
if ax is None:
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig
def getstate(self):
"""
Get the current state of the class,
here just all the indices, rest can get recomputed
"""
return SparseGP._getstate(self) + [self.init]
def setstate(self, state):
self._const_jitter = None
self.init = state.pop()
SparseGP._setstate(self, state)
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables for test points
(negative log-likelihood: should be minimised!)
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -lik, -np.hstack((dmu.flatten(), dlnS.flatten()))
def latent_cost(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
objective function for fitting the latent variables (negative log-likelihood: should be minimised!)
This is the same as latent_cost_and_grad but only for the objective
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
psi0 = kern.psi0(Z, mu, S)
psi1 = kern.psi1(Z, mu, S)
psi2 = kern.psi2(Z, mu, S)
lik = dL_dpsi0 * psi0 + np.dot(dL_dpsi1.flatten(), psi1.flatten()) + np.dot(dL_dpsi2.flatten(), psi2.flatten()) - 0.5 * np.sum(np.square(mu) + S) + 0.5 * np.sum(log_S)
return -float(lik)
def latent_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
"""
This is the same as latent_cost_and_grad but only for the grad
"""
mu, log_S = mu_S.reshape(2, 1, -1)
S = np.exp(log_S)
mu0, S0 = kern.dpsi0_dmuS(dL_dpsi0, Z, mu, S)
mu1, S1 = kern.dpsi1_dmuS(dL_dpsi1, Z, mu, S)
mu2, S2 = kern.dpsi2_dmuS(dL_dpsi2, Z, mu, S)
dmu = mu0 + mu1 + mu2 - mu
# dS = S0 + S1 + S2 -0.5 + .5/S
dlnS = S * (S0 + S1 + S2 - 0.5) + .5
return -np.hstack((dmu.flatten(), dlnS.flatten()))

34
GPy/models/ss_mrd.py Normal file
View file

@ -0,0 +1,34 @@
"""
The Manifold Relevance Determination model with the spike-and-slab prior
"""
from ..core import Model
from .ss_gplvm import SSGPLVM
class SSMRD(Model):
def __init__(self, Ylist, input_dim, X=None, X_variance=None,
initx = 'PCA', initz = 'permute',
num_inducing=10, Z=None, kernel=None,
inference_method=None, likelihoods=None, name='ss_mrd', Ynames=None):
super(SSMRD, self).__init__(name)
self.updates = False
self.models = [SSGPLVM(y, input_dim, X=X, X_variance=X_variance, num_inducing=num_inducing,Z=Z,init=initx,
kernel=kernel.copy() if kernel else None,inference_method=inference_method,likelihood=likelihoods,
name='model_'+str(i)) for i,y in enumerate(Ylist)]
self.add_parameters(*(self.models))
[[[self.models[m].X.mean[i,j:j+1].tie('mean_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.mean.shape[1])]
for i in xrange(self.models[0].X.mean.shape[0])]
[[[self.models[m].X.variance[i,j:j+1].tie('var_'+str(i)+'_'+str(j)) for m in xrange(len(self.models))] for j in xrange(self.models[0].X.variance.shape[1])]
for i in xrange(self.models[0].X.variance.shape[0])]
self.updates = True
def parameters_changed(self):
super(SSMRD, self).parameters_changed()
self._log_marginal_likelihood = sum([m._log_marginal_likelihood for m in self.models])
def log_likelihood(self):
return self._log_marginal_likelihood

View file

@ -93,7 +93,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_sid
a.set_xticklabels('') a.set_xticklabels('')
# binary prob plot # binary prob plot
a = fig.add_subplot(*sub2) a = fig.add_subplot(*sub2)
a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center') a.bar(x,gamma[:,i],bottom=0.,linewidth=0,width=1.0,align='center')
a.set_xlim(x.min(), x.max()) a.set_xlim(x.min(), x.max())
a.set_ylim([0.,1.]) a.set_ylim([0.,1.])
pb.draw() pb.draw()

View file

@ -88,7 +88,7 @@ class vector_show(matplotlib_show):
class lvm(matplotlib_show): class lvm(matplotlib_show):
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1], disable_drag=False):
"""Visualize a latent variable model """Visualize a latent variable model
:param model: the latent variable model to visualize. :param model: the latent variable model to visualize.
@ -107,12 +107,14 @@ class lvm(matplotlib_show):
if isinstance(latent_axes,mpl.axes.Axes): if isinstance(latent_axes,mpl.axes.Axes):
self.cid = latent_axes.figure.canvas.mpl_connect('button_press_event', self.on_click) self.cid = latent_axes.figure.canvas.mpl_connect('button_press_event', self.on_click)
self.cid = latent_axes.figure.canvas.mpl_connect('motion_notify_event', self.on_move) if not disable_drag:
self.cid = latent_axes.figure.canvas.mpl_connect('motion_notify_event', self.on_move)
self.cid = latent_axes.figure.canvas.mpl_connect('axes_leave_event', self.on_leave) self.cid = latent_axes.figure.canvas.mpl_connect('axes_leave_event', self.on_leave)
self.cid = latent_axes.figure.canvas.mpl_connect('axes_enter_event', self.on_enter) self.cid = latent_axes.figure.canvas.mpl_connect('axes_enter_event', self.on_enter)
else: else:
self.cid = latent_axes[0].figure.canvas.mpl_connect('button_press_event', self.on_click) self.cid = latent_axes[0].figure.canvas.mpl_connect('button_press_event', self.on_click)
self.cid = latent_axes[0].figure.canvas.mpl_connect('motion_notify_event', self.on_move) if not disable_drag:
self.cid = latent_axes[0].figure.canvas.mpl_connect('motion_notify_event', self.on_move)
self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_leave_event', self.on_leave) self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_leave_event', self.on_leave)
self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter) self.cid = latent_axes[0].figure.canvas.mpl_connect('axes_enter_event', self.on_enter)
@ -124,6 +126,7 @@ class lvm(matplotlib_show):
self.move_on = False self.move_on = False
self.latent_index = latent_index self.latent_index = latent_index
self.latent_dim = model.input_dim self.latent_dim = model.input_dim
self.disable_drag = disable_drag
# The red cross which shows current latent point. # The red cross which shows current latent point.
self.latent_values = vals self.latent_values = vals
@ -147,8 +150,13 @@ class lvm(matplotlib_show):
def on_click(self, event): def on_click(self, event):
if event.inaxes!=self.latent_axes: return if event.inaxes!=self.latent_axes: return
self.move_on = not self.move_on if self.disable_drag:
self.called = True self.move_on = True
self.called = True
self.on_move(event)
else:
self.move_on = not self.move_on
self.called = True
def on_move(self, event): def on_move(self, event):
if event.inaxes!=self.latent_axes: return if event.inaxes!=self.latent_axes: return
@ -274,8 +282,11 @@ class image_show(matplotlib_show):
:param preset_mean: the preset mean of a scaled image. :param preset_mean: the preset mean of a scaled image.
:type preset_mean: double :type preset_mean: double
:param preset_std: the preset standard deviation of a scaled image. :param preset_std: the preset standard deviation of a scaled image.
:type preset_std: double""" :type preset_std: double
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean=0., preset_std=1., select_image=0): :param cmap: the colormap for image visualization
:type cmap: matplotlib.cm"""
def __init__(self, vals, axes=None, dimensions=(16,16), transpose=False, order='C', invert=False, scale=False, palette=[], preset_mean=0., preset_std=1., select_image=0, cmap=None):
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.dimensions = dimensions self.dimensions = dimensions
self.transpose = transpose self.transpose = transpose
@ -290,8 +301,10 @@ class image_show(matplotlib_show):
self.set_image(self.vals) self.set_image(self.vals)
if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette) if not self.palette == []: # Can just show the image (self.set_image() took care of setting the palette)
self.handle = self.axes.imshow(self.vals, interpolation='nearest') self.handle = self.axes.imshow(self.vals, interpolation='nearest')
else: # Use a boring gray map. elif cmap==None: # Use a jet map.
self.handle = self.axes.imshow(self.vals, cmap=plt.cm.gray, interpolation='nearest') # @UndefinedVariable self.handle = self.axes.imshow(self.vals, cmap=plt.cm.jet, interpolation='nearest') # @UndefinedVariable
else: # Use the selected map.
self.handle = self.axes.imshow(self.vals, cmap=cmap, interpolation='nearest') # @UndefinedVariable
plt.show() plt.show()
def modify(self, vals): def modify(self, vals):
@ -399,7 +412,7 @@ class mocap_data_show(matplotlib_show):
def __init__(self, vals, axes=None, connect=None): def __init__(self, vals, axes=None, connect=None):
if axes==None: if axes==None:
fig = plt.figure() fig = plt.figure()
axes = fig.add_subplot(111, projection='3d',aspect='equal') axes = fig.add_subplot(111, projection='3d', aspect='equal')
matplotlib_show.__init__(self, vals, axes) matplotlib_show.__init__(self, vals, axes)
self.connect = connect self.connect = connect
@ -437,6 +450,7 @@ class mocap_data_show(matplotlib_show):
self.process_values() self.process_values()
self.initialize_axes_modify() self.initialize_axes_modify()
self.draw_vertices() self.draw_vertices()
self.initialize_axes()
self.finalize_axes_modify() self.finalize_axes_modify()
self.draw_edges() self.draw_edges()
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()
@ -459,10 +473,10 @@ class mocap_data_show(matplotlib_show):
self.axes.set_xlim(self.x_lim) self.axes.set_xlim(self.x_lim)
self.axes.set_ylim(self.y_lim) self.axes.set_ylim(self.y_lim)
self.axes.set_zlim(self.z_lim) self.axes.set_zlim(self.z_lim)
self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1.5, 1.5]) self.axes.auto_scale_xyz([-1., 1.], [-1., 1.], [-1., 1.])
#self.axes.set_aspect('equal') # self.axes.set_aspect('equal')
self.axes.autoscale(enable=False) # self.axes.autoscale(enable=False)
def finalize_axes_modify(self): def finalize_axes_modify(self):
self.axes.set_xlim(self.x_lim) self.axes.set_xlim(self.x_lim)

View file

@ -16,6 +16,7 @@ import diag
import initialization import initialization
import multioutput import multioutput
import linalg_gpu import linalg_gpu
import mpi
try: try:
import sympy import sympy

View file

@ -5,12 +5,44 @@ Global variables: initSuccess
providing CUBLAS handle: cublas_handle providing CUBLAS handle: cublas_handle
""" """
gpu_initialized = False
gpu_device = None
gpu_context = None
MPI_enabled = False
try:
from mpi4py import MPI
MPI_enabled = True
except:
pass
try:
if MPI_enabled and MPI.COMM_WORLD.size>1:
from .parallel import get_id_within_node
gpuid = get_id_within_node()
import pycuda.driver
pycuda.driver.init()
if gpuid>=pycuda.driver.Device.count():
print '['+MPI.Get_processor_name()+'] more processes than available GPUs!'
#MPI.COMM_WORLD.Abort()
raise
gpu_device = pycuda.driver.Device(gpuid)
gpu_context = gpu_device.make_context()
gpu_initialized = True
else:
import pycuda.autoinit
gpu_initialized = True
except:
pass
try: try:
import pycuda.autoinit
from scikits.cuda import cublas from scikits.cuda import cublas
import scikits.cuda.linalg as culinalg import scikits.cuda.linalg as culinalg
culinalg.init() culinalg.init()
cublas_handle = cublas.cublasCreate() cublas_handle = cublas.cublasCreate()
initSuccess = True
except: except:
initSuccess = False pass
def closeGPU():
if gpu_context is not None:
gpu_context.detach()

View file

@ -19,6 +19,9 @@ try:
strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step") strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")
# np.trace(np.dot(A,B)) (also equivalent to (A*B.T).sum() ) A - a1 x a2, B - a2 x a1
traceDot = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="A[i]*B[(i%a1)*a2+i/a1]", arguments="double *A, double *B, int a1, int a2")
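# A quick sanity check of the identity above (plain numpy, not part of the GPU path):
#   A = [[1., 2.], [3., 4.]],  B = [[5., 6.], [7., 8.]]
#   A.dot(B) = [[19., 22.], [43., 50.]]          ->  np.trace(A.dot(B)) = 19 + 50 = 69
#   (A * B.T).sum() = (A * [[5., 7.], [6., 8.]]).sum() = 5 + 14 + 18 + 32 = 69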
#======================================================================================= #=======================================================================================
# Element-wise functions # Element-wise functions
#======================================================================================= #=======================================================================================
@ -57,3 +60,34 @@ try:
except: except:
pass pass
try:
import scikits.cuda.linalg as culinalg
from scikits.cuda import cublas
from scikits.cuda.cula import culaExceptions
except:
pass
def jitchol(A, L, cublas_handle, maxtries=5):
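# Copy A into L and attempt a Cholesky factorisation on the GPU (CULA); if that fails,
# fall back to a CPU factorisation with diagonal jitter, starting at 1e-6 * mean(diag(A))
# and multiplying the jitter by 10 on every retry, up to maxtries attempts.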
try:
cublas.cublasDcopy(cublas_handle, A.size, A.gpudata, 1, L.gpudata, 1)
culinalg.cho_factor(L,'L')
except culaExceptions:
diagA = np.diag(A)
if np.any(diagA <= 0.):
raise linalg.LinAlgError, "not pd: non-positive diagonal elements"
jitter = diagA.mean() * 1e-6
while maxtries > 0 and np.isfinite(jitter):
print 'Warning: adding jitter of {:.10e}'.format(jitter)
try:
return linalg.cholesky(A + np.eye(A.shape[0]).T * jitter, lower=True)
except:
jitter *= 10
finally:
maxtries -= 1
raise linalg.LinAlgError, "not positive definite, even with jitter."

35
GPy/util/mpi.py Normal file
View file

@ -0,0 +1,35 @@
"""
The tools for mpi
"""
try:
import numpy as np
from mpi4py import MPI
numpy_to_MPI_typemap = {
np.dtype(np.float64) : MPI.DOUBLE,
np.dtype(np.float32) : MPI.FLOAT,
np.dtype(np.int) : MPI.INT,
np.dtype(np.int8) : MPI.CHAR,
np.dtype(np.uint8) : MPI.UNSIGNED_CHAR,
np.dtype(np.int32) : MPI.INT,
np.dtype(np.uint32) : MPI.UNSIGNED_INT,
}
except:
pass
def divide_data(datanum, comm):
residue = (datanum)%comm.size
datanum_list = np.empty((comm.size),dtype=np.int32)
for i in xrange(comm.size):
if i<residue:
datanum_list[i] = int(datanum/comm.size)+1
else:
datanum_list[i] = int(datanum/comm.size)
if comm.rank<residue:
size = datanum/comm.size+1
offset = size*comm.rank
else:
size = datanum/comm.size
offset = size*comm.rank+residue
return offset, offset+size, datanum_list
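# Worked example of the split above (no real communicator needed):
#   datanum = 10, comm.size = 3  ->  residue = 1, datanum_list = [4, 3, 3]
#   rank 0: offset 0, end 4   (ranks below the residue get one extra point)
#   rank 1: offset 4, end 7
#   rank 2: offset 7, end 10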

14
GPy/util/parallel.py Normal file
View file

@ -0,0 +1,14 @@
"""
The module of tools for parallelization (MPI)
"""
try:
from mpi4py import MPI
except:
pass
def get_id_within_node(comm=MPI.COMM_WORLD):
rank = comm.rank
nodename = MPI.Get_processor_name()
nodelist = comm.allgather(nodename)
return len([i for i in nodelist[:rank] if i==nodename])
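# Example (hypothetical node names): with ranks 0-3 on node "gpu01" and ranks 4-7 on node
# "gpu02", each rank gets its index within its own node (0, 1, 2, 3 on either node), which
# GPy.util.config uses as the CUDA device id so processes sharing a node pick different GPUs.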