Copy and paste observable_array from repository to try and resolve bizarre merge request.

This commit is contained in:
Neil Lawrence 2014-04-14 10:00:07 +01:00
commit c2d3c82944
30 changed files with 1646 additions and 184 deletions

View file

@ -4,6 +4,7 @@
from model import * from model import *
from parameterization.parameterized import adjust_name_for_printing, Parameterizable from parameterization.parameterized import adjust_name_for_printing, Parameterizable
from parameterization.param import Param, ParamConcatenation from parameterization.param import Param, ParamConcatenation
from parameterization.observable_array import ObsAr
from gp import GP from gp import GP
from sparse_gp import SparseGP from sparse_gp import SparseGP

View file

@ -5,6 +5,7 @@ Created on 27 Feb 2014
''' '''
from collections import defaultdict from collections import defaultdict
import weakref
def intarray_default_factory(): def intarray_default_factory():
import numpy as np import numpy as np
@ -41,60 +42,66 @@ class ObservablesList(object):
def __init__(self): def __init__(self):
self._poc = [] self._poc = []
def remove(self, value):
return self._poc.remove(value)
def __delitem__(self, ind):
return self._poc.__delitem__(ind)
def __setitem__(self, ind, item):
return self._poc.__setitem__(ind, item)
def __getitem__(self, ind): def __getitem__(self, ind):
return self._poc.__getitem__(ind) p,o,c = self._poc[ind]
return p, o(), c
def remove(self, priority, observable, callble):
"""
"""
self.flush()
for i in range(len(self) - 1, -1, -1):
p,o,c = self[i]
if priority==p and observable==o and callble==c:
del self._poc[i]
def __repr__(self): def __repr__(self):
return self._poc.__repr__() return self._poc.__repr__()
def add(self, priority, observable, callble):
def append(self, obj): ins = 0
return self._poc.append(obj) for pr, _, _ in self:
if priority > pr:
break
def index(self, value): ins += 1
return self._poc.index(value) self._poc.insert(ins, (priority, weakref.ref(observable), callble))
def extend(self, iterable):
return self._poc.extend(iterable)
def __str__(self): def __str__(self):
return self._poc.__str__() ret = []
curr_p = None
for p, o, c in self:
curr = ''
if curr_p != p:
pre = "{!s}: ".format(p)
curr_pre = pre
else: curr_pre = " "*len(pre)
curr_p = p
curr += curr_pre
ret.append(curr + ", ".join(map(repr, [o,c])))
return '\n'.join(ret)
def flush(self):
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
def __iter__(self): def __iter__(self):
return self._poc.__iter__() self.flush()
for p, o, c in self._poc:
if o() is not None:
def insert(self, index, obj): yield p, o(), c
return self._poc.insert(index, obj)
def __len__(self): def __len__(self):
self.flush()
return self._poc.__len__() return self._poc.__len__()
def __deepcopy__(self, memo): def __deepcopy__(self, memo):
self.flush()
s = ObservablesList() s = ObservablesList()
import copy import copy
s._poc = copy.deepcopy(self._poc, memo) s._poc = copy.deepcopy(self._poc, memo)
return s return s
def __getstate__(self): def __getstate__(self):
self.flush()
from ...util.caching import Cacher from ...util.caching import Cacher
obs = [] obs = []
for p, o, c in self: for p, o, c in self:
@ -106,6 +113,6 @@ class ObservablesList(object):
def __setstate__(self, state): def __setstate__(self, state):
self._poc = [] self._poc = []
for p, o, c in state: for p, o, c in state:
self._poc.append((p,o,getattr(o, c))) self.add(p,o,getattr(o, c))
pass pass

View file

@ -0,0 +1,137 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
__updated__ = '2014-03-31'
import numpy as np
from parameter_core import Observable, Pickleable
class ObsAr(np.ndarray, Pickleable, Observable):
    """
    An ndarray which reports changes to its observers.
    The observers can add themselves with a callable, which
    will be called every time this array changes. The callable
    takes exactly one argument, which is this array itself.
    """
    __array_priority__ = -1 # Never give back ObsAr
    def __new__(cls, input_array, *a, **kw):
        # Coerce the input to a writeable, C-contiguous float64 array of at
        # least one dimension; an input that already is an ObsAr is reused
        # as-is (no copy, its observers are preserved).
        if not isinstance(input_array, ObsAr):
            obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
        else: obj = input_array
        #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
        # Run the Pickleable/Observable initializers on the freshly made instance.
        super(ObsAr, obj).__init__(*a, **kw)
        return obj

    def __array_finalize__(self, obj):
        # see InfoArray.__array_finalize__ for comments
        # Views and slices inherit the observers of the array they derive from;
        # direct construction leaves them to __new__.
        if obj is None: return
        self.observers = getattr(obj, 'observers', None)

    def __array_wrap__(self, out_arr, context=None):
        # Results of ufunc operations are returned as plain ndarrays, never
        # ObsAr (consistent with __array_priority__ = -1 above).
        return out_arr.view(np.ndarray)

    def copy(self):
        # copy() delegates to __deepcopy__ with a pre-seeded memo
        # (NOTE(review): the seed entry is immediately overwritten inside
        # __deepcopy__ — presumably intentional, verify against callers).
        memo = {}
        memo[id(self)] = self
        return self.__deepcopy__(memo)

    def __deepcopy__(self, memo):
        # Copy the underlying data, then deep-copy the instance dict
        # (observers etc.) into the new object.
        s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
        memo[id(self)] = s
        import copy
        s.__dict__.update(copy.deepcopy(self.__dict__, memo))
        return s

    def __reduce__(self):
        # Pickle the ndarray state plus the Pickleable-managed attributes.
        func, args, state = super(ObsAr, self).__reduce__()
        return func, args, (state, Pickleable.__getstate__(self))

    def __setstate__(self, state):
        # Restore in the same order __reduce__ packed it: ndarray first,
        # then the Pickleable attributes.
        np.ndarray.__setstate__(self, state[0])
        Pickleable.__setstate__(self, state[1])

    def __setitem__(self, s, val):
        # Every in-place assignment triggers the observer callbacks.
        super(ObsAr, self).__setitem__(s, val)
        self.notify_observers()

    def __getslice__(self, start, stop): # Python 2 slice protocol
        return self.__getitem__(slice(start, stop))

    def __setslice__(self, start, stop, val): # Python 2 slice protocol (notifies via __setitem__)
        return self.__setitem__(slice(start, stop), val)

    # Each in-place arithmetic operator performs the underlying ndarray
    # operation first, then notifies the observers of the change.
    def __ilshift__(self, *args, **kwargs):
        r = np.ndarray.__ilshift__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __irshift__(self, *args, **kwargs):
        r = np.ndarray.__irshift__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __ixor__(self, *args, **kwargs):
        r = np.ndarray.__ixor__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __ipow__(self, *args, **kwargs):
        r = np.ndarray.__ipow__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __ifloordiv__(self, *args, **kwargs):
        r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __isub__(self, *args, **kwargs):
        r = np.ndarray.__isub__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __ior__(self, *args, **kwargs):
        r = np.ndarray.__ior__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __itruediv__(self, *args, **kwargs):
        r = np.ndarray.__itruediv__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __idiv__(self, *args, **kwargs): # Python 2 division operator
        r = np.ndarray.__idiv__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __iand__(self, *args, **kwargs):
        r = np.ndarray.__iand__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __imod__(self, *args, **kwargs):
        r = np.ndarray.__imod__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __iadd__(self, *args, **kwargs):
        r = np.ndarray.__iadd__(self, *args, **kwargs)
        self.notify_observers()
        return r

    def __imul__(self, *args, **kwargs):
        r = np.ndarray.__imul__(self, *args, **kwargs)
        self.notify_observers()
        return r

View file

@ -59,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr):
import pydot import pydot
node = pydot.Node(id(self), shape='record', label=self.name) node = pydot.Node(id(self), shape='record', label=self.name)
G.add_node(node) G.add_node(node)
for o in self._observer_callables_.keys(): for o in self.observers.keys():
label = o.name if hasattr(o, 'name') else str(o) label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label) observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node) G.add_node(observed_node)
@ -324,7 +324,7 @@ class ParamConcatenation(object):
if update: if update:
self.update_all_params() self.update_all_params()
def values(self): def values(self):
return numpy.hstack([p.param_array for p in self.params]) return numpy.hstack([p.param_array.flat for p in self.params])
#=========================================================================== #===========================================================================
# parameter operations: # parameter operations:
#=========================================================================== #===========================================================================

View file

@ -44,22 +44,23 @@ class Observable(object):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super(Observable, self).__init__() super(Observable, self).__init__()
from lists_and_dicts import ObservablesList from lists_and_dicts import ObservablesList
self._observer_callables_ = ObservablesList() self.observers = ObservablesList()
def add_observer(self, observer, callble, priority=0): def add_observer(self, observer, callble, priority=0):
self._insert_sorted(priority, observer, callble) self.observers.add(priority, observer, callble)
def remove_observer(self, observer, callble=None): def remove_observer(self, observer, callble=None):
to_remove = [] to_remove = []
for p, obs, clble in self._observer_callables_: for poc in self.observers:
_, obs, clble = poc
if callble is not None: if callble is not None:
if (obs == observer) and (callble == clble): if (obs == observer) and (callble == clble):
to_remove.append((p, obs, clble)) to_remove.append(poc)
else: else:
if obs is observer: if obs is observer:
to_remove.append((p, obs, clble)) to_remove.append(poc)
for r in to_remove: for r in to_remove:
self._observer_callables_.remove(r) self.observers.remove(*r)
def notify_observers(self, which=None, min_priority=None): def notify_observers(self, which=None, min_priority=None):
""" """
@ -74,21 +75,13 @@ class Observable(object):
if which is None: if which is None:
which = self which = self
if min_priority is None: if min_priority is None:
[callble(self, which=which) for _, _, callble in self._observer_callables_] [callble(self, which=which) for _, _, callble in self.observers]
else: else:
for p, _, callble in self._observer_callables_: for p, _, callble in self.observers:
if p <= min_priority: if p <= min_priority:
break break
callble(self, which=which) callble(self, which=which)
def _insert_sorted(self, p, o, c):
ins = 0
for pr, _, _ in self._observer_callables_:
if p > pr:
break
ins += 1
self._observer_callables_.insert(ins, (p, o, c))
#=============================================================================== #===============================================================================
# Foundation framework for parameterized and param objects: # Foundation framework for parameterized and param objects:
#=============================================================================== #===============================================================================
@ -192,7 +185,7 @@ class Pickleable(object):
def __getstate__(self): def __getstate__(self):
ignore_list = ([#'_parent_', '_parent_index_', ignore_list = ([#'_parent_', '_parent_index_',
#'_observer_callables_', #'observers',
'_param_array_', '_gradient_array_', '_fixes_', '_param_array_', '_gradient_array_', '_fixes_',
'_Cacher_wrap__cachers'] '_Cacher_wrap__cachers']
#+ self.parameter_names(recursive=False) #+ self.parameter_names(recursive=False)

View file

@ -90,7 +90,7 @@ class Parameterized(Parameterizable):
child_node = child.build_pydot(G) child_node = child.build_pydot(G)
G.add_edge(pydot.Edge(node, child_node)) G.add_edge(pydot.Edge(node, child_node))
for o in self._observer_callables_.keys(): for o in self.observers.keys():
label = o.name if hasattr(o, 'name') else str(o) label = o.name if hasattr(o, 'name') else str(o)
observed_node = pydot.Node(id(o), label=label) observed_node = pydot.Node(id(o), label=label)
G.add_node(observed_node) G.add_node(observed_node)

View file

@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior):
self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10)) self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
self.variance = Param('variance',variance) self.variance = Param('variance',variance)
self.add_parameters(self.pi) self.add_parameters(self.pi)
self.group_spike_prob = False
def KL_divergence(self, variational_posterior): def KL_divergence(self, variational_posterior):
mu = variational_posterior.mean mu = variational_posterior.mean
@ -55,7 +56,11 @@ class SpikeAndSlabPrior(VariationalPrior):
S = variational_posterior.variance S = variational_posterior.variance
gamma = variational_posterior.binary_prob gamma = variational_posterior.binary_prob
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. if self.group_spike_prob:
gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
gamma.gradient -= gamma_grad.mean(axis=0)
else:
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
mu.gradient -= gamma*mu mu.gradient -= gamma*mu
S.gradient -= (1. - (1. / (S))) * gamma /2. S.gradient -= (1. - (1. / (S))) * gamma /2.
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)

View file

@ -409,12 +409,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
# optimize # optimize
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
if optimize: m.optimize(messages=verbose, max_f_eval=10000) if optimize: m.optimize(messages=verbose, max_f_eval=10000)
if plot and GPy.plotting.matplot_dep.visualize.visual_available: if plot:
plt.clf plt.clf
ax = m.plot_latent() ax = m.plot_latent()
y = m.likelihood.Y[0, :] y = m.Y[0, :]
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax)
raw_input('Press enter to finish') raw_input('Press enter to finish')
return m return m

View file

@ -32,6 +32,7 @@ from expectation_propagation import EP
from dtc import DTC from dtc import DTC
from fitc import FITC from fitc import FITC
from var_dtc_parallel import VarDTC_minibatch from var_dtc_parallel import VarDTC_minibatch
from var_dtc_gpu import VarDTC_GPU
# class FullLatentFunctionData(object): # class FullLatentFunctionData(object):
# #

View file

@ -32,7 +32,7 @@ class ExactGaussianInference(object):
return Y return Y
else: else:
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
return Y return Y
def inference(self, kern, X, likelihood, Y, Y_metadata=None): def inference(self, kern, X, likelihood, Y, Y_metadata=None):

View file

@ -0,0 +1,545 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
from posterior import Posterior
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
from ...util import diag
from ...core.parameterization.variational import VariationalPosterior
import numpy as np
from ...util.misc import param_to_array
log_2_pi = np.log(2*np.pi)
from ...util import gpu_init
try:
import scikits.cuda.linalg as culinalg
import pycuda.gpuarray as gpuarray
from scikits.cuda import cublas
from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod
except:
pass
class VarDTC_GPU(object):
"""
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
The function self.inference returns a Posterior object, which summarizes
the posterior.
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
"""
const_jitter = np.float64(1e-6)
def __init__(self, batchsize=None, gpu_memory=4., limit=1):
self.batchsize = batchsize
self.gpu_memory = gpu_memory
self.midRes = {}
self.batch_pos = 0 # the starting position of the current mini-batch
self.cublas_handle = gpu_init.cublas_handle
# Initialize GPU caches
self.gpuCache = None
    def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y):
        # Allocate (once) every GPU buffer used by the two inference phases.
        # Safe to call repeatedly: allocation only happens while gpuCache is None.
        ndata = Y.shape[0]
        if self.batchsize==None:
            # No batch size given: pick the largest that fits in gpu_memory.
            self.batchsize = self._estimateBatchSize(kern, ndata, num_inducing, input_dim, output_dim)
        if self.gpuCache == None:
            # All buffers are float64 and Fortran-ordered, matching cuBLAS.
            self.gpuCache = {# inference_likelihood
                             'Kmm_gpu'               :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'Lm_gpu'                :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'ones_gpu'              :gpuarray.empty(num_inducing, np.float64,order='F'),
                             'LL_gpu'                :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'b_gpu'                 :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
                             'v_gpu'                 :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
                             'vvt_gpu'               :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'KmmInvPsi2LLInvT_gpu'  :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'KmmInvPsi2P_gpu'       :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'dL_dpsi2R_gpu'         :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'dL_dKmm_gpu'           :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'psi1Y_gpu'             :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
                             'psi2_gpu'              :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
                             'beta_gpu'              :gpuarray.empty((ndata,),np.float64,order='F'),
                             'YT_gpu'                :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN
                             'betaYT_gpu'            :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN
                             'psi2_t_gpu'            :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'),
                             # inference_minibatch
                             'dL_dpsi0_gpu'          :gpuarray.empty((self.batchsize,),np.float64,order='F'),
                             'dL_dpsi1_gpu'          :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
                             'dL_dpsi2_gpu'          :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
                             'dL_dthetaL_gpu'        :gpuarray.empty((self.batchsize,),np.float64,order='F'),
                             'betapsi1_gpu'          :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
                             'thetaL_t_gpu'          :gpuarray.empty((self.batchsize,),np.float64,order='F'),
                             'betaYT2_gpu'           :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'),
                             'psi0p_gpu'             :gpuarray.empty((self.batchsize,),np.float64,order='F'),
                             'psi1p_gpu'             :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
                             'psi2p_gpu'             :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
                             }
            # Vector of ones: used later to add jitter along a diagonal via axpy.
            self.gpuCache['ones_gpu'].fill(1.0)
            YT_gpu = self.gpuCache['YT_gpu']
            # tr(Y Y^T) == <Y, Y>: computed once here and reused in every call.
            self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
def _estimateMemoryOccupation(self, N, M, D):
"""
Estimate the best batch size.
N - the number of total datapoints
M - the number of inducing points
D - the number of observed (output) dimensions
return: the constant memory size, the memory occupation of batchsize=1
unit: GB
"""
return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024.
def _estimateBatchSize(self, kern, N, M, Q, D):
"""
Estimate the best batch size.
N - the number of total datapoints
M - the number of inducing points
D - the number of observed (output) dimensions
return: the constant memory size, the memory occupation of batchsize=1
unit: GB
"""
if kern.useGPU:
x0,x1 = kern.psicomp.estimateMemoryOccupation(N,M,Q)
else:
x0, x1 = 0.,0.
y0, y1 = self._estimateMemoryOccupation(N, M, D)
opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N)
return opt_batchsize
def _get_YYTfactor(self, Y):
"""
find a matrix L which satisfies LLT = YYT.
Note that L may have fewer columns than Y.
"""
N, D = Y.shape
if (N>=D):
return param_to_array(Y)
else:
return jitchol(tdot(Y))
    def inference_likelihood(self, kern, X, Z, likelihood, Y):
        """
        The first phase of inference:
        Compute: log-likelihood, dL_dKmm

        Cached intermediate results: Kmm, KmmInv,

        Returns (logL, dL_dKmm, Posterior). Runs the psi-statistics
        accumulation on the GPU when kern.useGPU is set, otherwise on the
        CPU with a single upload at the end.
        """
        num_inducing, input_dim = Z.shape[0], Z.shape[1]
        num_data, output_dim = Y.shape

        # Ensure the GPU buffers exist (no-op after the first call).
        self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)

        if isinstance(X, VariationalPosterior):
            uncertain_inputs = True
        else:
            uncertain_inputs = False

        #see whether we've got a different noise variance for each datum
        beta = 1./np.fmax(likelihood.variance, 1e-6)
        het_noise = beta.size > 1

        trYYT = self._trYYT
        psi1Y_gpu = self.gpuCache['psi1Y_gpu']
        psi2_gpu = self.gpuCache['psi2_gpu']
        beta_gpu = self.gpuCache['beta_gpu']
        YT_gpu = self.gpuCache['YT_gpu']
        betaYT_gpu = self.gpuCache['betaYT_gpu']
        psi2_t_gpu = self.gpuCache['psi2_t_gpu']

        # Precompute betaYT = beta*Y^T and YRY = tr(beta * Y Y^T).
        if het_noise:
            beta_gpu.set(np.asfortranarray(beta))
            mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size)
            YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
        else:
            beta_gpu.fill(beta)
            betaYT_gpu.fill(0.)
            # betaYT <- beta * YT (axpy onto a zeroed buffer).
            cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
            YRY_full = trYYT*beta

        if kern.useGPU:
            # Accumulate psi0/psi1Y/psi2 over mini-batches directly on the GPU.
            psi1Y_gpu.fill(0.)
            psi2_gpu.fill(0.)
            psi0_full = 0
            for n_start in xrange(0,num_data,self.batchsize):
                n_end = min(self.batchsize+n_start, num_data)
                ndata = n_end - n_start
                X_slice = X[n_start:n_end]
                beta_gpu_slice = beta_gpu[n_start:n_end]
                betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
                if ndata==self.batchsize:
                    psi2_t_gpu_slice = psi2_t_gpu
                else:
                    # Last (smaller) batch: use a prefix of the scratch buffer.
                    psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata]

                if uncertain_inputs:
                    psi0p_gpu = kern.psi0(Z, X_slice)
                    psi1p_gpu = kern.psi1(Z, X_slice)
                    psi2p_gpu = kern.psi2(Z, X_slice)
                else:
                    # Deterministic inputs: psi statistics collapse to kernel values.
                    psi0p_gpu = kern.Kdiag(X_slice)
                    psi1p_gpu = kern.K(X_slice, Z)

                # psi1Y += psi1^T (beta*Y)  (M x D accumulation).
                cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)

                if het_noise:
                    psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
                else:
                    psi0_full += gpuarray.sum(psi0p_gpu).get()

                if uncertain_inputs:
                    if het_noise:
                        # Weight each datum's psi2 slab by its beta before summing.
                        mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
                        sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
                    else:
                        sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
                else:
                    if het_noise:
                        # NOTE(review): slicing a flat scratch buffer with
                        # [:,num_inducing*ndata] looks suspicious (tuple index
                        # on a 1-D buffer) — verify against upstream source.
                        psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata]
                        mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
                        cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
                    else:
                        # psi2 += beta * psi1^T psi1 (beta folded in as gemm alpha).
                        cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
            if not het_noise:
                psi0_full *= beta
                if uncertain_inputs:
                    # (In the certain-inputs branch beta was already applied
                    # as the gemm alpha above.)
                    cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
        else:
            # CPU fallback: accumulate with numpy, then upload once at the end.
            psi2_full = np.zeros((num_inducing,num_inducing),order='F')
            psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
            psi0_full = 0
            # YRY_full = 0
            for n_start in xrange(0,num_data,self.batchsize):
                n_end = min(self.batchsize+n_start, num_data)
                Y_slice = Y[n_start:n_end]
                X_slice = X[n_start:n_end]

                if uncertain_inputs:
                    psi0 = kern.psi0(Z, X_slice)
                    psi1 = kern.psi1(Z, X_slice)
                    psi2 = kern.psi2(Z, X_slice)
                else:
                    psi0 = kern.Kdiag(X_slice)
                    psi1 = kern.K(X_slice, Z)

                if het_noise:
                    beta_slice = beta[n_start:n_end]
                    psi0_full += (beta_slice*psi0).sum()
                    psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
                    # YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
                else:
                    psi0_full += psi0.sum()
                    psi1Y_full += np.dot(psi1.T,Y_slice) # MxD

                if uncertain_inputs:
                    if het_noise:
                        psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
                    else:
                        psi2_full += psi2.sum(axis=0)
                else:
                    if het_noise:
                        psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
                    else:
                        psi2_full += tdot(psi1.T)

            if not het_noise:
                # Homoscedastic noise: apply beta once after the loop.
                psi0_full *= beta
                psi1Y_full *= beta
                psi2_full *= beta
                # YRY_full = trYYT*beta
            psi1Y_gpu.set(psi1Y_full)
            psi2_gpu.set(psi2_full)

        #======================================================================
        # Compute Common Components
        #======================================================================

        Kmm = kern.K(Z).copy()
        Kmm_gpu = self.gpuCache['Kmm_gpu']
        Kmm_gpu.set(np.asfortranarray(Kmm))
        diag.add(Kmm, self.const_jitter)
        ones_gpu = self.gpuCache['ones_gpu']
        # Add jitter on the GPU copy's diagonal: stride M+1 walks the diagonal.
        cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1)
        # assert np.allclose(Kmm, Kmm_gpu.get())

        # Cholesky of Kmm on the GPU (CPU reference kept for debugging):
        # Lm = jitchol(Kmm)
        #
        Lm_gpu = self.gpuCache['Lm_gpu']
        cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1)
        culinalg.cho_factor(Lm_gpu,'L')
        # print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max()

        # Cholesky of Lambda = Kmm + psi2 (LL_gpu buffer reused in place):
        # Lambda = Kmm+psi2_full
        # LL = jitchol(Lambda)
        #
        Lambda_gpu = self.gpuCache['LL_gpu']
        cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
        cublas.cublasDaxpy(self.cublas_handle, psi2_gpu.size, np.float64(1.0), psi2_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
        LL_gpu = Lambda_gpu
        culinalg.cho_factor(LL_gpu,'L')
        # print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max()

        # b = LL^-1 psi1Y ; bbt = ||b||^2:
        # b,_ = dtrtrs(LL, psi1Y_full)
        # bbt_cpu = np.square(b).sum()
        #
        b_gpu = self.gpuCache['b_gpu']
        cublas.cublasDcopy(self.cublas_handle, b_gpu.size, psi1Y_gpu.gpudata, 1, b_gpu.gpudata, 1)
        cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing)
        bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1)
        # print np.abs(bbt-bbt_cpu)

        # v = LL^-T b ; vvt = v v^T ; LmInvPsi2LmInvT = Lm^-1 psi2 Lm^-T:
        # v,_ = dtrtrs(LL.T,b,lower=False)
        # vvt = np.einsum('md,od->mo',v,v)
        # LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
        #
        v_gpu = self.gpuCache['v_gpu']
        cublas.cublasDcopy(self.cublas_handle, v_gpu.size, b_gpu.gpudata, 1, v_gpu.gpudata, 1)
        cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing)
        vvt_gpu = self.gpuCache['vvt_gpu']
        cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, output_dim, np.float64(1.0), v_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing, np.float64(0.), vvt_gpu.gpudata, num_inducing)
        LmInvPsi2LmInvT_gpu = self.gpuCache['KmmInvPsi2LLInvT_gpu']
        cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1)
        cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
        cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
        #tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1)
        # Trace via strided sum over the diagonal (stride M+1).
        tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get())
        # print np.abs(vvt-vvt_gpu.get()).max()
        # print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT)

        # Chain of triangular solves (CPU reference kept for debugging):
        # Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
        # LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
        # KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
        # KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
        #
        KmmInvPsi2LLInvT_gpu = LmInvPsi2LmInvT_gpu # Reuse GPU memory (size:MxM)
        cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, KmmInvPsi2LLInvT_gpu.gpudata, 1)
        cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
        cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
        cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
        KmmInvPsi2P_gpu = self.gpuCache['KmmInvPsi2P_gpu']
        cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1)
        cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing)
        # print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max()

        # dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
        #
        dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
        cublas.cublasDcopy(self.cublas_handle, vvt_gpu.size, vvt_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
        cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
        cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1)
        # print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max()

        #======================================================================
        # Compute log-likelihood
        #======================================================================
        if het_noise:
            logL_R = -np.log(beta).sum()
        else:
            logL_R = -num_data*np.log(beta)
        # logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
        # Log-determinants from the Cholesky diagonals (stride M+1).
        logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get())
        logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get())
        logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda)
        # print np.abs(logL_old - logL)

        #======================================================================
        # Compute dL_dKmm
        #======================================================================
        # dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
        #
        dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu']
        cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, num_inducing, np.float64(1.0), KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, np.float64(0.), dL_dKmm_gpu.gpudata, num_inducing)
        cublas.cublasDaxpy(self.cublas_handle, dL_dKmm_gpu.size, np.float64(1./output_dim), vvt_gpu.gpudata, 1, dL_dKmm_gpu.gpudata, 1)
        cublas.cublasDscal(self.cublas_handle, dL_dKmm_gpu.size, np.float64(-output_dim/2.), dL_dKmm_gpu.gpudata, 1)
        # print np.abs(dL_dKmm - dL_dKmm_gpu.get()).max()

        #======================================================================
        # Compute the Posterior distribution of inducing points p(u|Y)
        #======================================================================

        post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())

        return logL, dL_dKmm_gpu.get(), post
    def inference_minibatch(self, kern, X, Z, likelihood, Y):
        """
        The second phase of inference: Computing the derivatives over a minibatch of Y.

        Compute dL_dpsi0, dL_dpsi1, dL_dpsi2 (or dL_dKdiag/dL_dKnm for
        point inputs) and dL_dthetaL for the data slice
        [self.batch_pos, self.batch_pos + self.batchsize), advancing
        self.batch_pos as a side effect.

        :returns: (isEnd, (n_start, n_end), grad_dict) where isEnd flags
            whether the end of Y has been reached.

        NOTE(review): mul_bcast / mul_bcast_first / sum_axis / outer_prod /
        join_prod are assumed to be the elementwise/broadcast helpers from
        the GPU linear-algebra utilities — confirm against linalg_gpu.
        """
        num_data, output_dim = Y.shape
        num_inducing = Z.shape[0]
        # Uncertain (variational) inputs use psi statistics; plain arrays
        # fall back to ordinary kernel matrices.
        if isinstance(X, VariationalPosterior):
            uncertain_inputs = True
        else:
            uncertain_inputs = False
        # Gaussian noise precision, clipped away from zero for stability.
        beta = 1./np.fmax(likelihood.variance, 1e-6)
        het_noise = beta.size > 1
        # Select the current minibatch and advance the cursor (wraps to 0
        # once the end of the data is reached).
        n_start = self.batch_pos
        n_end = min(self.batchsize+n_start, num_data)
        if n_end==num_data:
            isEnd = True
            self.batch_pos = 0
        else:
            isEnd = False
            self.batch_pos = n_end
        nSlice = n_end-n_start
        X_slice = X[n_start:n_end]
        #======================================================================
        # Obtain the psi statistics for the minibatch: either produced
        # directly on the GPU by the kernel, or computed on the CPU and
        # uploaded into the cached buffers.
        #======================================================================
        if kern.useGPU:
            if uncertain_inputs:
                psi0p_gpu = kern.psi0(Z, X_slice)
                psi1p_gpu = kern.psi1(Z, X_slice)
                psi2p_gpu = kern.psi2(Z, X_slice)
            else:
                psi0p_gpu = kern.Kdiag(X_slice)
                psi1p_gpu = kern.K(X_slice, Z)
                psi2p_gpu = self.gpuCache['psi2p_gpu']
                # Shrink the cached buffer for a (possibly smaller) last batch.
                if psi2p_gpu.shape[0] > nSlice:
                    psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
        else:
            if uncertain_inputs:
                psi0 = kern.psi0(Z, X_slice)
                psi1 = kern.psi1(Z, X_slice)
                psi2 = kern.psi2(Z, X_slice)
            else:
                psi0 = kern.Kdiag(X_slice)
                psi1 = kern.K(X_slice, Z)
            psi0p_gpu = self.gpuCache['psi0p_gpu']
            psi1p_gpu = self.gpuCache['psi1p_gpu']
            psi2p_gpu = self.gpuCache['psi2p_gpu']
            if psi0p_gpu.shape[0] > nSlice:
                psi0p_gpu = psi0p_gpu[:nSlice]
                psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
                psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
            # cublas works on column-major (Fortran-ordered) data.
            psi0p_gpu.set(np.asfortranarray(psi0))
            psi1p_gpu.set(np.asfortranarray(psi1))
            if uncertain_inputs:
                psi2p_gpu.set(np.asfortranarray(psi2))
        #======================================================================
        # Prepare gpu memory
        #======================================================================
        dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
        v_gpu = self.gpuCache['v_gpu']
        betaYT_gpu = self.gpuCache['betaYT_gpu']
        beta_gpu = self.gpuCache['beta_gpu']
        dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
        dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
        dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
        dL_dthetaL_gpu = self.gpuCache['dL_dthetaL_gpu']
        psi2R_gpu = self.gpuCache['psi2_t_gpu'][:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
        betapsi1_gpu = self.gpuCache['betapsi1_gpu']
        thetaL_t_gpu = self.gpuCache['thetaL_t_gpu']
        betaYT2_gpu = self.gpuCache['betaYT2_gpu']
        betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
        beta_gpu_slice = beta_gpu[n_start:n_end]
        # Adjust to the batch size
        if dL_dpsi0_gpu.shape[0] > nSlice:
            betaYT2_gpu = betaYT2_gpu[:,:nSlice]
            dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
            dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
            dL_dpsi2_gpu = dL_dpsi2_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
            dL_dthetaL_gpu = dL_dthetaL_gpu.ravel()[:nSlice]
            psi2R_gpu = psi2R_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
            thetaL_t_gpu = thetaL_t_gpu.ravel()[:nSlice]
            betapsi1_gpu = betapsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
        # betapsi1 = beta (broadcast over rows) * psi1
        mul_bcast(betapsi1_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
        #======================================================================
        # Compute dL_dpsi
        #======================================================================
        # dL_dpsi0 = -(output_dim/2) * beta
        dL_dpsi0_gpu.fill(0.)
        cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1)
        # dL_dpsi1 = (beta*Y) v^T
        cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
        if uncertain_inputs:
            # dL_dpsi2: beta broadcast against the fixed factor dL_dpsi2R.
            outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size)
        else:
            # Point inputs: fold the psi2 contribution back into dL_dpsi1 (dL_dKnm).
            # NOTE(review): the k dimension is passed as output_dim while the
            # operands look (nSlice x M) and (M x M) — confirm this matches
            # the CPU implementation unless output_dim == num_inducing.
            cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice)
        #======================================================================
        # Compute dL_dthetaL (gradient w.r.t. the likelihood variance)
        #======================================================================
        if not uncertain_inputs:
            # Build the psi2 surrogate from psi1 for point inputs.
            join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing)
        mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice)
        dL_dthetaL_gpu.fill(0.)
        # term: +0.5 * sum_d (beta*y)^2  (copy, square elementwise, halve, sum over outputs)
        cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1)
        mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size)
        cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1)
        sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
        # term: -(output_dim/2) * beta
        cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
        # term: +(output_dim/2) * beta^2 * psi0
        cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1)
        mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size)
        mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size)
        cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
        # term: -beta^2 * sum over inducing pairs of (dL_dpsi2R * psi2)
        thetaL_t_gpu.fill(0.)
        sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing)
        mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
        mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
        cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
        # term: cross term -(v^T (beta*psi1)^T) elementwise with beta*Y, summed over outputs
        cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim)
        mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size)
        sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
        #======================================================================
        # Download results; when the kernel runs on the GPU, the arrays are
        # handed back still on-device.
        #======================================================================
        if kern.useGPU:
            dL_dpsi0 = dL_dpsi0_gpu
            dL_dpsi1 = dL_dpsi1_gpu
        else:
            dL_dpsi0 = dL_dpsi0_gpu.get()
            dL_dpsi1 = dL_dpsi1_gpu.get()
        if uncertain_inputs:
            if kern.useGPU:
                dL_dpsi2 = dL_dpsi2_gpu
            else:
                dL_dpsi2 = dL_dpsi2_gpu.get()
        if het_noise:
            dL_dthetaL = dL_dthetaL_gpu.get()
        else:
            # Homoscedastic noise: a single scalar gradient.
            dL_dthetaL = gpuarray.sum(dL_dthetaL_gpu).get()
        if uncertain_inputs:
            grad_dict = {'dL_dpsi0':dL_dpsi0,
                         'dL_dpsi1':dL_dpsi1,
                         'dL_dpsi2':dL_dpsi2,
                         'dL_dthetaL':dL_dthetaL}
        else:
            grad_dict = {'dL_dKdiag':dL_dpsi0,
                         'dL_dKnm':dL_dpsi1,
                         'dL_dthetaL':dL_dthetaL}
        return isEnd, (n_start,n_end), grad_dict

View file

@ -280,3 +280,54 @@ class VarDTC_minibatch(object):
return isEnd, (n_start,n_end), grad_dict return isEnd, (n_start,n_end), grad_dict
def update_gradients(model):
    """
    Update all gradients of the model using two-phase minibatch inference:
    one full pass for the likelihood and dL_dKmm, then minibatch sweeps
    over Y for the psi-statistic gradients.

    Mutates `model` in place (_log_marginal_likelihood, posterior, kernel,
    Z, X and likelihood gradients); returns nothing.
    """
    # Phase 1: global quantities (log-likelihood, dL_dKmm, posterior over u).
    model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
    het_noise = model.likelihood.variance.size > 1
    if het_noise:
        # One noise-gradient entry per datapoint, filled slice-by-slice below.
        dL_dthetaL = np.empty((model.Y.shape[0],))
    else:
        dL_dthetaL = 0
    #gradients w.r.t. kernel
    model.kern.update_gradients_full(dL_dKmm, model.Z, None)
    kern_grad = model.kern.gradient.copy()
    #gradients w.r.t. Z
    model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
    # Phase 2: iterate over Y in minibatches, accumulating gradients.
    isEnd = False
    while not isEnd:
        isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
        # NOTE(review): for non-variational X the grad_dict carries
        # 'dL_dKdiag'/'dL_dKnm' keys and only dL_dthetaL is consumed here —
        # the kernel/Z gradients are then expected to be handled elsewhere.
        if isinstance(model.X, VariationalPosterior):
            X_slice = model.X[n_range[0]:n_range[1]]
            #gradients w.r.t. kernel
            model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
            kern_grad += model.kern.gradient
            #gradients w.r.t. Z
            model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
                grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
            #gradients w.r.t. posterior parameters of X
            X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
            model.set_X_gradients(X_slice, X_grad)
        if het_noise:
            dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
        else:
            dL_dthetaL += grad_dict['dL_dthetaL']
    # Set the gradients w.r.t. kernel (accumulated over all minibatches)
    model.kern.gradient = kern_grad
    # Update Log-likelihood with the KL term of the variational posterior
    model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
    # update for the KL divergence
    model.variational_prior.update_gradients_KL(model.X)
    # dL_dthetaL: gradient w.r.t. the likelihood (noise) parameters
    model.likelihood.update_gradients(dL_dthetaL)

View file

@ -13,9 +13,10 @@ class Kern(Parameterized):
#=========================================================================== #===========================================================================
# This adds input slice support. The rather ugly code for slicing can be # This adds input slice support. The rather ugly code for slicing can be
# found in kernel_slice_operations # found in kernel_slice_operations
__metaclass__ = KernCallsViaSlicerMeta #__metaclass__ = KernCallsViaSlicerMeta
#=========================================================================== #===========================================================================
def __init__(self, input_dim, active_dims, name, *a, **kw): _support_GPU=False
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
""" """
The base class for a kernel: a positive definite function The base class for a kernel: a positive definite function
which forms of a covariance function (kernel). which forms of a covariance function (kernel).
@ -40,6 +41,8 @@ class Kern(Parameterized):
assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims) assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
self._sliced_X = 0 self._sliced_X = 0
self.useGPU = self._support_GPU and useGPU
@Cache_this(limit=10) @Cache_this(limit=10)
def _slice_X(self, X): def _slice_X(self, X):
return X[:, self.active_dims] return X[:, self.active_dims]

View file

@ -0,0 +1,535 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
"""
The package for the psi statistics computation on GPU
"""
import numpy as np
from GPy.util.caching import Cache_this
from ....util import gpu_init
try:
import pycuda.gpuarray as gpuarray
from scikits.cuda import cublas
from pycuda.reduction import ReductionKernel
from pycuda.elementwise import ElementwiseKernel
from ....util import linalg_gpu
# The kernel form computing psi1 het_noise
comp_psi1 = ElementwiseKernel(
"double *psi1, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"psi1[i] = comp_psi1_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
"comp_psi1",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_psi1_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
{
int n = idx%N;
int m = idx/N;
double psi1_exp=0;
for(int q=0;q<Q;q++){
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)] + muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
double exp2 = log1Gamma[IDX_NQ(n,q)] - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
psi1_exp += LOGEXPSUM(exp1,exp2);
}
return var*exp(psi1_exp);
}
""")
# The kernel form computing psi2 het_noise
comp_psi2 = ElementwiseKernel(
"double *psi2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
"psi2[i] = comp_psi2_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
"comp_psi2",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{
// psi2 (n,m1,m2)
int m2 = idx/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double psi2_exp=0;
for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
psi2_exp += LOGEXPSUM(exp1,exp2);
}
return var*var*exp(psi2_exp);
}
""")
# compute psidenom
comp_logpsidenom = ElementwiseKernel(
"double *out, double *S, double *l, double scale, int N",
"out[i] = comp_logpsidenom_element(S, l, scale, N, i)",
"comp_logpsidenom",
preamble="""
__device__ double comp_logpsidenom_element(double *S, double *l, double scale, int N, int idx)
{
int q = idx/N;
return log(scale*S[idx]/l[q]+1.0);
}
""")
# The kernel form computing psi1 het_noise
comp_dpsi1_dvar = ElementwiseKernel(
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
"dpsi1_dvar[i] = comp_dpsi1_dvar_element(psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
"comp_dpsi1_dvar",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
{
int n = idx%N;
int m = idx/N;
double psi1_sum = 0;
for(int q=0;q<Q;q++){
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)])/2.0 + exp1_e;
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi1_q = LOGEXPSUM(exp1,exp2);
psi1_neq[IDX_NMQ(n,m,q)] = -psi1_q;
psi1exp1[IDX_NMQ(n,m,q)] = exp(exp1_e);
psi1exp2[IDX_MQ(m,q)] = exp(exp2_e);
psi1_sum += psi1_q;
}
for(int q=0;q<Q;q++) {
psi1_neq[IDX_NMQ(n,m,q)] = exp(psi1_neq[IDX_NMQ(n,m,q)]+psi1_sum);
}
return exp(psi1_sum);
}
""")
# The kernel form computing psi1 het_noise
comp_psi1_der = ElementwiseKernel(
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi1_dl[i] = comp_psi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, psi1_neq, psi1exp1, psi1exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi1_der",
preamble="""
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
__device__ double comp_psi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
int q = idx/(M*N);
int m = (idx%(M*N))/N;
int n = idx%N;
double neq = psi1_neq[IDX_NMQ(n,m,q)];
double gamma_c = gamma[IDX_NQ(n,q)];
double Z_c = Z[IDX_MQ(m,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi1exp1_c = psi1exp1[IDX_NMQ(n,m,q)];
double psi1exp2_c = psi1exp2[IDX_MQ(m,q)];
double denom = S_c/l_c+1.0;
double denom_sqrt = sqrt(denom);
double Zmu = Z_c-mu[IDX_NQ(n,q)];
double psi1_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c;
dpsi1_dgamma[IDX_NMQ(n,m,q)] = var*neq*(psi1exp1_c/denom_sqrt - psi1exp2_c);
dpsi1_dmu[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*Zmu*psi1exp1_c);
dpsi1_dS[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*(Zmu*Zmu/(S_c+l_c)-1.0)*psi1exp1_c)/2.0;
dpsi1_dZ[IDX_NMQ(n,m,q)] = var*neq*(-psi1_common*Zmu*psi1exp1_c-gamma1*Z_c/l_c*psi1exp2_c);
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)*l_sqrt_c;
}
""")
# The kernel form computing psi1 het_noise
comp_dpsi2_dvar = ElementwiseKernel(
"double *dpsi2_dvar, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
"dpsi2_dvar[i] = comp_dpsi2_dvar_element(psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
"comp_dpsi2_dvar",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
__device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
{
// psi2 (n,m1,m2)
int m2 = idx/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double psi2_sum=0;
for(int q=0;q<Q;q++){
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
double exp1_e = - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 +exp1_e;
double exp2_e = - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
double psi2_q = LOGEXPSUM(exp1,exp2);
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = -psi2_q;
psi2exp1[IDX_NMMQ(n,m1,m2,q)] = exp(exp1_e);
psi2exp2[IDX_MMQ(m1,m2,q)] = exp(exp2_e);
psi2_sum += psi2_q;
}
for(int q=0;q<Q;q++) {
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = exp(psi2_neq[IDX_NMMQ(n,m1,m2,q)]+psi2_sum);
}
return 2*var*exp(psi2_sum);
}
""")
# The kernel form computing psi1 het_noise
comp_psi2_der = ElementwiseKernel(
"double *dpsi2_dl, double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
"dpsi2_dl[i] = comp_psi2_der_element(dpsi2_dmu, dpsi2_dS, dpsi2_dgamma, dpsi2_dZ, psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
"comp_psi2_der",
preamble="""
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
#define IDX_NQ(n,q) (q*N+n)
#define IDX_MQ(m,q) (q*M+m)
__device__ double comp_psi2_der_element(double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
{
// dpsi2 (n,m1,m2,q)
int q = idx/(M*M*N);
int m2 = (idx%(M*M*N))/(M*N);
int m1 = (idx%(M*N))/N;
int n = idx%N;
double neq = psi2_neq[IDX_NMMQ(n,m1,m2,q)];
double gamma_c = gamma[IDX_NQ(n,q)];
double Z1_c = Z[IDX_MQ(m1,q)];
double Z2_c = Z[IDX_MQ(m2,q)];
double S_c = S[IDX_NQ(n,q)];
double l_c = l[q];
double l_sqrt_c = sqrt(l[q]);
double psi2exp1_c = psi2exp1[IDX_NMMQ(n,m1,m2,q)];
double psi2exp2_c = psi2exp2[IDX_MMQ(m1,m2,q)];
double dZ = Z2_c - Z1_c;
double muZ = mu[IDX_NQ(n,q)] - (Z1_c+Z2_c)/2.0;
double Z2 = Z1_c*Z1_c+Z2_c*Z2_c;
double denom = 2.0*S_c/l_c+1.0;
double denom_sqrt = sqrt(denom);
double psi2_common = gamma_c/(denom_sqrt*denom*l_c);
double gamma1 = 1-gamma_c;
double var2 = var*var;
dpsi2_dgamma[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2exp1_c/denom_sqrt - psi2exp2_c);
dpsi2_dmu[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(-2.0*psi2_common*muZ*psi2exp1_c);
dpsi2_dS[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(2.0*muZ*muZ/(2.0*S_c+l_c)-1.0)*psi2exp1_c);
dpsi2_dZ[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(dZ*denom/-2.0+muZ)*psi2exp1_c-gamma1*Z2_c/l_c*psi2exp2_c)*2.0;
return var2*neq*(psi2_common*(S_c/l_c+dZ*dZ*denom/(4.0*l_c)+muZ*muZ/(2.0*S_c+l_c))*psi2exp1_c+gamma1*Z2/(2.0*l_c)*psi2exp2_c)*l_sqrt_c*2.0;
}
""")
except:
pass
class PSICOMP_SSRBF(object):
def __init__(self):
assert gpu_init.initSuccess, "GPU initialization failed!"
self.cublas_handle = gpu_init.cublas_handle
self.gpuCache = None
self.gpuCacheAll = None
def _initGPUCache(self, N, M, Q):
if self.gpuCache!=None and self.gpuCache['mu_gpu'].shape[0] == N:
return
if self.gpuCacheAll!=None and self.gpuCacheAll['mu_gpu'].shape[0]<N: # Too small cache -> reallocate
self._releaseMemory()
if self.gpuCacheAll == None:
self.gpuCacheAll = {
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logGamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'log1Gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logpsi1denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
# derivatives psi1
'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'psi1exp2_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'dpsi1_dvar_gpu' :gpuarray.empty((N,M),np.float64, order='F'),
'dpsi1_dl_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'dpsi1_dZ_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'dpsi1_dgamma_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'dpsi1_dmu_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
'dpsi1_dS_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
# derivatives psi2
'psi2_neq_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'psi2exp1_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'psi2exp2_gpu' :gpuarray.empty((M,M,Q),np.float64, order='F'),
'dpsi2_dvar_gpu' :gpuarray.empty((N,M,M),np.float64, order='F'),
'dpsi2_dl_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'dpsi2_dZ_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'dpsi2_dgamma_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'dpsi2_dmu_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
'dpsi2_dS_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
# gradients
'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
'grad_mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
}
self.gpuCache = self.gpuCacheAll
elif self.gpuCacheAll['mu_gpu'].shape[0]==N:
self.gpuCache = self.gpuCacheAll
else:
# remap to a smaller cache
self.gpuCache = self.gpuCacheAll.copy()
Nlist=['mu_gpu','S_gpu','gamma_gpu','logGamma_gpu','log1Gamma_gpu','logpsi1denom_gpu','logpsi2denom_gpu','psi0_gpu','psi1_gpu','psi2_gpu',
'psi1_neq_gpu','psi1exp1_gpu','psi1exp2_gpu','dpsi1_dvar_gpu','dpsi1_dl_gpu','dpsi1_dZ_gpu','dpsi1_dgamma_gpu','dpsi1_dmu_gpu',
'dpsi1_dS_gpu','psi2_neq_gpu','psi2exp1_gpu','dpsi2_dvar_gpu','dpsi2_dl_gpu','dpsi2_dZ_gpu','dpsi2_dgamma_gpu','dpsi2_dmu_gpu','dpsi2_dS_gpu','grad_mu_gpu','grad_S_gpu','grad_gamma_gpu',]
oldN = self.gpuCacheAll['mu_gpu'].shape[0]
for v in Nlist:
u = self.gpuCacheAll[v]
self.gpuCache[v] = u.ravel()[:u.size/oldN*N].reshape(*((N,)+u.shape[1:]))
def _releaseMemory(self):
if self.gpuCacheAll!=None:
[v.gpudata.free() for v in self.gpuCacheAll.values()]
self.gpuCacheAll = None
self.gpuCache = None
def estimateMemoryOccupation(self, N, M, Q):
"""
Estimate the best batch size.
N - the number of total datapoints
M - the number of inducing points
Q - the number of hidden (input) dimensions
return: the constant memory size, the memory occupation of batchsize=1
unit: GB
"""
return (2.*Q+2.*M*Q+M*M*Q)*8./1024./1024./1024., (1.+2.*M+10.*Q+2.*M*M+8.*M*Q+7.*M*M*Q)*8./1024./1024./1024.
@Cache_this(limit=1,ignore_args=(0,))
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma):
"""Compute Psi statitsitcs"""
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
ARD = True
else:
ARD = False
N = mu.shape[0]
M = Z.shape[0]
Q = mu.shape[1]
self._initGPUCache(N,M,Q)
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu']
logGamma_gpu = self.gpuCache['logGamma_gpu']
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
psi0_gpu = self.gpuCache['psi0_gpu']
psi1_gpu = self.gpuCache['psi1_gpu']
psi2_gpu = self.gpuCache['psi2_gpu']
if ARD:
l_gpu.set(np.asfortranarray(lengthscale**2))
else:
l_gpu.fill(lengthscale*lengthscale)
Z_gpu.set(np.asfortranarray(Z))
mu_gpu.set(np.asfortranarray(mu))
S_gpu.set(np.asfortranarray(S))
gamma_gpu.set(np.asfortranarray(gamma))
linalg_gpu.log(gamma_gpu,logGamma_gpu)
linalg_gpu.logOne(gamma_gpu,log1Gamma_gpu)
comp_logpsidenom(logpsi1denom_gpu, S_gpu,l_gpu,1.0,N)
comp_logpsidenom(logpsi2denom_gpu, S_gpu,l_gpu,2.0,N)
psi0_gpu.fill(variance)
comp_psi1(psi1_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
return psi0_gpu, psi1_gpu, psi2_gpu
@Cache_this(limit=1,ignore_args=(0,))
def _psiDercomputations(self, variance, lengthscale, Z, mu, S, gamma):
"""Compute the derivatives w.r.t. Psi statistics"""
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
self._initGPUCache(N,M,Q)
l_gpu = self.gpuCache['l_gpu']
Z_gpu = self.gpuCache['Z_gpu']
mu_gpu = self.gpuCache['mu_gpu']
S_gpu = self.gpuCache['S_gpu']
gamma_gpu = self.gpuCache['gamma_gpu']
logGamma_gpu = self.gpuCache['logGamma_gpu']
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
psi1_neq_gpu = self.gpuCache['psi1_neq_gpu']
psi1exp1_gpu = self.gpuCache['psi1exp1_gpu']
psi1exp2_gpu = self.gpuCache['psi1exp2_gpu']
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
psi2_neq_gpu = self.gpuCache['psi2_neq_gpu']
psi2exp1_gpu = self.gpuCache['psi2exp1_gpu']
psi2exp2_gpu = self.gpuCache['psi2exp2_gpu']
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
#==========================================================================================================
# Assuming the l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, logGamma_gpu, log1Gamma_gpu,
# logpsi1denom_gpu, logpsi2denom_gpu has been synchonized.
#==========================================================================================================
# psi1 derivatives
comp_dpsi1_dvar(dpsi1_dvar_gpu, psi1_neq_gpu, psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
# psi2 derivatives
comp_dpsi2_dvar(dpsi2_dvar_gpu, psi2_neq_gpu, psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
ARD = True
else:
ARD = False
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
grad_l_gpu = self.gpuCache['grad_l_gpu']
# variance
variance.gradient = gpuarray.sum(dL_dpsi0).get() \
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1)
# lengscale
if ARD:
grad_l_gpu.fill(0.)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
linalg_gpu.sum_axis(grad_l_gpu, psi1_comb_gpu, 1, N*M)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
linalg_gpu.sum_axis(grad_l_gpu, psi2_comb_gpu, 1, N*M*M)
lengthscale.gradient = grad_l_gpu.get()
else:
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
lengthscale.gradient = gpuarray.sum(psi1_comb_gpu).get() + gpuarray.sum(psi2_comb_gpu).get()
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
mu = variational_posterior.mean
S = variational_posterior.variance
gamma = variational_posterior.binary_prob
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
grad_Z_gpu = self.gpuCache['grad_Z_gpu']
grad_Z_gpu.fill(0.)
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dZ_gpu, dL_dpsi1.size)
linalg_gpu.sum_axis(grad_Z_gpu, psi1_comb_gpu, 1, N)
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dZ_gpu, dL_dpsi2.size)
linalg_gpu.sum_axis(grad_Z_gpu, psi2_comb_gpu, 1, N*M)
return grad_Z_gpu.get()
def gradients_qX_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
    """
    Accumulate on the GPU the gradients of the objective w.r.t. the
    variational posterior parameters of X (mean mu, variance S and the
    binary probability gamma), coming through the psi1 and psi2 statistics.

    :param dL_dpsi1: gradient of the objective w.r.t. psi1 (GPU array)
    :param dL_dpsi2: gradient of the objective w.r.t. psi2 (GPU array)
    :param variance, lengthscale: kernel parameters
    :param Z: inducing inputs
    :param variational_posterior: spike-and-slab posterior providing
        mean, variance and binary_prob arrays
    :returns: (grad_mu, grad_S, grad_gamma) as host (numpy) arrays
    """
    mu = variational_posterior.mean
    S = variational_posterior.variance
    gamma = variational_posterior.binary_prob
    # refresh the cached psi-derivative buffers for the current parameter values
    self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
    N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
    dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
    dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
    dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
    dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
    dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
    dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
    # scratch buffers shared with the other gradient routines (contents are overwritten)
    psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
    psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
    grad_mu_gpu = self.gpuCache['grad_mu_gpu']
    grad_S_gpu = self.gpuCache['grad_S_gpu']
    grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
    # mu gradients: chain rule through psi1 and psi2, reduced with sum_axis
    grad_mu_gpu.fill(0.)
    linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dmu_gpu, dL_dpsi1.size)
    linalg_gpu.sum_axis(grad_mu_gpu, psi1_comb_gpu, N, M)
    linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dmu_gpu, dL_dpsi2.size)
    linalg_gpu.sum_axis(grad_mu_gpu, psi2_comb_gpu, N, M*M)
    # S gradients: same pattern with the dS derivative buffers
    grad_S_gpu.fill(0.)
    linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dS_gpu, dL_dpsi1.size)
    linalg_gpu.sum_axis(grad_S_gpu, psi1_comb_gpu, N, M)
    linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dS_gpu, dL_dpsi2.size)
    linalg_gpu.sum_axis(grad_S_gpu, psi2_comb_gpu, N, M*M)
    # gamma gradients: same pattern with the dgamma derivative buffers
    grad_gamma_gpu.fill(0.)
    linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dgamma_gpu, dL_dpsi1.size)
    linalg_gpu.sum_axis(grad_gamma_gpu, psi1_comb_gpu, N, M)
    linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dgamma_gpu, dL_dpsi2.size)
    linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
    return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()

View file

@ -9,6 +9,7 @@ from stationary import Stationary
from GPy.util.caching import Cache_this from GPy.util.caching import Cache_this
from ...core.parameterization import variational from ...core.parameterization import variational
from psi_comp import ssrbf_psi_comp from psi_comp import ssrbf_psi_comp
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
class RBF(Stationary): class RBF(Stationary):
""" """
@ -19,9 +20,15 @@ class RBF(Stationary):
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
""" """
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'): _support_GPU = True
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
self.weave_options = {} self.weave_options = {}
self.group_spike_prob = False
if self.useGPU:
self.psicomp = PSICOMP_SSRBF()
def K_of_r(self, r): def K_of_r(self, r):
return self.variance * np.exp(-0.5 * r**2) return self.variance * np.exp(-0.5 * r**2)
@ -34,18 +41,28 @@ class RBF(Stationary):
#---------------------------------------# #---------------------------------------#
def psi0(self, Z, variational_posterior): def psi0(self, Z, variational_posterior):
return self.Kdiag(variational_posterior.mean) if self.useGPU:
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
else:
return self.Kdiag(variational_posterior.mean)
def psi1(self, Z, variational_posterior): def psi1(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
else:
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else: else:
_, _, _, psi1 = self._psi1computations(Z, variational_posterior) _, _, _, psi1 = self._psi1computations(Z, variational_posterior)
return psi1 return psi1
def psi2(self, Z, variational_posterior): def psi2(self, Z, variational_posterior):
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) if self.useGPU:
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
else:
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
else: else:
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
return psi2 return psi2
@ -53,25 +70,29 @@ class RBF(Stationary):
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) if self.useGPU:
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
#contributions from psi0:
self.variance.gradient = np.sum(dL_dpsi0)
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else: else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#from psi2 _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) #contributions from psi0:
else: self.variance.gradient = np.sum(dL_dpsi0)
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
#from psi1
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
if self.ARD:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
#from psi2
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
if self.ARD:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
else:
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale**2 l2 = self.lengthscale**2
@ -107,16 +128,19 @@ class RBF(Stationary):
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) if self.useGPU:
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
#psi2 #psi2
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
return grad return grad
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):
l2 = self.lengthscale **2 l2 = self.lengthscale **2
@ -140,22 +164,28 @@ class RBF(Stationary):
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
# Spike-and-Slab GPLVM # Spike-and-Slab GPLVM
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
ndata = variational_posterior.mean.shape[0] if self.useGPU:
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
else:
ndata = variational_posterior.mean.shape[0]
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
#psi1 #psi1
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
#psi2 #psi2
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
return grad_mu, grad_S, grad_gamma if self.group_spike_prob:
grad_gamma[:] = grad_gamma.mean(axis=0)
return grad_mu, grad_S, grad_gamma
elif isinstance(variational_posterior, variational.NormalPosterior): elif isinstance(variational_posterior, variational.NormalPosterior):

View file

@ -41,8 +41,8 @@ class Stationary(Kern):
""" """
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name): def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
super(Stationary, self).__init__(input_dim, active_dims, name) super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU)
self.ARD = ARD self.ARD = ARD
if not ARD: if not ARD:
if lengthscale is None: if lengthscale is None:

View file

@ -15,8 +15,8 @@ except ImportError:
if sympy_available: if sympy_available:
# These are likelihoods that rely on symbolic. # These are likelihoods that rely on symbolic.
from symbolic import Symbolic from symbolic import Symbolic
from sstudent_t import SstudentT #from sstudent_t import SstudentT
from negative_binomial import Negative_binomial from negative_binomial import Negative_binomial
from skew_normal import Skew_normal #from skew_normal import Skew_normal
from skew_exponential import Skew_exponential #from skew_exponential import Skew_exponential
from null_category import Null_category #from null_category import Null_category

View file

@ -7,7 +7,9 @@ from ..core import SparseGP
from ..likelihoods import Gaussian from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..inference.optimization import SCG
from ..util import linalg from ..util import linalg
from ..core.parameterization.variational import NormalPosterior, NormalPrior,VariationalPosterior from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
class BayesianGPLVM(SparseGP): class BayesianGPLVM(SparseGP):
""" """
@ -65,6 +67,10 @@ class BayesianGPLVM(SparseGP):
X.mean.gradient, X.variance.gradient = X_grad X.mean.gradient, X.variance.gradient = X_grad
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU):
update_gradients(self)
return
super(BayesianGPLVM, self).parameters_changed() super(BayesianGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
@ -155,56 +161,6 @@ class BayesianGPLVM(SparseGP):
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
def update_gradients(model):
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
het_noise = model.likelihood.variance.size > 1
if het_noise:
dL_dthetaL = np.empty((model.Y.shape[0],))
else:
dL_dthetaL = 0
#gradients w.r.t. kernel
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
kern_grad = model.kern.gradient.copy()
#gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
isEnd = False
while not isEnd:
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
if isinstance(model.X, VariationalPosterior):
#gradients w.r.t. kernel
model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
kern_grad += model.kern.gradient
#gradients w.r.t. Z
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]])
#gradients w.r.t. posterior parameters of X
X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad)
if het_noise:
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
else:
dL_dthetaL += grad_dict['dL_dthetaL']
# Set the gradients w.r.t. kernel
model.kern.gradient = kern_grad
# Update Log-likelihood
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
# update for the KL divergence
model.variational_prior.update_gradients_KL(model.X)
# dL_dthetaL
model.likelihood.update_gradients(dL_dthetaL)
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
""" """
objective function for fitting the latent variables for test points objective function for fitting the latent variables for test points

View file

@ -11,6 +11,9 @@ from ..likelihoods import Gaussian
from ..inference.optimization import SCG from ..inference.optimization import SCG
from ..util import linalg from ..util import linalg
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
class SSGPLVM(SparseGP): class SSGPLVM(SparseGP):
""" """
@ -25,11 +28,14 @@ class SSGPLVM(SparseGP):
""" """
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs): Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs):
if X == None: # The mean of variational approximation (mu) if X == None:
from ..util.initialization import initialize_latent from ..util.initialization import initialize_latent
X = initialize_latent(init, input_dim, Y) X, fracs = initialize_latent(init, input_dim, Y)
else:
fracs = np.ones(input_dim)
self.init = init self.init = init
if X_variance is None: # The variance of the variational approximation (S) if X_variance is None: # The variance of the variational approximation (S)
@ -38,6 +44,9 @@ class SSGPLVM(SparseGP):
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
if group_spike:
gamma[:] = gamma.mean(axis=0)
if Z is None: if Z is None:
Z = np.random.permutation(X.copy())[:num_inducing] Z = np.random.permutation(X.copy())[:num_inducing]
assert Z.shape[1] == X.shape[1] assert Z.shape[1] == X.shape[1]
@ -46,18 +55,31 @@ class SSGPLVM(SparseGP):
likelihood = Gaussian() likelihood = Gaussian()
if kernel is None: if kernel is None:
kernel = kern.SSRBF(input_dim) kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
pi = np.empty((input_dim)) pi = np.empty((input_dim))
pi[:] = 0.5 pi[:] = 0.5
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
X = SpikeAndSlabPosterior(X, X_variance, gamma) X = SpikeAndSlabPosterior(X, X_variance, gamma)
if group_spike:
kernel.group_spike_prob = True
self.variational_prior.group_spike_prob = True
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
self.add_parameter(self.X, index=0) self.add_parameter(self.X, index=0)
self.add_parameter(self.variational_prior) self.add_parameter(self.variational_prior)
def set_X_gradients(self, X, X_grad):
"""Set the gradients of the posterior distribution of X in its specific form."""
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
def parameters_changed(self): def parameters_changed(self):
if isinstance(self.inference_method, VarDTC_GPU):
update_gradients(self)
return
super(SSGPLVM, self).parameters_changed() super(SSGPLVM, self).parameters_changed()
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)

View file

@ -15,3 +15,5 @@ import latent_space_visualizations
import netpbmfile import netpbmfile
import inference_plots import inference_plots
import maps import maps
import img_plots
from ssgplvm import SSGPLVM_plot

View file

@ -0,0 +1,56 @@
"""
The module contains the tools for plotting 2D image visualizations
"""
import numpy as np
from matplotlib.cm import jet
width_max = 15
height_max = 12
def _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad, width_max=15, height_max=12):
    """
    Return a (width, height) figure size that fits a grid of image tiles
    inside a bounding box, preserving the grid's aspect ratio.

    The raw canvas is the tiled grid plus the inter-tile padding; it is then
    scaled so that the longer (relative) side touches the bound.

    :param x_size: width of one tile in pixels
    :param y_size: height of one tile in pixels
    :param fig_ncols: number of tile columns
    :param fig_nrows: number of tile rows
    :param pad: gap between tiles in pixels
    :param width_max: bounding-box width (default matches the module constant)
    :param height_max: bounding-box height (default matches the module constant)
    :returns: (width, height) tuple in the same units as the bounds
    """
    width = (x_size*fig_ncols+pad*(fig_ncols-1))
    height = (y_size*fig_nrows+pad*(fig_nrows-1))
    if width > float(height)/height_max*width_max:
        # width-limited: pin the width and scale the height down proportionally
        return (width_max, float(width_max)/width*height)
    else:
        # height-limited: pin the height and scale the width down proportionally
        return (float(height_max)/height*width, height_max)
def plot_2D_images(figure, arr, symmetric=False, pad=None, zoom=None, mode=None, interpolation='nearest'):
    """
    Tile a stack of 2D images into one canvas and draw it on *figure*.

    :param figure: matplotlib Figure to draw into (a single axes is added)
    :param arr: array of shape (num, height, width), or a single (height, width) image
    :param symmetric: if True, colour-scale symmetrically around zero
    :param pad: gap between tiles in pixels; defaults to ~10% of the smaller tile side
    :param zoom: unused -- kept for interface compatibility
    :param mode: 'L' for greyscale; None or 'jet' for the jet colour map
    :param interpolation: interpolation mode passed to ``imshow``
    """
    ax = figure.add_subplot(111)
    if len(arr.shape)==2:
        # promote a single image to a stack of one
        arr = arr.reshape(*((1,)+arr.shape))
    fig_num = arr.shape[0]
    y_size = arr.shape[1]
    x_size = arr.shape[2]
    # lay the tiles out on a near-square grid
    fig_ncols = int(np.ceil(np.sqrt(fig_num)))
    fig_nrows = int(np.ceil(float(fig_num)/fig_ncols))
    if pad is None:  # fixed: was `pad==None`
        pad = max(int(min(y_size,x_size)/10),1)
    figsize = _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad)
    #figure.set_size_inches(figsize,forward=True)
    #figure.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95)
    if symmetric:
        # symmetric around zero: fix zero as the middle colour
        mval = max(abs(arr.max()),abs(arr.min()))
        arr = arr/(2.*mval)+0.5
    else:
        # normalise intensities into [0, 1]
        minval,maxval = arr.min(),arr.max()
        arr = (arr-minval)/(maxval-minval)
    if mode=='L':
        # greyscale: replicate the normalised intensity over the RGB channels
        arr_color = np.empty(arr.shape+(3,))
        arr_color[:] = arr.reshape(*(arr.shape+(1,)))
    elif mode is None or mode=='jet':  # fixed: was `mode==None`
        arr_color = jet(arr)
    # white canvas large enough for all tiles plus the padding between them
    buf = np.ones((y_size*fig_nrows+pad*(fig_nrows-1), x_size*fig_ncols+pad*(fig_ncols-1), 3),dtype=arr.dtype)
    for y in range(fig_nrows):
        for x in range(fig_ncols):
            if y*fig_ncols+x<fig_num:
                # copy tile (y, x) into its slot, dropping any alpha channel
                buf[y*y_size+y*pad:(y+1)*y_size+y*pad, x*x_size+x*pad:(x+1)*x_size+x*pad] = arr_color[y*fig_ncols+x,:,:,:3]
    ax.imshow(buf, interpolation=interpolation)
    ax.axis('off')

View file

@ -0,0 +1,29 @@
"""
The module for plotting SSGPLVM results
"""
import pylab
from ...models import SSGPLVM
from img_plots import plot_2D_images
from ...util.misc import param_to_array
class SSGPLVM_plot(object):
    """
    Plotting helper for a fitted SSGPLVM model whose outputs are 2D images.

    :param model: a trained SSGPLVM instance
    :param imgsize: (width, height) of one output image; the model's outputs
        must flatten to exactly width*height pixels
    """
    def __init__(self,model, imgsize):
        assert isinstance(model,SSGPLVM)
        self.model = model
        # (width, height) of a single image
        self.imgsize= imgsize
        # each output row must hold one flattened image
        assert model.Y.shape[1] == imgsize[0]*imgsize[1]
    def plot_inducing(self):
        """Show the mean and variance of the inducing points as image grids."""
        fig1 = pylab.figure()
        mean = self.model.posterior.mean
        # reshape(*) unpacks the tuple, i.e. reshape(n_inducing, height, width)
        arr = mean.reshape(*(mean.shape[0],self.imgsize[1],self.imgsize[0]))
        plot_2D_images(fig1, arr)
        fig1.gca().set_title('The mean of inducing points')
        fig2 = pylab.figure()
        # NOTE(review): covariance is passed unreshaped to plot_2D_images --
        # presumably already (n, h, w) or 2D; confirm against the posterior object
        covar = self.model.posterior.covariance
        plot_2D_images(fig2, covar)
        fig2.gca().set_title('The variance of inducing points')

View file

@ -45,7 +45,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95)) fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
return fig return fig
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None): def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True):
""" """
Plot latent space X in 1D: Plot latent space X in 1D:
@ -58,7 +58,10 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
""" """
if ax is None: if ax is None:
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1])))) if side_by_side:
fig = pb.figure(num=fignum, figsize=(16, min(12, (2 * parameterized.mean.shape[1]))))
else:
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
if colors is None: if colors is None:
colors = pb.gca()._get_lines.color_cycle colors = pb.gca()._get_lines.color_cycle
pb.clf() pb.clf()
@ -68,8 +71,15 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob) means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
x = np.arange(means.shape[0]) x = np.arange(means.shape[0])
for i in range(means.shape[1]): for i in range(means.shape[1]):
if side_by_side:
sub1 = (means.shape[1],2,2*i+1)
sub2 = (means.shape[1],2,2*i+2)
else:
sub1 = (means.shape[1]*2,1,2*i+1)
sub2 = (means.shape[1]*2,1,2*i+2)
# mean and variance plot # mean and variance plot
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1) a = fig.add_subplot(*sub1)
a.plot(means, c='k', alpha=.3) a.plot(means, c='k', alpha=.3)
plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i))) plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
a.fill_between(x, a.fill_between(x,
@ -82,7 +92,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
if i < means.shape[1] - 1: if i < means.shape[1] - 1:
a.set_xticklabels('') a.set_xticklabels('')
# binary prob plot # binary prob plot
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2) a = fig.add_subplot(*sub2)
a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center') a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center')
a.set_xlim(x.min(), x.max()) a.set_xlim(x.min(), x.max())
a.set_ylim([0.,1.]) a.set_ylim([0.,1.])

View file

@ -85,6 +85,7 @@ class vector_show(matplotlib_show):
class lvm(matplotlib_show): class lvm(matplotlib_show):
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]): def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
"""Visualize a latent variable model """Visualize a latent variable model
@ -130,10 +131,10 @@ class lvm(matplotlib_show):
def modify(self, vals): def modify(self, vals):
"""When latent values are modified update the latent representation and ulso update the output visualization.""" """When latent values are modified update the latent representation and ulso update the output visualization."""
self.vals = vals.copy() self.vals = vals[None,:].copy()
y = self.model.predict(self.vals)[0] y = self.model.predict(self.vals)[0]
self.data_visualize.modify(y) self.data_visualize.modify(y)
self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]]) self.latent_handle.set_data(self.vals[:,self.latent_index[0]], self.vals[:,self.latent_index[1]])
self.axes.figure.canvas.draw() self.axes.figure.canvas.draw()

View file

@ -191,13 +191,14 @@ class Test(ListDictTestCase):
par.count = 0 par.count = 0
par.add_observer(self, self._callback, 1) par.add_observer(self, self._callback, 1)
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy()) pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
self.assertNotIn(par._observer_callables_[0], pcopy._observer_callables_) self.assertNotIn(par.observers[0], pcopy.observers)
pcopy = par.copy() pcopy = par.copy()
pcopy.name = "copy" pcopy.name = "copy"
self.assertTrue(par.checkgrad()) self.assertTrue(par.checkgrad())
self.assertTrue(pcopy.checkgrad()) self.assertTrue(pcopy.checkgrad())
self.assertTrue(pcopy.kern.checkgrad()) self.assertTrue(pcopy.kern.checkgrad())
self.assertIn(par._observer_callables_[0], pcopy._observer_callables_) import ipdb;ipdb.set_trace()
self.assertIn(par.observers[0], pcopy.observers)
self.assertEqual(par.count, 3) self.assertEqual(par.count, 3)
self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad self.assertEqual(pcopy.count, 6) # 3 of each call to checkgrad

View file

@ -15,6 +15,7 @@ import caching
import diag import diag
import initialization import initialization
import multioutput import multioutput
import linalg_gpu
try: try:
import sympy import sympy

View file

@ -66,6 +66,7 @@ class Cacher(object):
#first make sure the depth limit isn't exceeded #first make sure the depth limit isn't exceeded
if len(self.cached_inputs) == self.limit: if len(self.cached_inputs) == self.limit:
args_ = self.cached_inputs.pop(0) args_ = self.cached_inputs.pop(0)
args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None] [a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
self.inputs_changed.pop(0) self.inputs_changed.pop(0)
self.cached_outputs.pop(0) self.cached_outputs.pop(0)

16
GPy/util/gpu_init.py Normal file
View file

@ -0,0 +1,16 @@
"""
The package for scikits.cuda initialization
Global variables: initSuccess
providing CUBLAS handle: cublas_handle
"""
try:
    # Initialize the CUDA context and the scikits.cuda linear-algebra layer.
    # Any failure (missing packages, no GPU, driver error) simply disables
    # GPU support rather than breaking the import of GPy.
    import pycuda.autoinit
    from scikits.cuda import cublas
    import scikits.cuda.linalg as culinalg
    culinalg.init()
    # one module-wide CUBLAS handle shared by all GPU routines
    cublas_handle = cublas.cublasCreate()
    initSuccess = True
except Exception:
    # fixed: a bare `except:` would also swallow KeyboardInterrupt/SystemExit
    initSuccess = False

59
GPy/util/linalg_gpu.py Normal file
View file

@ -0,0 +1,59 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
# Licensed under the BSD 3-clause license (see LICENSE.txt)
#
# The utility functions for GPU computation
#
import numpy as np
from ..util import gpu_init
try:
    # GPU support is optional: if pycuda is unavailable or kernel compilation
    # fails, none of the kernel names below are defined.
    from pycuda.reduction import ReductionKernel
    from pycuda.elementwise import ElementwiseKernel

    # log|A| for A a lower-triangular matrix: sums log of the diagonal entries.
    # Call as logDiagSum(A, A.shape[0]+1) so the stride walks the diagonal.
    logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")

    # sum of every `step`-th element of x
    strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")

    #=======================================================================================
    # Element-wise functions
    #=======================================================================================

    # log(X)
    log = ElementwiseKernel("double *in, double *out", "out[i] = log(in[i])", "log_element")

    # log(1.0-X)
    logOne = ElementwiseKernel("double *in, double *out", "out[i] = log(1.-in[i])", "logOne_element")

    # multiplication with broadcast on the last dimension (out = shorter[:,None]*longer)
    mul_bcast = ElementwiseKernel("double *out, double *shorter, double *longer, int shorter_size", "out[i] = longer[i]*shorter[i%shorter_size]", "mul_bcast")

    # multiplication with broadcast on the first dimension (out = shorter[None,:]*longer)
    # fixed: this kernel was also named "mul_bcast", colliding with the kernel above
    mul_bcast_first = ElementwiseKernel("double *out, double *shorter, double *longer, int first_dim", "out[i] = longer[i]*shorter[i/first_dim]", "mul_bcast_first")

    # sum through the middle dimension (size_2) of a 3D matrix (size_1, size_2, size_3)
    sum_axis = ElementwiseKernel("double *out, double *in, int size_1, int size_2", "out[i] += sum_axis_element(in, size_1, size_2, i)", "sum_axis",preamble="""
    __device__ double sum_axis_element(double *in, int size_1, int size_2, int idx)
    {
        int k = idx/size_1;
        int i = idx%size_1;
        double sum=0;
        for(int j=0;j<size_2;j++) {
            sum += in[(k*size_2+j)*size_1+i];
        }
        return sum;
    }
    """)

    # the outer product between two vectors (out = np.dot(v1,v2.T))
    outer_prod = ElementwiseKernel("double *out, double *v1, double *v2, int v1_size", "out[i] = v1[i%v1_size]*v2[i/v1_size]", "outer_prod")

    # the outer product between two vectors (out = np.einsum('na,nb->nab',m1,m2) a=dim1, b=dim2 )
    join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod")
except Exception:
    # fixed: was a bare `except:`; keep the deliberate best-effort behaviour
    # (no GPU support) without trapping KeyboardInterrupt/SystemExit
    pass
pass