Mirror of https://github.com/SheffieldML/GPy.git (synced 2026-05-10 04:22:38 +02:00)
Copy and paste observable_array from repository to try and resolve bizarre merge request.
This commit is contained in commit c2d3c82944.
30 changed files with 1646 additions and 184 deletions
@@ -4,6 +4,7 @@
from model import *
|
||||
from parameterization.parameterized import adjust_name_for_printing, Parameterizable
|
||||
from parameterization.param import Param, ParamConcatenation
|
||||
from parameterization.observable_array import ObsAr
|
||||
|
||||
from gp import GP
|
||||
from sparse_gp import SparseGP
@@ -121,7 +121,7 @@ class GP(Model):
If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew.
|
||||
This is to allow for different normalizations of the output dimensions.
|
||||
|
||||
"""
|
||||
"""
|
||||
#predict the latent function values
|
||||
mu, var = self._raw_predict(Xnew, full_cov=full_cov)
@@ -5,6 +5,7 @@ Created on 27 Feb 2014
'''
|
||||
|
||||
from collections import defaultdict
|
||||
import weakref
|
||||
|
||||
def intarray_default_factory():
|
||||
import numpy as np
@@ -41,60 +42,66 @@ class ObservablesList(object):
def __init__(self):
|
||||
self._poc = []
|
||||
|
||||
def remove(self, value):
|
||||
return self._poc.remove(value)
|
||||
|
||||
|
||||
def __delitem__(self, ind):
|
||||
return self._poc.__delitem__(ind)
|
||||
|
||||
|
||||
def __setitem__(self, ind, item):
|
||||
return self._poc.__setitem__(ind, item)
|
||||
|
||||
|
||||
def __getitem__(self, ind):
|
||||
return self._poc.__getitem__(ind)
|
||||
p,o,c = self._poc[ind]
|
||||
return p, o(), c
|
||||
|
||||
def remove(self, priority, observable, callble):
|
||||
"""
|
||||
"""
|
||||
self.flush()
|
||||
for i in range(len(self) - 1, -1, -1):
|
||||
p,o,c = self[i]
|
||||
if priority==p and observable==o and callble==c:
|
||||
del self._poc[i]
|
||||
|
||||
def __repr__(self):
|
||||
return self._poc.__repr__()
|
||||
|
||||
|
||||
def append(self, obj):
|
||||
return self._poc.append(obj)
|
||||
|
||||
|
||||
def index(self, value):
|
||||
return self._poc.index(value)
|
||||
|
||||
|
||||
def extend(self, iterable):
|
||||
return self._poc.extend(iterable)
|
||||
|
||||
|
||||
def add(self, priority, observable, callble):
|
||||
ins = 0
|
||||
for pr, _, _ in self:
|
||||
if priority > pr:
|
||||
break
|
||||
ins += 1
|
||||
self._poc.insert(ins, (priority, weakref.ref(observable), callble))
|
||||
|
||||
def __str__(self):
|
||||
return self._poc.__str__()
|
||||
ret = []
|
||||
curr_p = None
|
||||
for p, o, c in self:
|
||||
curr = ''
|
||||
if curr_p != p:
|
||||
pre = "{!s}: ".format(p)
|
||||
curr_pre = pre
|
||||
else: curr_pre = " "*len(pre)
|
||||
curr_p = p
|
||||
curr += curr_pre
|
||||
ret.append(curr + ", ".join(map(repr, [o,c])))
|
||||
return '\n'.join(ret)
|
||||
|
||||
def flush(self):
|
||||
self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None]
|
||||
|
||||
def __iter__(self):
|
||||
return self._poc.__iter__()
|
||||
|
||||
|
||||
def insert(self, index, obj):
|
||||
return self._poc.insert(index, obj)
|
||||
|
||||
self.flush()
|
||||
for p, o, c in self._poc:
|
||||
if o() is not None:
|
||||
yield p, o(), c
|
||||
|
||||
def __len__(self):
|
||||
self.flush()
|
||||
return self._poc.__len__()
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
self.flush()
|
||||
s = ObservablesList()
|
||||
import copy
|
||||
s._poc = copy.deepcopy(self._poc, memo)
|
||||
return s
|
||||
|
||||
def __getstate__(self):
|
||||
self.flush()
|
||||
from ...util.caching import Cacher
|
||||
obs = []
|
||||
for p, o, c in self:
@@ -106,6 +113,6 @@ class ObservablesList(object):
def __setstate__(self, state):
|
||||
self._poc = []
|
||||
for p, o, c in state:
|
||||
self._poc.append((p,o,getattr(o, c)))
|
||||
self.add(p,o,getattr(o, c))
|
||||
|
||||
pass
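
Editorial aside (not part of the diff): the new ObservablesList stores (priority, weakref(observable), callable) triples, and add() keeps them ordered with the highest priority first, which is what lets notify_observers(min_priority=...) stop early. A minimal sketch of that ordering, assuming the module path GPy/core/parameterization/lists_and_dicts.py shown above:

# Sketch only: illustrates the priority ordering ObservablesList.add maintains.
from GPy.core.parameterization.lists_and_dicts import ObservablesList

class Dummy(object):   # plain object() cannot be weak-referenced, so use a class
    pass

ol = ObservablesList()
low, high = Dummy(), Dummy()
ol.add(priority=0, observable=low, callble=lambda me, which=None: None)
ol.add(priority=10, observable=high, callble=lambda me, which=None: None)

# Iteration yields (priority, observable, callable) with priority 10 first,
# so a notifier that breaks when p <= min_priority skips the low-priority entries.
print([p for p, _, _ in ol])   # -> [10, 0]
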
GPy/core/parameterization/observable_array.py (new file, 137 lines)
@@ -0,0 +1,137 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
__updated__ = '2014-03-31'
|
||||
|
||||
import numpy as np
|
||||
from parameter_core import Observable, Pickleable
|
||||
|
||||
class ObsAr(np.ndarray, Pickleable, Observable):
|
||||
"""
|
||||
An ndarray which reports changes to its observers.
|
||||
The observers can add themselves with a callable, which
|
||||
will be called every time this array changes. The callable
|
||||
takes exactly one argument, which is this array itself.
|
||||
"""
|
||||
__array_priority__ = -1 # Never give back ObsAr
|
||||
def __new__(cls, input_array, *a, **kw):
|
||||
if not isinstance(input_array, ObsAr):
|
||||
obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls)
|
||||
else: obj = input_array
|
||||
#cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing
|
||||
super(ObsAr, obj).__init__(*a, **kw)
|
||||
return obj
|
||||
|
||||
def __array_finalize__(self, obj):
|
||||
# see InfoArray.__array_finalize__ for comments
|
||||
if obj is None: return
|
||||
self.observers = getattr(obj, 'observers', None)
|
||||
|
||||
def __array_wrap__(self, out_arr, context=None):
|
||||
return out_arr.view(np.ndarray)
|
||||
|
||||
def copy(self):
|
||||
memo = {}
|
||||
memo[id(self)] = self
|
||||
return self.__deepcopy__(memo)
|
||||
|
||||
def __deepcopy__(self, memo):
|
||||
s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy())
|
||||
memo[id(self)] = s
|
||||
import copy
|
||||
s.__dict__.update(copy.deepcopy(self.__dict__, memo))
|
||||
return s
|
||||
|
||||
def __reduce__(self):
|
||||
func, args, state = super(ObsAr, self).__reduce__()
|
||||
return func, args, (state, Pickleable.__getstate__(self))
|
||||
|
||||
def __setstate__(self, state):
|
||||
np.ndarray.__setstate__(self, state[0])
|
||||
Pickleable.__setstate__(self, state[1])
|
||||
|
||||
def __setitem__(self, s, val):
|
||||
super(ObsAr, self).__setitem__(s, val)
|
||||
self.notify_observers()
|
||||
|
||||
def __getslice__(self, start, stop):
|
||||
return self.__getitem__(slice(start, stop))
|
||||
|
||||
def __setslice__(self, start, stop, val):
|
||||
return self.__setitem__(slice(start, stop), val)
|
||||
|
||||
def __ilshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ilshift__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
def __irshift__(self, *args, **kwargs):
|
||||
r = np.ndarray.__irshift__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ixor__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ixor__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ipow__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ipow__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ifloordiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ifloordiv__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __isub__(self, *args, **kwargs):
|
||||
r = np.ndarray.__isub__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __ior__(self, *args, **kwargs):
|
||||
r = np.ndarray.__ior__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __itruediv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__itruediv__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __idiv__(self, *args, **kwargs):
|
||||
r = np.ndarray.__idiv__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __iand__(self, *args, **kwargs):
|
||||
r = np.ndarray.__iand__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __imod__(self, *args, **kwargs):
|
||||
r = np.ndarray.__imod__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __iadd__(self, *args, **kwargs):
|
||||
r = np.ndarray.__iadd__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
|
||||
|
||||
|
||||
def __imul__(self, *args, **kwargs):
|
||||
r = np.ndarray.__imul__(self, *args, **kwargs)
|
||||
self.notify_observers()
|
||||
return r
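
Editorial aside (not part of the diff): a minimal usage sketch of the new ObsAr class, assuming the observer API added in this commit (add_observer plus notify_observers); in-place writes are what trigger the callbacks.

# Sketch only: ObsAr notifies its observers on in-place modification.
import numpy as np
from GPy.core.parameterization.observable_array import ObsAr

class Logger(object):
    def changed(self, arr, which=None):
        # called with the array itself every time it is modified in place
        print('array changed: %s' % arr)

log = Logger()                 # keep a reference: observers are held by weakref
a = ObsAr(np.arange(3.))
a.add_observer(log, log.changed)
a[0] = 10.0                    # __setitem__ calls notify_observers
a += 1.0                       # in-place operators notify as well
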
@@ -59,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr):
import pydot
|
||||
node = pydot.Node(id(self), shape='record', label=self.name)
|
||||
G.add_node(node)
|
||||
for o in self._observer_callables_.keys():
|
||||
for o in self.observers.keys():
|
||||
label = o.name if hasattr(o, 'name') else str(o)
|
||||
observed_node = pydot.Node(id(o), label=label)
|
||||
G.add_node(observed_node)
@@ -324,7 +324,7 @@ class ParamConcatenation(object):
if update:
|
||||
self.update_all_params()
|
||||
def values(self):
|
||||
return numpy.hstack([p.param_array for p in self.params])
|
||||
return numpy.hstack([p.param_array.flat for p in self.params])
|
||||
#===========================================================================
|
||||
# parameter operations:
|
||||
#===========================================================================
@@ -44,22 +44,23 @@ class Observable(object):
def __init__(self, *args, **kwargs):
|
||||
super(Observable, self).__init__()
|
||||
from lists_and_dicts import ObservablesList
|
||||
self._observer_callables_ = ObservablesList()
|
||||
self.observers = ObservablesList()
|
||||
|
||||
def add_observer(self, observer, callble, priority=0):
|
||||
self._insert_sorted(priority, observer, callble)
|
||||
self.observers.add(priority, observer, callble)
|
||||
|
||||
def remove_observer(self, observer, callble=None):
|
||||
to_remove = []
|
||||
for p, obs, clble in self._observer_callables_:
|
||||
for poc in self.observers:
|
||||
_, obs, clble = poc
|
||||
if callble is not None:
|
||||
if (obs == observer) and (callble == clble):
|
||||
to_remove.append((p, obs, clble))
|
||||
to_remove.append(poc)
|
||||
else:
|
||||
if obs is observer:
|
||||
to_remove.append((p, obs, clble))
|
||||
to_remove.append(poc)
|
||||
for r in to_remove:
|
||||
self._observer_callables_.remove(r)
|
||||
self.observers.remove(*r)
|
||||
|
||||
def notify_observers(self, which=None, min_priority=None):
|
||||
"""
@@ -74,21 +75,13 @@ class Observable(object):
if which is None:
|
||||
which = self
|
||||
if min_priority is None:
|
||||
[callble(self, which=which) for _, _, callble in self._observer_callables_]
|
||||
[callble(self, which=which) for _, _, callble in self.observers]
|
||||
else:
|
||||
for p, _, callble in self._observer_callables_:
|
||||
for p, _, callble in self.observers:
|
||||
if p <= min_priority:
|
||||
break
|
||||
callble(self, which=which)
|
||||
|
||||
def _insert_sorted(self, p, o, c):
|
||||
ins = 0
|
||||
for pr, _, _ in self._observer_callables_:
|
||||
if p > pr:
|
||||
break
|
||||
ins += 1
|
||||
self._observer_callables_.insert(ins, (p, o, c))
|
||||
|
||||
#===============================================================================
|
||||
# Foundation framework for parameterized and param objects:
|
||||
#===============================================================================
@@ -192,7 +185,7 @@ class Pickleable(object):
|
||||
def __getstate__(self):
|
||||
ignore_list = ([#'_parent_', '_parent_index_',
|
||||
#'_observer_callables_',
|
||||
#'observers',
|
||||
'_param_array_', '_gradient_array_', '_fixes_',
|
||||
'_Cacher_wrap__cachers']
|
||||
#+ self.parameter_names(recursive=False)
@@ -90,7 +90,7 @@ class Parameterized(Parameterizable):
child_node = child.build_pydot(G)
|
||||
G.add_edge(pydot.Edge(node, child_node))
|
||||
|
||||
for o in self._observer_callables_.keys():
|
||||
for o in self.observers.keys():
|
||||
label = o.name if hasattr(o, 'name') else str(o)
|
||||
observed_node = pydot.Node(id(o), label=label)
|
||||
G.add_node(observed_node)
@@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior):
self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10))
|
||||
self.variance = Param('variance',variance)
|
||||
self.add_parameters(self.pi)
|
||||
self.group_spike_prob = False
|
||||
|
||||
def KL_divergence(self, variational_posterior):
|
||||
mu = variational_posterior.mean
@@ -55,7 +56,11 @@ class SpikeAndSlabPrior(VariationalPrior):
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
|
||||
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
if self.group_spike_prob:
|
||||
gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
gamma.gradient -= gamma_grad.mean(axis=0)
|
||||
else:
|
||||
gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2.
|
||||
mu.gradient -= gamma*mu
|
||||
S.gradient -= (1. - (1. / (S))) * gamma /2.
|
||||
self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0)
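
Editorial note (not part of the diff): written out, the quantity subtracted from gamma.gradient in both branches above is the per-element derivative of the KL term,

\frac{\partial \mathrm{KL}}{\partial \gamma_{nq}} = \log\!\left(\frac{1-\pi_q}{\pi_q}\,\frac{\gamma_{nq}}{1-\gamma_{nq}}\right) + \frac{1}{2}\left(\mu_{nq}^2 + S_{nq} - \log S_{nq} - 1\right),

and the new group_spike_prob branch simply averages this quantity over the data index n (axis 0) before applying it, so all rows in a group share one spike-probability gradient.
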
@@ -409,12 +409,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True):
# optimize
|
||||
m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel)
|
||||
if optimize: m.optimize(messages=verbose, max_f_eval=10000)
|
||||
if plot and GPy.plotting.matplot_dep.visualize.visual_available:
|
||||
if plot:
|
||||
plt.clf
|
||||
ax = m.plot_latent()
|
||||
y = m.likelihood.Y[0, :]
|
||||
y = m.Y[0, :]
|
||||
data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect'])
|
||||
GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax)
|
||||
vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax)
|
||||
raw_input('Press enter to finish')
|
||||
|
||||
return m
@@ -32,6 +32,7 @@ from expectation_propagation import EP
from dtc import DTC
|
||||
from fitc import FITC
|
||||
from var_dtc_parallel import VarDTC_minibatch
|
||||
from var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
# class FullLatentFunctionData(object):
|
||||
#
@@ -32,7 +32,7 @@ class ExactGaussianInference(object):
return Y
|
||||
else:
|
||||
#if Y in self.cache, return self.Cache[Y], else store Y in cache and return L.
|
||||
print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
|
||||
#print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!"
|
||||
return Y
|
||||
|
||||
def inference(self, kern, X, likelihood, Y, Y_metadata=None):
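
Editorial aside (not part of the diff): the comment in the hunk above suggests caching a factor L with L L^T = Y Y^T instead of recomputing it. A hedged sketch of that idea, with a hypothetical helper name and a naive id()-keyed cache (neither is part of GPy):

# Sketch only: cache the Cholesky-like factor of Y Y^T so it is built once.
import numpy as np
from GPy.util.linalg import jitchol   # assumed import path, as used elsewhere in this commit

_yyt_factor_cache = {}

def get_YYT_factor(Y):
    key = id(Y)                        # naive cache key, for illustration only
    if key not in _yyt_factor_cache:
        _yyt_factor_cache[key] = jitchol(np.dot(Y, Y.T))   # L with L L^T = Y Y^T
    return _yyt_factor_cache[key]
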
GPy/inference/latent_function_inference/var_dtc_gpu.py (new file, 545 lines)
@@ -0,0 +1,545 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
from posterior import Posterior
|
||||
from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs
|
||||
from ...util import diag
|
||||
from ...core.parameterization.variational import VariationalPosterior
|
||||
import numpy as np
|
||||
from ...util.misc import param_to_array
|
||||
log_2_pi = np.log(2*np.pi)
|
||||
|
||||
from ...util import gpu_init
|
||||
|
||||
try:
|
||||
import scikits.cuda.linalg as culinalg
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from scikits.cuda import cublas
|
||||
from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod
|
||||
except:
|
||||
pass
|
||||
|
||||
class VarDTC_GPU(object):
|
||||
"""
|
||||
An object for inference when the likelihood is Gaussian, but we want to do sparse inference.
|
||||
|
||||
The function self.inference returns a Posterior object, which summarizes
|
||||
the posterior.
|
||||
|
||||
For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it.
|
||||
|
||||
"""
|
||||
const_jitter = np.float64(1e-6)
|
||||
def __init__(self, batchsize=None, gpu_memory=4., limit=1):
|
||||
|
||||
self.batchsize = batchsize
|
||||
self.gpu_memory = gpu_memory
|
||||
|
||||
self.midRes = {}
|
||||
self.batch_pos = 0 # the starting position of the current mini-batch
|
||||
|
||||
self.cublas_handle = gpu_init.cublas_handle
|
||||
|
||||
# Initialize GPU caches
|
||||
self.gpuCache = None
|
||||
|
||||
def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y):
|
||||
ndata = Y.shape[0]
|
||||
if self.batchsize==None:
|
||||
self.batchsize = self._estimateBatchSize(kern, ndata, num_inducing, input_dim, output_dim)
|
||||
if self.gpuCache == None:
|
||||
self.gpuCache = {# inference_likelihood
|
||||
'Kmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'Lm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'ones_gpu' :gpuarray.empty(num_inducing, np.float64,order='F'),
|
||||
'LL_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'b_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'v_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'vvt_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'KmmInvPsi2LLInvT_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'KmmInvPsi2P_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dpsi2R_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dKmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'psi1Y_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'),
|
||||
'psi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'),
|
||||
'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'),
|
||||
'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN
|
||||
'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN
|
||||
'psi2_t_gpu' :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'),
|
||||
# inference_minibatch
|
||||
'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'dL_dpsi2_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
|
||||
'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'),
|
||||
'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'),
|
||||
'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'),
|
||||
'psi2p_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'),
|
||||
}
|
||||
self.gpuCache['ones_gpu'].fill(1.0)
|
||||
|
||||
YT_gpu = self.gpuCache['YT_gpu']
|
||||
self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
|
||||
|
||||
def _estimateMemoryOccupation(self, N, M, D):
|
||||
"""
|
||||
Estimate the GPU memory occupation.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
D - the number of observed (output) dimensions
|
||||
return: the constant memory size, the memory occupation of batchsize=1
|
||||
unit: GB
|
||||
"""
|
||||
return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024.
|
||||
|
||||
def _estimateBatchSize(self, kern, N, M, Q, D):
|
||||
"""
|
||||
Estimate the best batch size.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
D - the number of observed (output) dimensions
|
||||
return: the estimated optimal batch size (number of datapoints per minibatch)
|
||||
"""
|
||||
if kern.useGPU:
|
||||
x0,x1 = kern.psicomp.estimateMemoryOccupation(N,M,Q)
|
||||
else:
|
||||
x0, x1 = 0.,0.
|
||||
y0, y1 = self._estimateMemoryOccupation(N, M, D)
|
||||
|
||||
opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N)
|
||||
|
||||
return opt_batchsize
|
||||
|
||||
def _get_YYTfactor(self, Y):
|
||||
"""
|
||||
Find a matrix L which satisfies L L^T = Y Y^T.
|
||||
|
||||
Note that L may have fewer columns than Y.
|
||||
"""
|
||||
N, D = Y.shape
|
||||
if (N>=D):
|
||||
return param_to_array(Y)
|
||||
else:
|
||||
return jitchol(tdot(Y))
|
||||
|
||||
def inference_likelihood(self, kern, X, Z, likelihood, Y):
|
||||
"""
|
||||
The first phase of inference:
|
||||
Compute: log-likelihood, dL_dKmm
|
||||
|
||||
Cached intermediate results: Kmm, KmmInv,
|
||||
"""
|
||||
|
||||
num_inducing, input_dim = Z.shape[0], Z.shape[1]
|
||||
num_data, output_dim = Y.shape
|
||||
|
||||
self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y)
|
||||
|
||||
if isinstance(X, VariationalPosterior):
|
||||
uncertain_inputs = True
|
||||
else:
|
||||
uncertain_inputs = False
|
||||
|
||||
#see whether we've got a different noise variance for each datum
|
||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||
het_noise = beta.size > 1
|
||||
trYYT = self._trYYT
|
||||
|
||||
psi1Y_gpu = self.gpuCache['psi1Y_gpu']
|
||||
psi2_gpu = self.gpuCache['psi2_gpu']
|
||||
beta_gpu = self.gpuCache['beta_gpu']
|
||||
YT_gpu = self.gpuCache['YT_gpu']
|
||||
betaYT_gpu = self.gpuCache['betaYT_gpu']
|
||||
psi2_t_gpu = self.gpuCache['psi2_t_gpu']
|
||||
|
||||
if het_noise:
|
||||
beta_gpu.set(np.asfortranarray(beta))
|
||||
mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size)
|
||||
YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1)
|
||||
else:
|
||||
beta_gpu.fill(beta)
|
||||
betaYT_gpu.fill(0.)
|
||||
cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1)
|
||||
YRY_full = trYYT*beta
|
||||
|
||||
if kern.useGPU:
|
||||
psi1Y_gpu.fill(0.)
|
||||
psi2_gpu.fill(0.)
|
||||
psi0_full = 0
|
||||
|
||||
for n_start in xrange(0,num_data,self.batchsize):
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
ndata = n_end - n_start
|
||||
X_slice = X[n_start:n_end]
|
||||
beta_gpu_slice = beta_gpu[n_start:n_end]
|
||||
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
|
||||
if ndata==self.batchsize:
|
||||
psi2_t_gpu_slice = psi2_t_gpu
|
||||
else:
|
||||
psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata]
|
||||
if uncertain_inputs:
|
||||
psi0p_gpu = kern.psi0(Z, X_slice)
|
||||
psi1p_gpu = kern.psi1(Z, X_slice)
|
||||
psi2p_gpu = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0p_gpu = kern.Kdiag(X_slice)
|
||||
psi1p_gpu = kern.K(X_slice, Z)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing)
|
||||
|
||||
if het_noise:
|
||||
psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1)
|
||||
else:
|
||||
psi0_full += gpuarray.sum(psi0p_gpu).get()
|
||||
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size)
|
||||
sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata)
|
||||
else:
|
||||
sum_axis(psi2_gpu,psi2p_gpu,1,ndata)
|
||||
else:
|
||||
if het_noise:
|
||||
psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata]
|
||||
mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||
else:
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing)
|
||||
|
||||
if not het_noise:
|
||||
psi0_full *= beta
|
||||
if uncertain_inputs:
|
||||
cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1)
|
||||
|
||||
else:
|
||||
psi2_full = np.zeros((num_inducing,num_inducing),order='F')
|
||||
psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD
|
||||
psi0_full = 0
|
||||
# YRY_full = 0
|
||||
|
||||
for n_start in xrange(0,num_data,self.batchsize):
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
Y_slice = Y[n_start:n_end]
|
||||
X_slice = X[n_start:n_end]
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X_slice)
|
||||
psi1 = kern.psi1(Z, X_slice)
|
||||
psi2 = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0 = kern.Kdiag(X_slice)
|
||||
psi1 = kern.K(X_slice, Z)
|
||||
|
||||
if het_noise:
|
||||
beta_slice = beta[n_start:n_end]
|
||||
psi0_full += (beta_slice*psi0).sum()
|
||||
psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD
|
||||
# YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum()
|
||||
else:
|
||||
psi0_full += psi0.sum()
|
||||
psi1Y_full += np.dot(psi1.T,Y_slice) # MxD
|
||||
|
||||
if uncertain_inputs:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2)
|
||||
else:
|
||||
psi2_full += psi2.sum(axis=0)
|
||||
else:
|
||||
if het_noise:
|
||||
psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1)
|
||||
else:
|
||||
psi2_full += tdot(psi1.T)
|
||||
|
||||
if not het_noise:
|
||||
psi0_full *= beta
|
||||
psi1Y_full *= beta
|
||||
psi2_full *= beta
|
||||
# YRY_full = trYYT*beta
|
||||
|
||||
psi1Y_gpu.set(psi1Y_full)
|
||||
psi2_gpu.set(psi2_full)
|
||||
|
||||
#======================================================================
|
||||
# Compute Common Components
|
||||
#======================================================================
|
||||
|
||||
Kmm = kern.K(Z).copy()
|
||||
Kmm_gpu = self.gpuCache['Kmm_gpu']
|
||||
Kmm_gpu.set(np.asfortranarray(Kmm))
|
||||
diag.add(Kmm, self.const_jitter)
|
||||
ones_gpu = self.gpuCache['ones_gpu']
|
||||
cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1)
|
||||
# assert np.allclose(Kmm, Kmm_gpu.get())
|
||||
|
||||
# Lm = jitchol(Kmm)
|
||||
#
|
||||
Lm_gpu = self.gpuCache['Lm_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1)
|
||||
culinalg.cho_factor(Lm_gpu,'L')
|
||||
# print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max()
|
||||
|
||||
# Lambda = Kmm+psi2_full
|
||||
# LL = jitchol(Lambda)
|
||||
#
|
||||
Lambda_gpu = self.gpuCache['LL_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
|
||||
cublas.cublasDaxpy(self.cublas_handle, psi2_gpu.size, np.float64(1.0), psi2_gpu.gpudata, 1, Lambda_gpu.gpudata, 1)
|
||||
LL_gpu = Lambda_gpu
|
||||
culinalg.cho_factor(LL_gpu,'L')
|
||||
# print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max()
|
||||
|
||||
# b,_ = dtrtrs(LL, psi1Y_full)
|
||||
# bbt_cpu = np.square(b).sum()
|
||||
#
|
||||
b_gpu = self.gpuCache['b_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, b_gpu.size, psi1Y_gpu.gpudata, 1, b_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing)
|
||||
bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1)
|
||||
# print np.abs(bbt-bbt_cpu)
|
||||
|
||||
# v,_ = dtrtrs(LL.T,b,lower=False)
|
||||
# vvt = np.einsum('md,od->mo',v,v)
|
||||
# LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right')
|
||||
#
|
||||
v_gpu = self.gpuCache['v_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, v_gpu.size, b_gpu.gpudata, 1, v_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing)
|
||||
vvt_gpu = self.gpuCache['vvt_gpu']
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, output_dim, np.float64(1.0), v_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing, np.float64(0.), vvt_gpu.gpudata, num_inducing)
|
||||
LmInvPsi2LmInvT_gpu = self.gpuCache['KmmInvPsi2LLInvT_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing)
|
||||
#tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1)
|
||||
tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get())
|
||||
# print np.abs(vvt-vvt_gpu.get()).max()
|
||||
# print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT)
|
||||
|
||||
# Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T
|
||||
# LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0]
|
||||
# KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0]
|
||||
# KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T
|
||||
#
|
||||
KmmInvPsi2LLInvT_gpu = LmInvPsi2LmInvT_gpu # Reuse GPU memory (size:MxM)
|
||||
cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, KmmInvPsi2LLInvT_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing)
|
||||
KmmInvPsi2P_gpu = self.gpuCache['KmmInvPsi2P_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1)
|
||||
cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing)
|
||||
# print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max()
|
||||
|
||||
# dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2
|
||||
#
|
||||
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
|
||||
cublas.cublasDcopy(self.cublas_handle, vvt_gpu.size, vvt_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
|
||||
cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1)
|
||||
cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1)
|
||||
# print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max()
|
||||
|
||||
#======================================================================
|
||||
# Compute log-likelihood
|
||||
#======================================================================
|
||||
if het_noise:
|
||||
logL_R = -np.log(beta).sum()
|
||||
else:
|
||||
logL_R = -num_data*np.log(beta)
|
||||
# logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum())
|
||||
|
||||
logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get())
|
||||
logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get())
|
||||
logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda)
|
||||
# print np.abs(logL_old - logL)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dKmm
|
||||
#======================================================================
|
||||
|
||||
# dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2.
|
||||
#
|
||||
dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu']
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, num_inducing, np.float64(1.0), KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, np.float64(0.), dL_dKmm_gpu.gpudata, num_inducing)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dKmm_gpu.size, np.float64(1./output_dim), vvt_gpu.gpudata, 1, dL_dKmm_gpu.gpudata, 1)
|
||||
cublas.cublasDscal(self.cublas_handle, dL_dKmm_gpu.size, np.float64(-output_dim/2.), dL_dKmm_gpu.gpudata, 1)
|
||||
# print np.abs(dL_dKmm - dL_dKmm_gpu.get()).max()
|
||||
|
||||
#======================================================================
|
||||
# Compute the Posterior distribution of inducing points p(u|Y)
|
||||
#======================================================================
|
||||
|
||||
post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get())
|
||||
|
||||
return logL, dL_dKmm_gpu.get(), post
|
||||
|
||||
def inference_minibatch(self, kern, X, Z, likelihood, Y):
|
||||
"""
|
||||
The second phase of inference: Computing the derivatives over a minibatch of Y
|
||||
Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL
|
||||
return a flag showing whether it reached the end of Y (isEnd)
|
||||
"""
|
||||
|
||||
num_data, output_dim = Y.shape
|
||||
num_inducing = Z.shape[0]
|
||||
|
||||
if isinstance(X, VariationalPosterior):
|
||||
uncertain_inputs = True
|
||||
else:
|
||||
uncertain_inputs = False
|
||||
|
||||
beta = 1./np.fmax(likelihood.variance, 1e-6)
|
||||
het_noise = beta.size > 1
|
||||
|
||||
n_start = self.batch_pos
|
||||
n_end = min(self.batchsize+n_start, num_data)
|
||||
if n_end==num_data:
|
||||
isEnd = True
|
||||
self.batch_pos = 0
|
||||
else:
|
||||
isEnd = False
|
||||
self.batch_pos = n_end
|
||||
|
||||
nSlice = n_end-n_start
|
||||
X_slice = X[n_start:n_end]
|
||||
|
||||
if kern.useGPU:
|
||||
if uncertain_inputs:
|
||||
psi0p_gpu = kern.psi0(Z, X_slice)
|
||||
psi1p_gpu = kern.psi1(Z, X_slice)
|
||||
psi2p_gpu = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0p_gpu = kern.Kdiag(X_slice)
|
||||
psi1p_gpu = kern.K(X_slice, Z)
|
||||
psi2p_gpu = self.gpuCache['psi2p_gpu']
|
||||
if psi2p_gpu.shape[0] > nSlice:
|
||||
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
else:
|
||||
if uncertain_inputs:
|
||||
psi0 = kern.psi0(Z, X_slice)
|
||||
psi1 = kern.psi1(Z, X_slice)
|
||||
psi2 = kern.psi2(Z, X_slice)
|
||||
else:
|
||||
psi0 = kern.Kdiag(X_slice)
|
||||
psi1 = kern.K(X_slice, Z)
|
||||
|
||||
psi0p_gpu = self.gpuCache['psi0p_gpu']
|
||||
psi1p_gpu = self.gpuCache['psi1p_gpu']
|
||||
psi2p_gpu = self.gpuCache['psi2p_gpu']
|
||||
if psi0p_gpu.shape[0] > nSlice:
|
||||
psi0p_gpu = psi0p_gpu[:nSlice]
|
||||
psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
psi0p_gpu.set(np.asfortranarray(psi0))
|
||||
psi1p_gpu.set(np.asfortranarray(psi1))
|
||||
if uncertain_inputs:
|
||||
psi2p_gpu.set(np.asfortranarray(psi2))
|
||||
|
||||
#======================================================================
|
||||
# Prepare gpu memory
|
||||
#======================================================================
|
||||
|
||||
dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu']
|
||||
v_gpu = self.gpuCache['v_gpu']
|
||||
betaYT_gpu = self.gpuCache['betaYT_gpu']
|
||||
beta_gpu = self.gpuCache['beta_gpu']
|
||||
dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu']
|
||||
dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu']
|
||||
dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu']
|
||||
dL_dthetaL_gpu = self.gpuCache['dL_dthetaL_gpu']
|
||||
psi2R_gpu = self.gpuCache['psi2_t_gpu'][:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
betapsi1_gpu = self.gpuCache['betapsi1_gpu']
|
||||
thetaL_t_gpu = self.gpuCache['thetaL_t_gpu']
|
||||
betaYT2_gpu = self.gpuCache['betaYT2_gpu']
|
||||
|
||||
betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end]
|
||||
beta_gpu_slice = beta_gpu[n_start:n_end]
|
||||
|
||||
# Adjust to the batch size
|
||||
if dL_dpsi0_gpu.shape[0] > nSlice:
|
||||
betaYT2_gpu = betaYT2_gpu[:,:nSlice]
|
||||
dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice]
|
||||
dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
dL_dpsi2_gpu = dL_dpsi2_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
dL_dthetaL_gpu = dL_dthetaL_gpu.ravel()[:nSlice]
|
||||
psi2R_gpu = psi2R_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing)
|
||||
thetaL_t_gpu = thetaL_t_gpu.ravel()[:nSlice]
|
||||
betapsi1_gpu = betapsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing)
|
||||
|
||||
mul_bcast(betapsi1_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dpsi
|
||||
#======================================================================
|
||||
|
||||
dL_dpsi0_gpu.fill(0.)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice)
|
||||
|
||||
if uncertain_inputs:
|
||||
outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size)
|
||||
else:
|
||||
cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice)
|
||||
|
||||
#======================================================================
|
||||
# Compute dL_dthetaL
|
||||
#======================================================================
|
||||
|
||||
if not uncertain_inputs:
|
||||
join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing)
|
||||
|
||||
mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice)
|
||||
|
||||
|
||||
dL_dthetaL_gpu.fill(0.)
|
||||
|
||||
cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1)
|
||||
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size)
|
||||
cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1)
|
||||
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
|
||||
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
|
||||
thetaL_t_gpu.fill(0.)
|
||||
sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
|
||||
mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size)
|
||||
cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1)
|
||||
|
||||
cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim)
|
||||
mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size)
|
||||
sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim)
|
||||
|
||||
if kern.useGPU:
|
||||
dL_dpsi0 = dL_dpsi0_gpu
|
||||
dL_dpsi1 = dL_dpsi1_gpu
|
||||
else:
|
||||
dL_dpsi0 = dL_dpsi0_gpu.get()
|
||||
dL_dpsi1 = dL_dpsi1_gpu.get()
|
||||
if uncertain_inputs:
|
||||
if kern.useGPU:
|
||||
dL_dpsi2 = dL_dpsi2_gpu
|
||||
else:
|
||||
dL_dpsi2 = dL_dpsi2_gpu.get()
|
||||
if het_noise:
|
||||
dL_dthetaL = dL_dthetaL_gpu.get()
|
||||
else:
|
||||
dL_dthetaL = gpuarray.sum(dL_dthetaL_gpu).get()
|
||||
if uncertain_inputs:
|
||||
grad_dict = {'dL_dpsi0':dL_dpsi0,
|
||||
'dL_dpsi1':dL_dpsi1,
|
||||
'dL_dpsi2':dL_dpsi2,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
else:
|
||||
grad_dict = {'dL_dKdiag':dL_dpsi0,
|
||||
'dL_dKnm':dL_dpsi1,
|
||||
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
return isEnd, (n_start,n_end), grad_dict
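
Editorial aside (not part of the diff): the memory model behind _estimateBatchSize above boils down to a constant cost plus a per-datapoint cost, both in GB. A small numeric sketch of that arithmetic (values illustrative, not from the source):

# Sketch only: the batch-size arithmetic used by VarDTC_GPU above.
M, D, N, gpu_memory = 100, 5, 100000, 4.0            # inducing pts, outputs, data, GB budget
GB = 8.0 / 1024.0**3                                  # bytes per float64, expressed in GB
y0 = (M + 9.0*M*M + 3*M*D + N + 2.0*N*D) * GB         # fixed buffers
y1 = (4.0 + 3.0*M + D + 3.0*M*M) * GB                 # extra buffers per datapoint in a batch
x0 = x1 = 0.0                                         # kernel psi-statistics not on GPU in this sketch
batchsize = min(int((gpu_memory - y0 - x0) / (x1 + y1)), N)
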
@@ -279,4 +279,55 @@ class VarDTC_minibatch(object):
'dL_dthetaL':dL_dthetaL}
|
||||
|
||||
return isEnd, (n_start,n_end), grad_dict
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
X_slice = model.X[n_range[0]:n_range[1]]
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice)
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(X_slice, X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
@@ -13,9 +13,10 @@ class Kern(Parameterized):
#===========================================================================
|
||||
# This adds input slice support. The rather ugly code for slicing can be
|
||||
# found in kernel_slice_operations
|
||||
__metaclass__ = KernCallsViaSlicerMeta
|
||||
#__metaclass__ = KernCallsViaSlicerMeta
|
||||
#===========================================================================
|
||||
def __init__(self, input_dim, active_dims, name, *a, **kw):
|
||||
_support_GPU=False
|
||||
def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw):
|
||||
"""
|
||||
The base class for a kernel: a positive definite function
|
||||
which forms a covariance function (kernel).
@@ -39,6 +40,8 @@ class Kern(Parameterized):
active_dim_size = len(self.active_dims)
|
||||
assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims)
|
||||
self._sliced_X = 0
|
||||
|
||||
self.useGPU = self._support_GPU and useGPU
|
||||
|
||||
@Cache_this(limit=10)
|
||||
def _slice_X(self, X):
|
||||
|
|
|
|||
GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py (new file, 535 lines)
@@ -0,0 +1,535 @@
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
"""
|
||||
The package for the psi statistics computation on GPU
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from GPy.util.caching import Cache_this
|
||||
|
||||
from ....util import gpu_init
|
||||
|
||||
try:
|
||||
import pycuda.gpuarray as gpuarray
|
||||
from scikits.cuda import cublas
|
||||
from pycuda.reduction import ReductionKernel
|
||||
from pycuda.elementwise import ElementwiseKernel
|
||||
from ....util import linalg_gpu
|
||||
|
||||
|
||||
# The kernel form computing psi1 het_noise
|
||||
comp_psi1 = ElementwiseKernel(
|
||||
"double *psi1, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
|
||||
"psi1[i] = comp_psi1_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
|
||||
"comp_psi1",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_psi1_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
int n = idx%N;
|
||||
int m = idx/N;
|
||||
double psi1_exp=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)] + muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
|
||||
psi1_exp += LOGEXPSUM(exp1,exp2);
|
||||
}
|
||||
return var*exp(psi1_exp);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel form computing psi2 het_noise
|
||||
comp_psi2 = ElementwiseKernel(
|
||||
"double *psi2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
|
||||
"psi2[i] = comp_psi2_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
|
||||
"comp_psi2",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
// psi2 (n,m1,m2)
|
||||
int m2 = idx/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double psi2_exp=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
|
||||
psi2_exp += LOGEXPSUM(exp1,exp2);
|
||||
}
|
||||
return var*var*exp(psi2_exp);
|
||||
}
|
||||
""")
|
||||
|
||||
# compute psidenom
|
||||
comp_logpsidenom = ElementwiseKernel(
|
||||
"double *out, double *S, double *l, double scale, int N",
|
||||
"out[i] = comp_logpsidenom_element(S, l, scale, N, i)",
|
||||
"comp_logpsidenom",
|
||||
preamble="""
|
||||
__device__ double comp_logpsidenom_element(double *S, double *l, double scale, int N, int idx)
|
||||
{
|
||||
int q = idx/N;
|
||||
|
||||
return log(scale*S[idx]/l[q]+1.0);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel form computing psi1 het_noise
|
||||
comp_dpsi1_dvar = ElementwiseKernel(
|
||||
"double *dpsi1_dvar, double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q",
|
||||
"dpsi1_dvar[i] = comp_dpsi1_dvar_element(psi1_neq, psi1exp1, psi1exp2, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)",
|
||||
"comp_dpsi1_dvar",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
int n = idx%N;
|
||||
int m = idx/N;
|
||||
|
||||
double psi1_sum = 0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double muZ = mu[IDX_NQ(n,q)]-Z[IDX_MQ(m,q)];
|
||||
double exp1_e = -(muZ*muZ/(S[IDX_NQ(n,q)]+l[q]) )/2.0;
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi1denom[IDX_NQ(n,q)])/2.0 + exp1_e;
|
||||
double exp2_e = - Z[IDX_MQ(m,q)]*Z[IDX_MQ(m,q)]/(l[q]*2.0);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
|
||||
double psi1_q = LOGEXPSUM(exp1,exp2);
|
||||
psi1_neq[IDX_NMQ(n,m,q)] = -psi1_q;
|
||||
psi1exp1[IDX_NMQ(n,m,q)] = exp(exp1_e);
|
||||
psi1exp2[IDX_MQ(m,q)] = exp(exp2_e);
|
||||
psi1_sum += psi1_q;
|
||||
}
|
||||
for(int q=0;q<Q;q++) {
|
||||
psi1_neq[IDX_NMQ(n,m,q)] = exp(psi1_neq[IDX_NMQ(n,m,q)]+psi1_sum);
|
||||
}
|
||||
return exp(psi1_sum);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel form computing psi1 het_noise
|
||||
comp_psi1_der = ElementwiseKernel(
|
||||
"double *dpsi1_dl, double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
|
||||
"dpsi1_dl[i] = comp_psi1_der_element(dpsi1_dmu, dpsi1_dS, dpsi1_dgamma, dpsi1_dZ, psi1_neq, psi1exp1, psi1exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
|
||||
"comp_psi1_der",
|
||||
preamble="""
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
|
||||
__device__ double comp_psi1_der_element(double *dpsi1_dmu, double *dpsi1_dS, double *dpsi1_dgamma, double *dpsi1_dZ, double *psi1_neq, double *psi1exp1, double *psi1exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
|
||||
{
|
||||
int q = idx/(M*N);
|
||||
int m = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double neq = psi1_neq[IDX_NMQ(n,m,q)];
|
||||
double gamma_c = gamma[IDX_NQ(n,q)];
|
||||
double Z_c = Z[IDX_MQ(m,q)];
|
||||
double S_c = S[IDX_NQ(n,q)];
|
||||
double l_c = l[q];
|
||||
double l_sqrt_c = sqrt(l[q]);
|
||||
double psi1exp1_c = psi1exp1[IDX_NMQ(n,m,q)];
|
||||
double psi1exp2_c = psi1exp2[IDX_MQ(m,q)];
|
||||
|
||||
double denom = S_c/l_c+1.0;
|
||||
double denom_sqrt = sqrt(denom);
|
||||
double Zmu = Z_c-mu[IDX_NQ(n,q)];
|
||||
double psi1_common = gamma_c/(denom_sqrt*denom*l_c);
|
||||
double gamma1 = 1-gamma_c;
|
||||
|
||||
dpsi1_dgamma[IDX_NMQ(n,m,q)] = var*neq*(psi1exp1_c/denom_sqrt - psi1exp2_c);
|
||||
dpsi1_dmu[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*Zmu*psi1exp1_c);
|
||||
dpsi1_dS[IDX_NMQ(n,m,q)] = var*neq*(psi1_common*(Zmu*Zmu/(S_c+l_c)-1.0)*psi1exp1_c)/2.0;
|
||||
dpsi1_dZ[IDX_NMQ(n,m,q)] = var*neq*(-psi1_common*Zmu*psi1exp1_c-gamma1*Z_c/l_c*psi1exp2_c);
|
||||
return var*neq*(psi1_common*(S_c/l_c+Zmu*Zmu/(S_c+l_c))*psi1exp1_c+gamma1*Z_c*Z_c/l_c*psi1exp2_c)*l_sqrt_c;
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel form computing psi1 het_noise
|
||||
comp_dpsi2_dvar = ElementwiseKernel(
|
||||
"double *dpsi2_dvar, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q",
|
||||
"dpsi2_dvar[i] = comp_dpsi2_dvar_element(psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, logGamma, log1Gamma, logpsi2denom, N, M, Q, i)",
|
||||
"comp_dpsi2_dvar",
|
||||
preamble="""
|
||||
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
|
||||
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
#define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b)))
|
||||
|
||||
__device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx)
|
||||
{
|
||||
// psi2 (n,m1,m2)
|
||||
int m2 = idx/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double psi2_sum=0;
|
||||
for(int q=0;q<Q;q++){
|
||||
double dZ = Z[IDX_MQ(m1,q)]-Z[IDX_MQ(m2,q)];
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)])/2.0;
|
||||
double exp1_e = - dZ*dZ/(l[q]*4.0) - muZ*muZ/(2*S[IDX_NQ(n,q)]+l[q]);
|
||||
double exp1 = logGamma[IDX_NQ(n,q)] - (logpsi2denom[IDX_NQ(n,q)])/2.0 +exp1_e;
|
||||
double exp2_e = - (Z[IDX_MQ(m1,q)]*Z[IDX_MQ(m1,q)]+Z[IDX_MQ(m2,q)]*Z[IDX_MQ(m2,q)])/(l[q]*2.0);
|
||||
double exp2 = log1Gamma[IDX_NQ(n,q)] + exp2_e;
|
||||
double psi2_q = LOGEXPSUM(exp1,exp2);
|
||||
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = -psi2_q;
|
||||
psi2exp1[IDX_NMMQ(n,m1,m2,q)] = exp(exp1_e);
|
||||
psi2exp2[IDX_MMQ(m1,m2,q)] = exp(exp2_e);
|
||||
psi2_sum += psi2_q;
|
||||
}
|
||||
for(int q=0;q<Q;q++) {
|
||||
psi2_neq[IDX_NMMQ(n,m1,m2,q)] = exp(psi2_neq[IDX_NMMQ(n,m1,m2,q)]+psi2_sum);
|
||||
}
|
||||
return 2*var*exp(psi2_sum);
|
||||
}
|
||||
""")
|
||||
|
||||
# The kernel form computing psi1 het_noise
|
||||
comp_psi2_der = ElementwiseKernel(
|
||||
"double *dpsi2_dl, double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q",
|
||||
"dpsi2_dl[i] = comp_psi2_der_element(dpsi2_dmu, dpsi2_dS, dpsi2_dgamma, dpsi2_dZ, psi2_neq, psi2exp1, psi2exp2, var, l, Z, mu, S, gamma, N, M, Q, i)",
|
||||
"comp_psi2_der",
|
||||
preamble="""
|
||||
#define IDX_NMMQ(n,m1,m2,q) (((q*M+m2)*M+m1)*N+n)
|
||||
#define IDX_MMQ(m1,m2,q) ((q*M+m2)*M+m1)
|
||||
#define IDX_NMQ(n,m,q) ((q*M+m)*N+n)
|
||||
#define IDX_NQ(n,q) (q*N+n)
|
||||
#define IDX_MQ(m,q) (q*M+m)
|
||||
|
||||
__device__ double comp_psi2_der_element(double *dpsi2_dmu, double *dpsi2_dS, double *dpsi2_dgamma, double *dpsi2_dZ, double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *gamma, int N, int M, int Q, int idx)
|
||||
{
|
||||
// dpsi2 (n,m1,m2,q)
|
||||
int q = idx/(M*M*N);
|
||||
int m2 = (idx%(M*M*N))/(M*N);
|
||||
int m1 = (idx%(M*N))/N;
|
||||
int n = idx%N;
|
||||
|
||||
double neq = psi2_neq[IDX_NMMQ(n,m1,m2,q)];
|
||||
double gamma_c = gamma[IDX_NQ(n,q)];
|
||||
double Z1_c = Z[IDX_MQ(m1,q)];
|
||||
double Z2_c = Z[IDX_MQ(m2,q)];
|
||||
double S_c = S[IDX_NQ(n,q)];
|
||||
double l_c = l[q];
|
||||
double l_sqrt_c = sqrt(l[q]);
|
||||
double psi2exp1_c = psi2exp1[IDX_NMMQ(n,m1,m2,q)];
|
||||
double psi2exp2_c = psi2exp2[IDX_MMQ(m1,m2,q)];
|
||||
|
||||
double dZ = Z2_c - Z1_c;
|
||||
double muZ = mu[IDX_NQ(n,q)] - (Z1_c+Z2_c)/2.0;
|
||||
double Z2 = Z1_c*Z1_c+Z2_c*Z2_c;
|
||||
double denom = 2.0*S_c/l_c+1.0;
|
||||
double denom_sqrt = sqrt(denom);
|
||||
double psi2_common = gamma_c/(denom_sqrt*denom*l_c);
|
||||
double gamma1 = 1-gamma_c;
|
||||
double var2 = var*var;
|
||||
|
||||
dpsi2_dgamma[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2exp1_c/denom_sqrt - psi2exp2_c);
|
||||
dpsi2_dmu[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(-2.0*psi2_common*muZ*psi2exp1_c);
|
||||
dpsi2_dS[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(2.0*muZ*muZ/(2.0*S_c+l_c)-1.0)*psi2exp1_c);
|
||||
dpsi2_dZ[IDX_NMMQ(n,m1,m2,q)] = var2*neq*(psi2_common*(dZ*denom/-2.0+muZ)*psi2exp1_c-gamma1*Z2_c/l_c*psi2exp2_c)*2.0;
|
||||
return var2*neq*(psi2_common*(S_c/l_c+dZ*dZ*denom/(4.0*l_c)+muZ*muZ/(2.0*S_c+l_c))*psi2exp1_c+gamma1*Z2/(2.0*l_c)*psi2exp2_c)*l_sqrt_c*2.0;
|
||||
}
|
||||
""")
|
||||
|
||||
except:
|
||||
pass
|
||||
|
||||
class PSICOMP_SSRBF(object):
|
||||
def __init__(self):
|
||||
assert gpu_init.initSuccess, "GPU initialization failed!"
|
||||
self.cublas_handle = gpu_init.cublas_handle
|
||||
self.gpuCache = None
|
||||
self.gpuCacheAll = None
|
||||
|
||||
def _initGPUCache(self, N, M, Q):
|
||||
if self.gpuCache!=None and self.gpuCache['mu_gpu'].shape[0] == N:
|
||||
return
|
||||
|
||||
if self.gpuCacheAll!=None and self.gpuCacheAll['mu_gpu'].shape[0]<N: # Too small cache -> reallocate
|
||||
self._releaseMemory()
|
||||
|
||||
if self.gpuCacheAll == None:
|
||||
self.gpuCacheAll = {
|
||||
'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
||||
'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logGamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'log1Gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logpsi1denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'),
|
||||
'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'),
|
||||
'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'),
|
||||
# derivatives psi1
|
||||
'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'psi1exp2_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dvar_gpu' :gpuarray.empty((N,M),np.float64, order='F'),
|
||||
'dpsi1_dl_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dZ_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dgamma_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dmu_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
'dpsi1_dS_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'),
|
||||
# derivatives psi2
|
||||
'psi2_neq_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'psi2exp1_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'psi2exp2_gpu' :gpuarray.empty((M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dvar_gpu' :gpuarray.empty((N,M,M),np.float64, order='F'),
|
||||
'dpsi2_dl_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dZ_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dgamma_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dmu_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
'dpsi2_dS_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'),
|
||||
# gradients
|
||||
'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'),
|
||||
'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'),
|
||||
'grad_mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'),
|
||||
}
|
||||
self.gpuCache = self.gpuCacheAll
|
||||
elif self.gpuCacheAll['mu_gpu'].shape[0]==N:
|
||||
self.gpuCache = self.gpuCacheAll
|
||||
else:
|
||||
# remap to a smaller cache
|
||||
self.gpuCache = self.gpuCacheAll.copy()
|
||||
Nlist=['mu_gpu','S_gpu','gamma_gpu','logGamma_gpu','log1Gamma_gpu','logpsi1denom_gpu','logpsi2denom_gpu','psi0_gpu','psi1_gpu','psi2_gpu',
|
||||
'psi1_neq_gpu','psi1exp1_gpu','psi1exp2_gpu','dpsi1_dvar_gpu','dpsi1_dl_gpu','dpsi1_dZ_gpu','dpsi1_dgamma_gpu','dpsi1_dmu_gpu',
|
||||
'dpsi1_dS_gpu','psi2_neq_gpu','psi2exp1_gpu','dpsi2_dvar_gpu','dpsi2_dl_gpu','dpsi2_dZ_gpu','dpsi2_dgamma_gpu','dpsi2_dmu_gpu','dpsi2_dS_gpu','grad_mu_gpu','grad_S_gpu','grad_gamma_gpu',]
|
||||
oldN = self.gpuCacheAll['mu_gpu'].shape[0]
|
||||
for v in Nlist:
|
||||
u = self.gpuCacheAll[v]
|
||||
self.gpuCache[v] = u.ravel()[:u.size/oldN*N].reshape(*((N,)+u.shape[1:]))
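The remapping above reuses the existing, larger GPU allocation as a smaller buffer instead of reallocating; the contents are scratch and are overwritten on the next call. A minimal NumPy sketch of the same idea (sizes are arbitrary examples):

import numpy as np

oldN, N, M, Q = 100, 60, 8, 3
big = np.empty((oldN, M, Q), order='F')                # previously allocated cache
small = big.ravel(order='A')[:N*M*Q].reshape(N, M, Q)  # view of the first N*M*Q elements
assert np.shares_memory(big, small)                    # no new allocation, just a view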
|
||||
|
||||
def _releaseMemory(self):
|
||||
if self.gpuCacheAll!=None:
|
||||
[v.gpudata.free() for v in self.gpuCacheAll.values()]
|
||||
self.gpuCacheAll = None
|
||||
self.gpuCache = None
|
||||
|
||||
def estimateMemoryOccupation(self, N, M, Q):
|
||||
"""
|
||||
Estimate the memory occupation, to help choose the best batch size.
|
||||
N - the number of total datapoints
|
||||
M - the number of inducing points
|
||||
Q - the number of hidden (input) dimensions
|
||||
return: the constant memory size and the per-datapoint memory occupation (i.e. for batchsize=1)
|
||||
unit: GB
|
||||
"""
|
||||
return (2.*Q+2.*M*Q+M*M*Q)*8./1024./1024./1024., (1.+2.*M+10.*Q+2.*M*M+8.*M*Q+7.*M*M*Q)*8./1024./1024./1024.
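A hypothetical usage sketch (assuming a working CUDA setup and an assumed 2 GB memory budget): subtract the constant part and divide by the per-datapoint cost to pick a minibatch size.

psicomp = PSICOMP_SSRBF()
const_gb, per_point_gb = psicomp.estimateMemoryOccupation(N=10000, M=50, Q=10)
budget_gb = 2.0                                         # assumed free GPU memory
batchsize = int((budget_gb - const_gb) / per_point_gb)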
|
||||
|
||||
@Cache_this(limit=1,ignore_args=(0,))
|
||||
def psicomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""Compute Psi statitsitcs"""
|
||||
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
|
||||
ARD = True
|
||||
else:
|
||||
ARD = False
|
||||
|
||||
N = mu.shape[0]
|
||||
M = Z.shape[0]
|
||||
Q = mu.shape[1]
|
||||
|
||||
self._initGPUCache(N,M,Q)
|
||||
l_gpu = self.gpuCache['l_gpu']
|
||||
Z_gpu = self.gpuCache['Z_gpu']
|
||||
mu_gpu = self.gpuCache['mu_gpu']
|
||||
S_gpu = self.gpuCache['S_gpu']
|
||||
gamma_gpu = self.gpuCache['gamma_gpu']
|
||||
logGamma_gpu = self.gpuCache['logGamma_gpu']
|
||||
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
|
||||
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
|
||||
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
|
||||
psi0_gpu = self.gpuCache['psi0_gpu']
|
||||
psi1_gpu = self.gpuCache['psi1_gpu']
|
||||
psi2_gpu = self.gpuCache['psi2_gpu']
|
||||
|
||||
if ARD:
|
||||
l_gpu.set(np.asfortranarray(lengthscale**2))
|
||||
else:
|
||||
l_gpu.fill(lengthscale*lengthscale)
|
||||
Z_gpu.set(np.asfortranarray(Z))
|
||||
mu_gpu.set(np.asfortranarray(mu))
|
||||
S_gpu.set(np.asfortranarray(S))
|
||||
gamma_gpu.set(np.asfortranarray(gamma))
|
||||
linalg_gpu.log(gamma_gpu,logGamma_gpu)
|
||||
linalg_gpu.logOne(gamma_gpu,log1Gamma_gpu)
|
||||
comp_logpsidenom(logpsi1denom_gpu, S_gpu,l_gpu,1.0,N)
|
||||
comp_logpsidenom(logpsi2denom_gpu, S_gpu,l_gpu,2.0,N)
|
||||
|
||||
psi0_gpu.fill(variance)
|
||||
comp_psi1(psi1_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
|
||||
comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
||||
|
||||
return psi0_gpu, psi1_gpu, psi2_gpu
|
||||
|
||||
@Cache_this(limit=1,ignore_args=(0,))
|
||||
def _psiDercomputations(self, variance, lengthscale, Z, mu, S, gamma):
|
||||
"""Compute the derivatives w.r.t. Psi statistics"""
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
self._initGPUCache(N,M,Q)
|
||||
l_gpu = self.gpuCache['l_gpu']
|
||||
Z_gpu = self.gpuCache['Z_gpu']
|
||||
mu_gpu = self.gpuCache['mu_gpu']
|
||||
S_gpu = self.gpuCache['S_gpu']
|
||||
gamma_gpu = self.gpuCache['gamma_gpu']
|
||||
logGamma_gpu = self.gpuCache['logGamma_gpu']
|
||||
log1Gamma_gpu = self.gpuCache['log1Gamma_gpu']
|
||||
logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu']
|
||||
logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu']
|
||||
|
||||
psi1_neq_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi1exp1_gpu = self.gpuCache['psi1exp1_gpu']
|
||||
psi1exp2_gpu = self.gpuCache['psi1exp2_gpu']
|
||||
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
|
||||
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
|
||||
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
|
||||
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
|
||||
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
|
||||
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
|
||||
|
||||
psi2_neq_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
psi2exp1_gpu = self.gpuCache['psi2exp1_gpu']
|
||||
psi2exp2_gpu = self.gpuCache['psi2exp2_gpu']
|
||||
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
|
||||
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
|
||||
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
|
||||
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
|
||||
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
|
||||
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
|
||||
|
||||
#==========================================================================================================
|
||||
# Assuming the l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, logGamma_gpu, log1Gamma_gpu,
|
||||
# logpsi1denom_gpu, and logpsi2denom_gpu have been synchronized.
|
||||
#==========================================================================================================
|
||||
|
||||
# psi1 derivatives
|
||||
comp_dpsi1_dvar(dpsi1_dvar_gpu, psi1_neq_gpu, psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q)
|
||||
comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
||||
|
||||
# psi2 derivatives
|
||||
comp_dpsi2_dvar(dpsi2_dvar_gpu, psi2_neq_gpu, psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q)
|
||||
comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q)
|
||||
|
||||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1:
|
||||
ARD = True
|
||||
else:
|
||||
ARD = False
|
||||
|
||||
dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu']
|
||||
dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu']
|
||||
dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu']
|
||||
dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_l_gpu = self.gpuCache['grad_l_gpu']
|
||||
|
||||
# variance
|
||||
variance.gradient = gpuarray.sum(dL_dpsi0).get() \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \
|
||||
+ cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1)
|
||||
|
||||
# lengthscale
|
||||
if ARD:
|
||||
grad_l_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_l_gpu, psi1_comb_gpu, 1, N*M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_l_gpu, psi2_comb_gpu, 1, N*M*M)
|
||||
lengthscale.gradient = grad_l_gpu.get()
|
||||
else:
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size)
|
||||
lengthscale.gradient = gpuarray.sum(psi1_comb_gpu).get() + gpuarray.sum(psi2_comb_gpu).get()
|
||||
|
||||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu']
|
||||
dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_Z_gpu = self.gpuCache['grad_Z_gpu']
|
||||
|
||||
grad_Z_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dZ_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_Z_gpu, psi1_comb_gpu, 1, N)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dZ_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_Z_gpu, psi2_comb_gpu, 1, N*M)
|
||||
return grad_Z_gpu.get()
|
||||
|
||||
def gradients_qX_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior):
|
||||
mu = variational_posterior.mean
|
||||
S = variational_posterior.variance
|
||||
gamma = variational_posterior.binary_prob
|
||||
self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma)
|
||||
N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1]
|
||||
|
||||
dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu']
|
||||
dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu']
|
||||
dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu']
|
||||
dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu']
|
||||
dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu']
|
||||
dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu']
|
||||
psi1_comb_gpu = self.gpuCache['psi1_neq_gpu']
|
||||
psi2_comb_gpu = self.gpuCache['psi2_neq_gpu']
|
||||
grad_mu_gpu = self.gpuCache['grad_mu_gpu']
|
||||
grad_S_gpu = self.gpuCache['grad_S_gpu']
|
||||
grad_gamma_gpu = self.gpuCache['grad_gamma_gpu']
|
||||
|
||||
# mu gradients
|
||||
grad_mu_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dmu_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_mu_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dmu_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_mu_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
# S gradients
|
||||
grad_S_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dS_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_S_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dS_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_S_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
# gamma gradients
|
||||
grad_gamma_gpu.fill(0.)
|
||||
linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dgamma_gpu, dL_dpsi1.size)
|
||||
linalg_gpu.sum_axis(grad_gamma_gpu, psi1_comb_gpu, N, M)
|
||||
linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dgamma_gpu, dL_dpsi2.size)
|
||||
linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M)
|
||||
|
||||
return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get()
|
||||
|
|
@ -9,6 +9,7 @@ from stationary import Stationary
|
|||
from GPy.util.caching import Cache_this
|
||||
from ...core.parameterization import variational
|
||||
from psi_comp import ssrbf_psi_comp
|
||||
from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF
|
||||
|
||||
class RBF(Stationary):
|
||||
"""
|
||||
|
|
@ -19,9 +20,15 @@ class RBF(Stationary):
|
|||
k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg)
|
||||
|
||||
"""
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'):
|
||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name)
|
||||
_support_GPU = True
|
||||
def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False):
|
||||
super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU)
|
||||
self.weave_options = {}
|
||||
self.group_spike_prob = False
|
||||
|
||||
if self.useGPU:
|
||||
self.psicomp = PSICOMP_SSRBF()
|
||||
|
||||
|
||||
def K_of_r(self, r):
|
||||
return self.variance * np.exp(-0.5 * r**2)
|
||||
|
|
@ -34,18 +41,28 @@ class RBF(Stationary):
|
|||
#---------------------------------------#
|
||||
|
||||
def psi0(self, Z, variational_posterior):
|
||||
return self.Kdiag(variational_posterior.mean)
|
||||
if self.useGPU:
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0]
|
||||
else:
|
||||
return self.Kdiag(variational_posterior.mean)
|
||||
|
||||
def psi1(self, Z, variational_posterior):
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
if self.useGPU:
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1]
|
||||
else:
|
||||
psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
else:
|
||||
_, _, _, psi1 = self._psi1computations(Z, variational_posterior)
|
||||
return psi1
|
||||
|
||||
def psi2(self, Z, variational_posterior):
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
if self.useGPU:
|
||||
return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2]
|
||||
else:
|
||||
psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
else:
|
||||
_, _, _, _, psi2 = self._psi2computations(Z, variational_posterior)
|
||||
return psi2
|
||||
|
|
@ -53,26 +70,30 @@ class RBF(Stationary):
|
|||
def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#contributions from psi0:
|
||||
self.variance.gradient = np.sum(dL_dpsi0)
|
||||
|
||||
#from psi1
|
||||
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
if self.useGPU:
|
||||
self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
|
||||
|
||||
#from psi2
|
||||
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
|
||||
|
||||
_, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#contributions from psi0:
|
||||
self.variance.gradient = np.sum(dL_dpsi0)
|
||||
|
||||
#from psi1
|
||||
self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance)
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum()
|
||||
|
||||
#from psi2
|
||||
self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum()
|
||||
if self.ARD:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0)
|
||||
else:
|
||||
self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum()
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
l2 = self.lengthscale**2
|
||||
if l2.size != self.input_dim:
|
||||
|
|
@ -107,16 +128,19 @@ class RBF(Stationary):
|
|||
def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
|
||||
|
||||
#psi2
|
||||
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
|
||||
|
||||
return grad
|
||||
if self.useGPU:
|
||||
return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
_, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0)
|
||||
|
||||
#psi2
|
||||
grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1)
|
||||
|
||||
return grad
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
l2 = self.lengthscale **2
|
||||
|
|
@ -140,22 +164,28 @@ class RBF(Stationary):
|
|||
def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior):
|
||||
# Spike-and-Slab GPLVM
|
||||
if isinstance(variational_posterior, variational.SpikeAndSlabPosterior):
|
||||
ndata = variational_posterior.mean.shape[0]
|
||||
|
||||
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
|
||||
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
|
||||
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
|
||||
|
||||
#psi2
|
||||
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
if self.useGPU:
|
||||
return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior)
|
||||
else:
|
||||
ndata = variational_posterior.mean.shape[0]
|
||||
|
||||
_, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
_, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)
|
||||
|
||||
#psi1
|
||||
grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1)
|
||||
grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1)
|
||||
grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1)
|
||||
|
||||
#psi2
|
||||
grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1)
|
||||
|
||||
if self.group_spike_prob:
|
||||
grad_gamma[:] = grad_gamma.mean(axis=0)
|
||||
|
||||
return grad_mu, grad_S, grad_gamma
|
||||
|
||||
elif isinstance(variational_posterior, variational.NormalPosterior):
|
||||
|
||||
|
|
|
|||
|
|
@ -41,8 +41,8 @@ class Stationary(Kern):
|
|||
|
||||
"""
|
||||
|
||||
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name):
|
||||
super(Stationary, self).__init__(input_dim, active_dims, name)
|
||||
def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False):
|
||||
super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU)
|
||||
self.ARD = ARD
|
||||
if not ARD:
|
||||
if lengthscale is None:
|
||||
|
|
|
|||
|
|
@ -15,8 +15,8 @@ except ImportError:
|
|||
if sympy_available:
|
||||
# These are likelihoods that rely on symbolic.
|
||||
from symbolic import Symbolic
|
||||
from sstudent_t import SstudentT
|
||||
#from sstudent_t import SstudentT
|
||||
from negative_binomial import Negative_binomial
|
||||
from skew_normal import Skew_normal
|
||||
from skew_exponential import Skew_exponential
|
||||
from null_category import Null_category
|
||||
#from skew_normal import Skew_normal
|
||||
#from skew_exponential import Skew_exponential
|
||||
#from null_category import Null_category
|
||||
|
|
|
|||
|
|
@ -7,7 +7,9 @@ from ..core import SparseGP
|
|||
from ..likelihoods import Gaussian
|
||||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior,VariationalPosterior
|
||||
from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
class BayesianGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -65,6 +67,10 @@ class BayesianGPLVM(SparseGP):
|
|||
X.mean.gradient, X.variance.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
super(BayesianGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
|
||||
|
|
@ -153,57 +159,7 @@ class BayesianGPLVM(SparseGP):
|
|||
from ..plotting.matplot_dep import dim_reduction_plots
|
||||
|
||||
return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs)
|
||||
|
||||
|
||||
def update_gradients(model):
|
||||
model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
|
||||
het_noise = model.likelihood.variance.size > 1
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL = np.empty((model.Y.shape[0],))
|
||||
else:
|
||||
dL_dthetaL = 0
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_full(dL_dKmm, model.Z, None)
|
||||
kern_grad = model.kern.gradient.copy()
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z)
|
||||
|
||||
isEnd = False
|
||||
while not isEnd:
|
||||
isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y)
|
||||
if isinstance(model.X, VariationalPosterior):
|
||||
|
||||
#gradients w.r.t. kernel
|
||||
model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
kern_grad += model.kern.gradient
|
||||
|
||||
#gradients w.r.t. Z
|
||||
model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations(
|
||||
grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]])
|
||||
|
||||
#gradients w.r.t. posterior parameters of X
|
||||
X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2'])
|
||||
model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad)
|
||||
|
||||
if het_noise:
|
||||
dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL']
|
||||
else:
|
||||
dL_dthetaL += grad_dict['dL_dthetaL']
|
||||
|
||||
# Set the gradients w.r.t. kernel
|
||||
model.kern.gradient = kern_grad
|
||||
|
||||
# Update Log-likelihood
|
||||
model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X)
|
||||
# update for the KL divergence
|
||||
model.variational_prior.update_gradients_KL(model.X)
|
||||
|
||||
# dL_dthetaL
|
||||
model.likelihood.update_gradients(dL_dthetaL)
|
||||
|
||||
def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -11,6 +11,9 @@ from ..likelihoods import Gaussian
|
|||
from ..inference.optimization import SCG
|
||||
from ..util import linalg
|
||||
from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior
|
||||
from ..inference.latent_function_inference.var_dtc_parallel import update_gradients
|
||||
from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU
|
||||
|
||||
|
||||
class SSGPLVM(SparseGP):
|
||||
"""
|
||||
|
|
@ -25,11 +28,14 @@ class SSGPLVM(SparseGP):
|
|||
|
||||
"""
|
||||
def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10,
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs):
|
||||
Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs):
|
||||
|
||||
if X == None: # The mean of variational approximation (mu)
|
||||
if X == None:
|
||||
from ..util.initialization import initialize_latent
|
||||
X = initialize_latent(init, input_dim, Y)
|
||||
X, fracs = initialize_latent(init, input_dim, Y)
|
||||
else:
|
||||
fracs = np.ones(input_dim)
|
||||
|
||||
self.init = init
|
||||
|
||||
if X_variance is None: # The variance of the variational approximation (S)
|
||||
|
|
@ -38,6 +44,9 @@ class SSGPLVM(SparseGP):
|
|||
gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation
|
||||
gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim)
|
||||
|
||||
if group_spike:
|
||||
gamma[:] = gamma.mean(axis=0)
|
||||
|
||||
if Z is None:
|
||||
Z = np.random.permutation(X.copy())[:num_inducing]
|
||||
assert Z.shape[1] == X.shape[1]
|
||||
|
|
@ -46,18 +55,31 @@ class SSGPLVM(SparseGP):
|
|||
likelihood = Gaussian()
|
||||
|
||||
if kernel is None:
|
||||
kernel = kern.SSRBF(input_dim)
|
||||
|
||||
kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim)
|
||||
|
||||
pi = np.empty((input_dim))
|
||||
pi[:] = 0.5
|
||||
self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b
|
||||
X = SpikeAndSlabPosterior(X, X_variance, gamma)
|
||||
|
||||
if group_spike:
|
||||
kernel.group_spike_prob = True
|
||||
self.variational_prior.group_spike_prob = True
|
||||
|
||||
|
||||
SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs)
|
||||
self.add_parameter(self.X, index=0)
|
||||
self.add_parameter(self.variational_prior)
|
||||
|
||||
def set_X_gradients(self, X, X_grad):
|
||||
"""Set the gradients of the posterior distribution of X in its specific form."""
|
||||
X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad
|
||||
|
||||
def parameters_changed(self):
|
||||
if isinstance(self.inference_method, VarDTC_GPU):
|
||||
update_gradients(self)
|
||||
return
|
||||
|
||||
super(SSGPLVM, self).parameters_changed()
|
||||
self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X)
|
||||
|
||||
|
|
|
|||
|
|
@ -15,3 +15,5 @@ import latent_space_visualizations
|
|||
import netpbmfile
|
||||
import inference_plots
|
||||
import maps
|
||||
import img_plots
|
||||
from ssgplvm import SSGPLVM_plot
|
||||
|
|
|
|||
56
GPy/plotting/matplot_dep/img_plots.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
"""
|
||||
The module contains the tools for plotting 2D image visualizations
|
||||
"""
|
||||
|
||||
import numpy as np
|
||||
from matplotlib.cm import jet
|
||||
|
||||
width_max = 15
|
||||
height_max = 12
|
||||
|
||||
def _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad):
|
||||
width = (x_size*fig_ncols+pad*(fig_ncols-1))
|
||||
height = (y_size*fig_nrows+pad*(fig_nrows-1))
|
||||
if width > float(height)/height_max*width_max:
|
||||
return (width_max, float(width_max)/width*height)
|
||||
else:
|
||||
return (float(height_max)/height*width, height_max)
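As a quick sanity check of the scaling rule: a 1x3 strip of 100x100 images with no padding gives a 300x100 canvas, which is wider than the 15x12 limit, so the width is pinned at width_max and the height scales down proportionally.

assert _calculateFigureSize(100, 100, fig_ncols=3, fig_nrows=1, pad=0) == (15, 5.0)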
|
||||
|
||||
def plot_2D_images(figure, arr, symmetric=False, pad=None, zoom=None, mode=None, interpolation='nearest'):
|
||||
ax = figure.add_subplot(111)
|
||||
if len(arr.shape)==2:
|
||||
arr = arr.reshape(*((1,)+arr.shape))
|
||||
fig_num = arr.shape[0]
|
||||
y_size = arr.shape[1]
|
||||
x_size = arr.shape[2]
|
||||
fig_ncols = int(np.ceil(np.sqrt(fig_num)))
|
||||
fig_nrows = int(np.ceil((float)(fig_num)/fig_ncols))
|
||||
if pad==None:
|
||||
pad = max(int(min(y_size,x_size)/10),1)
|
||||
|
||||
figsize = _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad)
|
||||
#figure.set_size_inches(figsize,forward=True)
|
||||
#figure.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95)
|
||||
|
||||
if symmetric:
|
||||
# symmetric around zero: fix zero as the middle color
|
||||
mval = max(abs(arr.max()),abs(arr.min()))
|
||||
arr = arr/(2.*mval)+0.5
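# values in [-mval, mval] now map linearly to [0, 1], so zero lands at 0.5 (the middle of the colormap)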
|
||||
else:
|
||||
minval,maxval = arr.min(),arr.max()
|
||||
arr = (arr-minval)/(maxval-minval)
|
||||
|
||||
if mode=='L':
|
||||
arr_color = np.empty(arr.shape+(3,))
|
||||
arr_color[:] = arr.reshape(*(arr.shape+(1,)))
|
||||
elif mode==None or mode=='jet':
|
||||
arr_color = jet(arr)
|
||||
|
||||
buf = np.ones((y_size*fig_nrows+pad*(fig_nrows-1), x_size*fig_ncols+pad*(fig_ncols-1), 3),dtype=arr.dtype)
|
||||
|
||||
for y in xrange(fig_nrows):
|
||||
for x in xrange(fig_ncols):
|
||||
if y*fig_ncols+x<fig_num:
|
||||
buf[y*y_size+y*pad:(y+1)*y_size+y*pad, x*x_size+x*pad:(x+1)*x_size+x*pad] = arr_color[y*fig_ncols+x,:,:,:3]
|
||||
img_plot = ax.imshow(buf, interpolation=interpolation)
|
||||
ax.axis('off')
|
||||
29
GPy/plotting/matplot_dep/ssgplvm.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
"""
|
||||
The module for plotting SSGPLVM results
|
||||
"""
|
||||
|
||||
import pylab
|
||||
|
||||
from ...models import SSGPLVM
|
||||
from img_plots import plot_2D_images
|
||||
from ...util.misc import param_to_array
|
||||
|
||||
class SSGPLVM_plot(object):
|
||||
def __init__(self,model, imgsize):
|
||||
assert isinstance(model,SSGPLVM)
|
||||
self.model = model
|
||||
self.imgsize= imgsize
|
||||
assert model.Y.shape[1] == imgsize[0]*imgsize[1]
|
||||
|
||||
def plot_inducing(self):
|
||||
fig1 = pylab.figure()
|
||||
mean = self.model.posterior.mean
|
||||
arr = mean.reshape(*(mean.shape[0],self.imgsize[1],self.imgsize[0]))
|
||||
plot_2D_images(fig1, arr)
|
||||
fig1.gca().set_title('The mean of inducing points')
|
||||
|
||||
fig2 = pylab.figure()
|
||||
covar = self.model.posterior.covariance
|
||||
plot_2D_images(fig2, covar)
|
||||
fig2.gca().set_title('The variance of inducing points')
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ def plot(parameterized, fignum=None, ax=None, colors=None):
|
|||
fig.tight_layout(h_pad=.01) # , rect=(0, 0, 1, .95))
|
||||
return fig
|
||||
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
||||
def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None, side_by_side=True):
|
||||
"""
|
||||
Plot latent space X in 1D:
|
||||
|
||||
|
|
@ -58,7 +58,10 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
|
||||
"""
|
||||
if ax is None:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if side_by_side:
|
||||
fig = pb.figure(num=fignum, figsize=(16, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
else:
|
||||
fig = pb.figure(num=fignum, figsize=(8, min(12, (2 * parameterized.mean.shape[1]))))
|
||||
if colors is None:
|
||||
colors = pb.gca()._get_lines.color_cycle
|
||||
pb.clf()
|
||||
|
|
@ -68,8 +71,15 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
means, variances, gamma = param_to_array(parameterized.mean, parameterized.variance, parameterized.binary_prob)
|
||||
x = np.arange(means.shape[0])
|
||||
for i in range(means.shape[1]):
|
||||
if side_by_side:
|
||||
sub1 = (means.shape[1],2,2*i+1)
|
||||
sub2 = (means.shape[1],2,2*i+2)
|
||||
else:
|
||||
sub1 = (means.shape[1]*2,1,2*i+1)
|
||||
sub2 = (means.shape[1]*2,1,2*i+2)
|
||||
|
||||
# mean and variance plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 1)
|
||||
a = fig.add_subplot(*sub1)
|
||||
a.plot(means, c='k', alpha=.3)
|
||||
plots.extend(a.plot(x, means.T[i], c=colors.next(), label=r"$\mathbf{{X_{{{}}}}}$".format(i)))
|
||||
a.fill_between(x,
|
||||
|
|
@ -82,7 +92,7 @@ def plot_SpikeSlab(parameterized, fignum=None, ax=None, colors=None):
|
|||
if i < means.shape[1] - 1:
|
||||
a.set_xticklabels('')
|
||||
# binary prob plot
|
||||
a = fig.add_subplot(means.shape[1]*2, 1, 2*i + 2)
|
||||
a = fig.add_subplot(*sub2)
|
||||
a.bar(x,gamma[:,i],bottom=0.,linewidth=0,align='center')
|
||||
a.set_xlim(x.min(), x.max())
|
||||
a.set_ylim([0.,1.])
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ class vector_show(matplotlib_show):
|
|||
|
||||
|
||||
class lvm(matplotlib_show):
|
||||
|
||||
def __init__(self, vals, model, data_visualize, latent_axes=None, sense_axes=None, latent_index=[0,1]):
|
||||
"""Visualize a latent variable model
|
||||
|
||||
|
|
@ -98,7 +99,7 @@ class lvm(matplotlib_show):
|
|||
vals = param_to_array(model.X.mean)
|
||||
else:
|
||||
vals = param_to_array(model.X)
|
||||
|
||||
|
||||
vals = param_to_array(vals)
|
||||
matplotlib_show.__init__(self, vals, axes=latent_axes)
|
||||
|
||||
|
|
@ -121,7 +122,7 @@ class lvm(matplotlib_show):
|
|||
self.move_on = False
|
||||
self.latent_index = latent_index
|
||||
self.latent_dim = model.input_dim
|
||||
|
||||
|
||||
# The red cross which shows current latent point.
|
||||
self.latent_values = vals
|
||||
self.latent_handle = self.latent_axes.plot([0],[0],'rx',mew=2)[0]
|
||||
|
|
@ -130,10 +131,10 @@ class lvm(matplotlib_show):
|
|||
|
||||
def modify(self, vals):
|
||||
"""When latent values are modified update the latent representation and ulso update the output visualization."""
|
||||
self.vals = vals.copy()
|
||||
self.vals = vals[None,:].copy()
|
||||
y = self.model.predict(self.vals)[0]
|
||||
self.data_visualize.modify(y)
|
||||
self.latent_handle.set_data(self.vals[self.latent_index[0]], self.vals[self.latent_index[1]])
|
||||
self.latent_handle.set_data(self.vals[:,self.latent_index[0]], self.vals[:,self.latent_index[1]])
|
||||
self.axes.figure.canvas.draw()
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -191,13 +191,14 @@ class Test(ListDictTestCase):
|
|||
par.count = 0
|
||||
par.add_observer(self, self._callback, 1)
|
||||
pcopy = GPRegression(par.X.copy(), par.Y.copy(), kernel=par.kern.copy())
|
||||
self.assertNotIn(par._observer_callables_[0], pcopy._observer_callables_)
|
||||
self.assertNotIn(par.observers[0], pcopy.observers)
|
||||
pcopy = par.copy()
|
||||
pcopy.name = "copy"
|
||||
self.assertTrue(par.checkgrad())
|
||||
self.assertTrue(pcopy.checkgrad())
|
||||
self.assertTrue(pcopy.kern.checkgrad())
|
||||
self.assertIn(par._observer_callables_[0], pcopy._observer_callables_)
|
||||
import ipdb;ipdb.set_trace()
|
||||
self.assertIn(par.observers[0], pcopy.observers)
|
||||
self.assertEqual(par.count, 3)
|
||||
self.assertEqual(pcopy.count, 6) # 3 for each of the two checkgrad calls
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ import caching
|
|||
import diag
|
||||
import initialization
|
||||
import multioutput
|
||||
import linalg_gpu
|
||||
|
||||
try:
|
||||
import sympy
|
||||
|
|
|
|||
|
|
@ -66,6 +66,7 @@ class Cacher(object):
|
|||
#first make sure the depth limit isn't exceeded
|
||||
if len(self.cached_inputs) == self.limit:
|
||||
args_ = self.cached_inputs.pop(0)
|
||||
args_ = [a for i,a in enumerate(args_) if i not in self.ignore_args and i not in self.force_kwargs]
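# drop the ignored/force_kwargs arguments so remove_observer is only called on arguments that had observers attached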
|
||||
[a.remove_observer(self, self.on_cache_changed) for a in args_ if a is not None]
|
||||
self.inputs_changed.pop(0)
|
||||
self.cached_outputs.pop(0)
|
||||
|
|
|
|||
16
GPy/util/gpu_init.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
"""
|
||||
The package for scikits.cuda initialization
|
||||
|
||||
Global variables: initSuccess
|
||||
Provides the CUBLAS handle: cublas_handle
|
||||
"""
|
||||
|
||||
try:
|
||||
import pycuda.autoinit
|
||||
from scikits.cuda import cublas
|
||||
import scikits.cuda.linalg as culinalg
|
||||
culinalg.init()
|
||||
cublas_handle = cublas.cublasCreate()
|
||||
initSuccess = True
|
||||
except:
|
||||
initSuccess = False
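A minimal sketch of how callers are expected to use this module: import it unconditionally and branch on initSuccess, falling back to CPU code when no GPU is available.

from GPy.util import gpu_init

if gpu_init.initSuccess:
    handle = gpu_init.cublas_handle    # ready-to-use CUBLAS handle
else:
    pass                               # fall back to a CPU implementation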
|
||||
59
GPy/util/linalg_gpu.py
Normal file
|
|
@ -0,0 +1,59 @@
|
|||
# Copyright (c) 2012, GPy authors (see AUTHORS.txt).
|
||||
# Licensed under the BSD 3-clause license (see LICENSE.txt)
|
||||
|
||||
|
||||
#
|
||||
# The utility functions for GPU computation
|
||||
#
|
||||
import numpy as np
|
||||
|
||||
from ..util import gpu_init
|
||||
|
||||
try:
|
||||
from pycuda.reduction import ReductionKernel
|
||||
from pycuda.elementwise import ElementwiseKernel
|
||||
|
||||
# log|A| for A is a low triangle matrix
|
||||
# logDiagSum(A, A.shape[0]+1)
|
||||
logDiagSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?log(x[i]):0", arguments="double *x, int step")
|
||||
|
||||
strideSum = ReductionKernel(np.float64, neutral="0", reduce_expr="a+b", map_expr="i%step==0?x[i]:0", arguments="double *x, int step")
|
||||
|
||||
#=======================================================================================
|
||||
# Element-wise functions
|
||||
#=======================================================================================
|
||||
|
||||
# log(X)
|
||||
log = ElementwiseKernel("double *in, double *out", "out[i] = log(in[i])", "log_element")
|
||||
|
||||
# log(1.0-X)
|
||||
logOne = ElementwiseKernel("double *in, double *out", "out[i] = log(1.-in[i])", "logOne_element")
|
||||
|
||||
# multiplication with broadcast on the last dimension (out = shorter[:,None]*longer)
|
||||
mul_bcast = ElementwiseKernel("double *out, double *shorter, double *longer, int shorter_size", "out[i] = longer[i]*shorter[i%shorter_size]", "mul_bcast")
|
||||
|
||||
# multiplication with broadcast on the first dimension (out = shorter[None,:]*longer)
|
||||
mul_bcast_first = ElementwiseKernel("double *out, double *shorter, double *longer, int first_dim", "out[i] = longer[i]*shorter[i/first_dim]", "mul_bcast")
|
||||
|
||||
# sum through the middle dimension (size_2) of a 3D matrix (size_1, size_2, size_3)
|
||||
sum_axis = ElementwiseKernel("double *out, double *in, int size_1, int size_2", "out[i] += sum_axis_element(in, size_1, size_2, i)", "sum_axis",preamble="""
|
||||
__device__ double sum_axis_element(double *in, int size_1, int size_2, int idx)
|
||||
{
|
||||
int k = idx/size_1;
|
||||
int i = idx%size_1;
|
||||
double sum=0;
|
||||
for(int j=0;j<size_2;j++) {
|
||||
sum += in[(k*size_2+j)*size_1+i];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
""")
|
||||
|
||||
# the outer product between two vectors (out = np.dot(v1,v2.T))
|
||||
outer_prod = ElementwiseKernel("double *out, double *v1, double *v2, int v1_size", "out[i] = v1[i%v1_size]*v2[i/v1_size]", "outer_prod")
|
||||
|
||||
# the outer product between two vectors (out = np.einsum('na,nb->nab',m1,m2) a=dim1, b=dim2 )
|
||||
join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod")
|
||||
|
||||
except:
|
||||
pass
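The elementwise kernels above mirror simple NumPy operations; a CPU reference sketch (the flattening must match the Fortran layout of the GPU buffers) that can be used when checking results:

import numpy as np

def mul_bcast_ref(shorter, longer):
    # out[i] = longer[i] * shorter[i % shorter.size] on the flattened arrays
    return longer.ravel() * np.tile(shorter.ravel(), longer.size // shorter.size)

def sum_axis_ref(arr3d):
    # what sum_axis adds onto out: the sum over the middle axis of a (size_1, size_2, size_3) array
    return arr3d.sum(axis=1)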
|
||||