diff --git a/GPy/core/__init__.py b/GPy/core/__init__.py index a42d76ed..25651827 100644 --- a/GPy/core/__init__.py +++ b/GPy/core/__init__.py @@ -4,6 +4,7 @@ from model import * from parameterization.parameterized import adjust_name_for_printing, Parameterizable from parameterization.param import Param, ParamConcatenation +from parameterization.observable_array import ObsAr from gp import GP from sparse_gp import SparseGP diff --git a/GPy/core/gp.py b/GPy/core/gp.py index 490bcc72..692e5d01 100644 --- a/GPy/core/gp.py +++ b/GPy/core/gp.py @@ -121,7 +121,7 @@ class GP(Model): If full_cov and self.input_dim > 1, the return shape of var is Nnew x Nnew x self.input_dim. If self.input_dim == 1, the return shape is Nnew x Nnew. This is to allow for different normalizations of the output dimensions. - """ + """ #predict the latent function values mu, var = self._raw_predict(Xnew, full_cov=full_cov) diff --git a/GPy/core/parameterization/lists_and_dicts.py b/GPy/core/parameterization/lists_and_dicts.py index 6902c249..604d0a01 100644 --- a/GPy/core/parameterization/lists_and_dicts.py +++ b/GPy/core/parameterization/lists_and_dicts.py @@ -5,6 +5,7 @@ Created on 27 Feb 2014 ''' from collections import defaultdict +import weakref def intarray_default_factory(): import numpy as np @@ -41,60 +42,66 @@ class ObservablesList(object): def __init__(self): self._poc = [] - def remove(self, value): - return self._poc.remove(value) - - - def __delitem__(self, ind): - return self._poc.__delitem__(ind) - - - def __setitem__(self, ind, item): - return self._poc.__setitem__(ind, item) - - def __getitem__(self, ind): - return self._poc.__getitem__(ind) + p,o,c = self._poc[ind] + return p, o(), c + def remove(self, priority, observable, callble): + """ + """ + self.flush() + for i in range(len(self) - 1, -1, -1): + p,o,c = self[i] + if priority==p and observable==o and callble==c: + del self._poc[i] def __repr__(self): return self._poc.__repr__() - - def append(self, obj): - return self._poc.append(obj) - - - def index(self, value): - return self._poc.index(value) - - - def extend(self, iterable): - return self._poc.extend(iterable) - - + def add(self, priority, observable, callble): + ins = 0 + for pr, _, _ in self: + if priority > pr: + break + ins += 1 + self._poc.insert(ins, (priority, weakref.ref(observable), callble)) + def __str__(self): - return self._poc.__str__() + ret = [] + curr_p = None + for p, o, c in self: + curr = '' + if curr_p != p: + pre = "{!s}: ".format(p) + curr_pre = pre + else: curr_pre = " "*len(pre) + curr_p = p + curr += curr_pre + ret.append(curr + ", ".join(map(repr, [o,c]))) + return '\n'.join(ret) + def flush(self): + self._poc = [(p,o,c) for p,o,c in self._poc if o() is not None] def __iter__(self): - return self._poc.__iter__() - - - def insert(self, index, obj): - return self._poc.insert(index, obj) - + self.flush() + for p, o, c in self._poc: + if o() is not None: + yield p, o(), c def __len__(self): + self.flush() return self._poc.__len__() def __deepcopy__(self, memo): + self.flush() s = ObservablesList() import copy s._poc = copy.deepcopy(self._poc, memo) return s def __getstate__(self): + self.flush() from ...util.caching import Cacher obs = [] for p, o, c in self: @@ -106,6 +113,6 @@ class ObservablesList(object): def __setstate__(self, state): self._poc = [] for p, o, c in state: - self._poc.append((p,o,getattr(o, c))) + self.add(p,o,getattr(o, c)) pass diff --git a/GPy/core/parameterization/observable_array.py b/GPy/core/parameterization/observable_array.py new file mode 100644 index 00000000..cd0c85d6 --- /dev/null +++ b/GPy/core/parameterization/observable_array.py @@ -0,0 +1,137 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +__updated__ = '2014-03-31' + +import numpy as np +from parameter_core import Observable, Pickleable + +class ObsAr(np.ndarray, Pickleable, Observable): + """ + An ndarray which reports changes to its observers. + The observers can add themselves with a callable, which + will be called every time this array changes. The callable + takes exactly one argument, which is this array itself. + """ + __array_priority__ = -1 # Never give back ObsAr + def __new__(cls, input_array, *a, **kw): + if not isinstance(input_array, ObsAr): + obj = np.atleast_1d(np.require(input_array, dtype=np.float64, requirements=['W', 'C'])).view(cls) + else: obj = input_array + #cls.__name__ = "ObsAr" # because of fixed printing of `array` in np printing + super(ObsAr, obj).__init__(*a, **kw) + return obj + + def __array_finalize__(self, obj): + # see InfoArray.__array_finalize__ for comments + if obj is None: return + self.observers = getattr(obj, 'observers', None) + + def __array_wrap__(self, out_arr, context=None): + return out_arr.view(np.ndarray) + + def copy(self): + memo = {} + memo[id(self)] = self + return self.__deepcopy__(memo) + + def __deepcopy__(self, memo): + s = self.__new__(self.__class__, input_array=self.view(np.ndarray).copy()) + memo[id(self)] = s + import copy + s.__dict__.update(copy.deepcopy(self.__dict__, memo)) + return s + + def __reduce__(self): + func, args, state = super(ObsAr, self).__reduce__() + return func, args, (state, Pickleable.__getstate__(self)) + + def __setstate__(self, state): + np.ndarray.__setstate__(self, state[0]) + Pickleable.__setstate__(self, state[1]) + + def __setitem__(self, s, val): + super(ObsAr, self).__setitem__(s, val) + self.notify_observers() + + def __getslice__(self, start, stop): + return self.__getitem__(slice(start, stop)) + + def __setslice__(self, start, stop, val): + return self.__setitem__(slice(start, stop), val) + + def __ilshift__(self, *args, **kwargs): + r = np.ndarray.__ilshift__(self, *args, **kwargs) + self.notify_observers() + return r + + def __irshift__(self, *args, **kwargs): + r = np.ndarray.__irshift__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ixor__(self, *args, **kwargs): + r = np.ndarray.__ixor__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ipow__(self, *args, **kwargs): + r = np.ndarray.__ipow__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ifloordiv__(self, *args, **kwargs): + r = np.ndarray.__ifloordiv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __isub__(self, *args, **kwargs): + r = np.ndarray.__isub__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __ior__(self, *args, **kwargs): + r = np.ndarray.__ior__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __itruediv__(self, *args, **kwargs): + r = np.ndarray.__itruediv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __idiv__(self, *args, **kwargs): + r = np.ndarray.__idiv__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __iand__(self, *args, **kwargs): + r = np.ndarray.__iand__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __imod__(self, *args, **kwargs): + r = np.ndarray.__imod__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __iadd__(self, *args, **kwargs): + r = np.ndarray.__iadd__(self, *args, **kwargs) + self.notify_observers() + return r + + + def __imul__(self, *args, **kwargs): + r = np.ndarray.__imul__(self, *args, **kwargs) + self.notify_observers() + return r diff --git a/GPy/core/parameterization/param.py b/GPy/core/parameterization/param.py index 60bdfe9d..9c3d7bd3 100644 --- a/GPy/core/parameterization/param.py +++ b/GPy/core/parameterization/param.py @@ -59,7 +59,7 @@ class Param(OptimizationHandlable, ObsAr): import pydot node = pydot.Node(id(self), shape='record', label=self.name) G.add_node(node) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) @@ -324,7 +324,7 @@ class ParamConcatenation(object): if update: self.update_all_params() def values(self): - return numpy.hstack([p.param_array for p in self.params]) + return numpy.hstack([p.param_array.flat for p in self.params]) #=========================================================================== # parameter operations: #=========================================================================== diff --git a/GPy/core/parameterization/parameter_core.py b/GPy/core/parameterization/parameter_core.py index 2dac9bf3..43bc7177 100644 --- a/GPy/core/parameterization/parameter_core.py +++ b/GPy/core/parameterization/parameter_core.py @@ -44,22 +44,23 @@ class Observable(object): def __init__(self, *args, **kwargs): super(Observable, self).__init__() from lists_and_dicts import ObservablesList - self._observer_callables_ = ObservablesList() + self.observers = ObservablesList() def add_observer(self, observer, callble, priority=0): - self._insert_sorted(priority, observer, callble) + self.observers.add(priority, observer, callble) def remove_observer(self, observer, callble=None): to_remove = [] - for p, obs, clble in self._observer_callables_: + for poc in self.observers: + _, obs, clble = poc if callble is not None: if (obs == observer) and (callble == clble): - to_remove.append((p, obs, clble)) + to_remove.append(poc) else: if obs is observer: - to_remove.append((p, obs, clble)) + to_remove.append(poc) for r in to_remove: - self._observer_callables_.remove(r) + self.observers.remove(*r) def notify_observers(self, which=None, min_priority=None): """ @@ -74,21 +75,13 @@ class Observable(object): if which is None: which = self if min_priority is None: - [callble(self, which=which) for _, _, callble in self._observer_callables_] + [callble(self, which=which) for _, _, callble in self.observers] else: - for p, _, callble in self._observer_callables_: + for p, _, callble in self.observers: if p <= min_priority: break callble(self, which=which) - def _insert_sorted(self, p, o, c): - ins = 0 - for pr, _, _ in self._observer_callables_: - if p > pr: - break - ins += 1 - self._observer_callables_.insert(ins, (p, o, c)) - #=============================================================================== # Foundation framework for parameterized and param objects: #=============================================================================== @@ -192,7 +185,7 @@ class Pickleable(object): def __getstate__(self): ignore_list = ([#'_parent_', '_parent_index_', - #'_observer_callables_', + #'observers', '_param_array_', '_gradient_array_', '_fixes_', '_Cacher_wrap__cachers'] #+ self.parameter_names(recursive=False) diff --git a/GPy/core/parameterization/parameterized.py b/GPy/core/parameterization/parameterized.py index 75085ca2..a794ab40 100644 --- a/GPy/core/parameterization/parameterized.py +++ b/GPy/core/parameterization/parameterized.py @@ -90,7 +90,7 @@ class Parameterized(Parameterizable): child_node = child.build_pydot(G) G.add_edge(pydot.Edge(node, child_node)) - for o in self._observer_callables_.keys(): + for o in self.observers.keys(): label = o.name if hasattr(o, 'name') else str(o) observed_node = pydot.Node(id(o), label=label) G.add_node(observed_node) diff --git a/GPy/core/parameterization/variational.py b/GPy/core/parameterization/variational.py index f8fd165f..3730baed 100644 --- a/GPy/core/parameterization/variational.py +++ b/GPy/core/parameterization/variational.py @@ -40,6 +40,7 @@ class SpikeAndSlabPrior(VariationalPrior): self.pi = Param('pi', pi, Logistic(1e-10,1.-1e-10)) self.variance = Param('variance',variance) self.add_parameters(self.pi) + self.group_spike_prob = False def KL_divergence(self, variational_posterior): mu = variational_posterior.mean @@ -55,7 +56,11 @@ class SpikeAndSlabPrior(VariationalPrior): S = variational_posterior.variance gamma = variational_posterior.binary_prob - gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. + if self.group_spike_prob: + gamma_grad = np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. + gamma.gradient -= gamma_grad.mean(axis=0) + else: + gamma.gradient -= np.log((1-self.pi)/self.pi*gamma/(1.-gamma))+(np.square(mu)+S-np.log(S)-1.)/2. mu.gradient -= gamma*mu S.gradient -= (1. - (1. / (S))) * gamma /2. self.pi.gradient = (gamma/self.pi - (1.-gamma)/(1.-self.pi)).sum(axis=0) diff --git a/GPy/examples/dimensionality_reduction.py b/GPy/examples/dimensionality_reduction.py index 07623d6b..c1911e75 100644 --- a/GPy/examples/dimensionality_reduction.py +++ b/GPy/examples/dimensionality_reduction.py @@ -409,12 +409,12 @@ def stick(kernel=None, optimize=True, verbose=True, plot=True): # optimize m = GPy.models.GPLVM(data['Y'], 2, kernel=kernel) if optimize: m.optimize(messages=verbose, max_f_eval=10000) - if plot and GPy.plotting.matplot_dep.visualize.visual_available: + if plot: plt.clf ax = m.plot_latent() - y = m.likelihood.Y[0, :] + y = m.Y[0, :] data_show = GPy.plotting.matplot_dep.visualize.stick_show(y[None, :], connect=data['connect']) - GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, ax) + vis = GPy.plotting.matplot_dep.visualize.lvm(m.X[0, :].copy(), m, data_show, latent_axes=ax) raw_input('Press enter to finish') return m diff --git a/GPy/inference/latent_function_inference/__init__.py b/GPy/inference/latent_function_inference/__init__.py index ee459a76..effa077c 100644 --- a/GPy/inference/latent_function_inference/__init__.py +++ b/GPy/inference/latent_function_inference/__init__.py @@ -32,6 +32,7 @@ from expectation_propagation import EP from dtc import DTC from fitc import FITC from var_dtc_parallel import VarDTC_minibatch +from var_dtc_gpu import VarDTC_GPU # class FullLatentFunctionData(object): # diff --git a/GPy/inference/latent_function_inference/exact_gaussian_inference.py b/GPy/inference/latent_function_inference/exact_gaussian_inference.py index 074b67a6..c0177e9f 100644 --- a/GPy/inference/latent_function_inference/exact_gaussian_inference.py +++ b/GPy/inference/latent_function_inference/exact_gaussian_inference.py @@ -32,7 +32,7 @@ class ExactGaussianInference(object): return Y else: #if Y in self.cache, return self.Cache[Y], else store Y in cache and return L. - print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" + #print "WARNING: N>D of Y, we need caching of L, such that L*L^T = Y, returning Y still!" return Y def inference(self, kern, X, likelihood, Y, Y_metadata=None): diff --git a/GPy/inference/latent_function_inference/var_dtc_gpu.py b/GPy/inference/latent_function_inference/var_dtc_gpu.py new file mode 100644 index 00000000..9b2da1c9 --- /dev/null +++ b/GPy/inference/latent_function_inference/var_dtc_gpu.py @@ -0,0 +1,545 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +from posterior import Posterior +from ...util.linalg import jitchol, backsub_both_sides, tdot, dtrtrs +from ...util import diag +from ...core.parameterization.variational import VariationalPosterior +import numpy as np +from ...util.misc import param_to_array +log_2_pi = np.log(2*np.pi) + +from ...util import gpu_init + +try: + import scikits.cuda.linalg as culinalg + import pycuda.gpuarray as gpuarray + from scikits.cuda import cublas + from ...util.linalg_gpu import logDiagSum, strideSum, mul_bcast, sum_axis, outer_prod, mul_bcast_first, join_prod +except: + pass + +class VarDTC_GPU(object): + """ + An object for inference when the likelihood is Gaussian, but we want to do sparse inference. + + The function self.inference returns a Posterior object, which summarizes + the posterior. + + For efficiency, we sometimes work with the cholesky of Y*Y.T. To save repeatedly recomputing this, we cache it. + + """ + const_jitter = np.float64(1e-6) + def __init__(self, batchsize=None, gpu_memory=4., limit=1): + + self.batchsize = batchsize + self.gpu_memory = gpu_memory + + self.midRes = {} + self.batch_pos = 0 # the starting position of the current mini-batch + + self.cublas_handle = gpu_init.cublas_handle + + # Initialize GPU caches + self.gpuCache = None + + def _initGPUCache(self, kern, num_inducing, input_dim, output_dim, Y): + ndata = Y.shape[0] + if self.batchsize==None: + self.batchsize = self._estimateBatchSize(kern, ndata, num_inducing, input_dim, output_dim) + if self.gpuCache == None: + self.gpuCache = {# inference_likelihood + 'Kmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'Lm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'ones_gpu' :gpuarray.empty(num_inducing, np.float64,order='F'), + 'LL_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'b_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'), + 'v_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'), + 'vvt_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'KmmInvPsi2LLInvT_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'KmmInvPsi2P_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'dL_dpsi2R_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'dL_dKmm_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'psi1Y_gpu' :gpuarray.empty((num_inducing,output_dim),np.float64,order='F'), + 'psi2_gpu' :gpuarray.empty((num_inducing,num_inducing),np.float64,order='F'), + 'beta_gpu' :gpuarray.empty((ndata,),np.float64,order='F'), + 'YT_gpu' :gpuarray.to_gpu(np.asfortranarray(Y.T)), # DxN + 'betaYT_gpu' :gpuarray.empty(Y.T.shape,np.float64,order='F'), # DxN + 'psi2_t_gpu' :gpuarray.empty((num_inducing*num_inducing*self.batchsize),np.float64,order='F'), + # inference_minibatch + 'dL_dpsi0_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), + 'dL_dpsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), + 'dL_dpsi2_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), + 'dL_dthetaL_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), + 'betapsi1_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), + 'thetaL_t_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), + 'betaYT2_gpu' :gpuarray.empty((output_dim,self.batchsize),np.float64,order='F'), + 'psi0p_gpu' :gpuarray.empty((self.batchsize,),np.float64,order='F'), + 'psi1p_gpu' :gpuarray.empty((self.batchsize,num_inducing),np.float64,order='F'), + 'psi2p_gpu' :gpuarray.empty((self.batchsize,num_inducing,num_inducing),np.float64,order='F'), + } + self.gpuCache['ones_gpu'].fill(1.0) + + YT_gpu = self.gpuCache['YT_gpu'] + self._trYYT = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, YT_gpu.gpudata, 1, YT_gpu.gpudata, 1) + + def _estimateMemoryOccupation(self, N, M, D): + """ + Estimate the best batch size. + N - the number of total datapoints + M - the number of inducing points + D - the number of observed (output) dimensions + return: the constant memory size, the memory occupation of batchsize=1 + unit: GB + """ + return (M+9.*M*M+3*M*D+N+2.*N*D)*8./1024./1024./1024., (4.+3.*M+D+3.*M*M)*8./1024./1024./1024. + + def _estimateBatchSize(self, kern, N, M, Q, D): + """ + Estimate the best batch size. + N - the number of total datapoints + M - the number of inducing points + D - the number of observed (output) dimensions + return: the constant memory size, the memory occupation of batchsize=1 + unit: GB + """ + if kern.useGPU: + x0,x1 = kern.psicomp.estimateMemoryOccupation(N,M,Q) + else: + x0, x1 = 0.,0. + y0, y1 = self._estimateMemoryOccupation(N, M, D) + + opt_batchsize = min(int((self.gpu_memory-y0-x0)/(x1+y1)), N) + + return opt_batchsize + + def _get_YYTfactor(self, Y): + """ + find a matrix L which satisfies LLT = YYT. + + Note that L may have fewer columns than Y. + """ + N, D = Y.shape + if (N>=D): + return param_to_array(Y) + else: + return jitchol(tdot(Y)) + + def inference_likelihood(self, kern, X, Z, likelihood, Y): + """ + The first phase of inference: + Compute: log-likelihood, dL_dKmm + + Cached intermediate results: Kmm, KmmInv, + """ + + num_inducing, input_dim = Z.shape[0], Z.shape[1] + num_data, output_dim = Y.shape + + self._initGPUCache(kern, num_inducing, input_dim, output_dim, Y) + + if isinstance(X, VariationalPosterior): + uncertain_inputs = True + else: + uncertain_inputs = False + + #see whether we've got a different noise variance for each datum + beta = 1./np.fmax(likelihood.variance, 1e-6) + het_noise = beta.size > 1 + trYYT = self._trYYT + + psi1Y_gpu = self.gpuCache['psi1Y_gpu'] + psi2_gpu = self.gpuCache['psi2_gpu'] + beta_gpu = self.gpuCache['beta_gpu'] + YT_gpu = self.gpuCache['YT_gpu'] + betaYT_gpu = self.gpuCache['betaYT_gpu'] + psi2_t_gpu = self.gpuCache['psi2_t_gpu'] + + if het_noise: + beta_gpu.set(np.asfortranarray(beta)) + mul_bcast(betaYT_gpu,beta_gpu,YT_gpu,beta_gpu.size) + YRY_full = cublas.cublasDdot(self.cublas_handle, YT_gpu.size, betaYT_gpu.gpudata, 1, YT_gpu.gpudata, 1) + else: + beta_gpu.fill(beta) + betaYT_gpu.fill(0.) + cublas.cublasDaxpy(self.cublas_handle, betaYT_gpu.size, beta, YT_gpu.gpudata, 1, betaYT_gpu.gpudata, 1) + YRY_full = trYYT*beta + + if kern.useGPU: + psi1Y_gpu.fill(0.) + psi2_gpu.fill(0.) + psi0_full = 0 + + for n_start in xrange(0,num_data,self.batchsize): + n_end = min(self.batchsize+n_start, num_data) + ndata = n_end - n_start + X_slice = X[n_start:n_end] + beta_gpu_slice = beta_gpu[n_start:n_end] + betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end] + if ndata==self.batchsize: + psi2_t_gpu_slice = psi2_t_gpu + else: + psi2_t_gpu_slice = psi2_t_gpu[:num_inducing*num_inducing*ndata] + if uncertain_inputs: + psi0p_gpu = kern.psi0(Z, X_slice) + psi1p_gpu = kern.psi1(Z, X_slice) + psi2p_gpu = kern.psi2(Z, X_slice) + else: + psi0p_gpu = kern.Kdiag(X_slice) + psi1p_gpu = kern.K(X_slice, Z) + + cublas.cublasDgemm(self.cublas_handle, 'T', 'T', num_inducing, output_dim, ndata, 1.0, psi1p_gpu.gpudata, ndata, betaYT_gpu_slice.gpudata, output_dim, 1.0, psi1Y_gpu.gpudata, num_inducing) + + if het_noise: + psi0_full += cublas.cublasDdot(self.cublas_handle, psi0p_gpu.size, beta_gpu_slice.gpudata, 1, psi0p_gpu.gpudata, 1) + else: + psi0_full += gpuarray.sum(psi0p_gpu).get() + + if uncertain_inputs: + if het_noise: + mul_bcast(psi2_t_gpu_slice,beta_gpu_slice,psi2p_gpu,beta_gpu_slice.size) + sum_axis(psi2_gpu,psi2_t_gpu_slice,1,ndata) + else: + sum_axis(psi2_gpu,psi2p_gpu,1,ndata) + else: + if het_noise: + psi1_t_gpu = psi2_t_gpu_slice[:,num_inducing*ndata] + mul_bcast(psi1_t_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size) + cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, 1.0, psi1p_gpu.gpudata, ndata, psi1_t_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing) + else: + cublas.cublasDgemm(self.cublas_handle, 'T', 'N', num_inducing, num_inducing, ndata, beta, psi1p_gpu.gpudata, ndata, psi1p_gpu.gpudata, ndata, 1.0, psi2_gpu.gpudata, num_inducing) + + if not het_noise: + psi0_full *= beta + if uncertain_inputs: + cublas.cublasDscal(self.cublas_handle, psi2_gpu.size, beta, psi2_gpu.gpudata, 1) + + else: + psi2_full = np.zeros((num_inducing,num_inducing),order='F') + psi1Y_full = np.zeros((num_inducing,output_dim),order='F') # MxD + psi0_full = 0 +# YRY_full = 0 + + for n_start in xrange(0,num_data,self.batchsize): + n_end = min(self.batchsize+n_start, num_data) + Y_slice = Y[n_start:n_end] + X_slice = X[n_start:n_end] + if uncertain_inputs: + psi0 = kern.psi0(Z, X_slice) + psi1 = kern.psi1(Z, X_slice) + psi2 = kern.psi2(Z, X_slice) + else: + psi0 = kern.Kdiag(X_slice) + psi1 = kern.K(X_slice, Z) + + if het_noise: + beta_slice = beta[n_start:n_end] + psi0_full += (beta_slice*psi0).sum() + psi1Y_full += np.dot(psi1.T,beta_slice[:,None]*Y_slice) # MxD +# YRY_full += (beta_slice*np.square(Y_slice).sum(axis=-1)).sum() + else: + psi0_full += psi0.sum() + psi1Y_full += np.dot(psi1.T,Y_slice) # MxD + + if uncertain_inputs: + if het_noise: + psi2_full += np.einsum('n,nmo->mo',beta_slice,psi2) + else: + psi2_full += psi2.sum(axis=0) + else: + if het_noise: + psi2_full += np.einsum('n,nm,no->mo',beta_slice,psi1,psi1) + else: + psi2_full += tdot(psi1.T) + + if not het_noise: + psi0_full *= beta + psi1Y_full *= beta + psi2_full *= beta +# YRY_full = trYYT*beta + + psi1Y_gpu.set(psi1Y_full) + psi2_gpu.set(psi2_full) + + #====================================================================== + # Compute Common Components + #====================================================================== + + Kmm = kern.K(Z).copy() + Kmm_gpu = self.gpuCache['Kmm_gpu'] + Kmm_gpu.set(np.asfortranarray(Kmm)) + diag.add(Kmm, self.const_jitter) + ones_gpu = self.gpuCache['ones_gpu'] + cublas.cublasDaxpy(self.cublas_handle, num_inducing, self.const_jitter, ones_gpu.gpudata, 1, Kmm_gpu.gpudata, num_inducing+1) +# assert np.allclose(Kmm, Kmm_gpu.get()) + +# Lm = jitchol(Kmm) + # + Lm_gpu = self.gpuCache['Lm_gpu'] + cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lm_gpu.gpudata, 1) + culinalg.cho_factor(Lm_gpu,'L') +# print np.abs(np.tril(Lm)-np.tril(Lm_gpu.get())).max() + +# Lambda = Kmm+psi2_full +# LL = jitchol(Lambda) + # + Lambda_gpu = self.gpuCache['LL_gpu'] + cublas.cublasDcopy(self.cublas_handle, Kmm_gpu.size, Kmm_gpu.gpudata, 1, Lambda_gpu.gpudata, 1) + cublas.cublasDaxpy(self.cublas_handle, psi2_gpu.size, np.float64(1.0), psi2_gpu.gpudata, 1, Lambda_gpu.gpudata, 1) + LL_gpu = Lambda_gpu + culinalg.cho_factor(LL_gpu,'L') +# print np.abs(np.tril(LL)-np.tril(LL_gpu.get())).max() + +# b,_ = dtrtrs(LL, psi1Y_full) +# bbt_cpu = np.square(b).sum() + # + b_gpu = self.gpuCache['b_gpu'] + cublas.cublasDcopy(self.cublas_handle, b_gpu.size, psi1Y_gpu.gpudata, 1, b_gpu.gpudata, 1) + cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, b_gpu.gpudata, num_inducing) + bbt = cublas.cublasDdot(self.cublas_handle, b_gpu.size, b_gpu.gpudata, 1, b_gpu.gpudata, 1) +# print np.abs(bbt-bbt_cpu) + +# v,_ = dtrtrs(LL.T,b,lower=False) +# vvt = np.einsum('md,od->mo',v,v) +# LmInvPsi2LmInvT = backsub_both_sides(Lm,psi2_full,transpose='right') + # + v_gpu = self.gpuCache['v_gpu'] + cublas.cublasDcopy(self.cublas_handle, v_gpu.size, b_gpu.gpudata, 1, v_gpu.gpudata, 1) + cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, output_dim, np.float64(1.0), LL_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing) + vvt_gpu = self.gpuCache['vvt_gpu'] + cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, output_dim, np.float64(1.0), v_gpu.gpudata, num_inducing, v_gpu.gpudata, num_inducing, np.float64(0.), vvt_gpu.gpudata, num_inducing) + LmInvPsi2LmInvT_gpu = self.gpuCache['KmmInvPsi2LLInvT_gpu'] + cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, LmInvPsi2LmInvT_gpu.gpudata, 1) + cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing) + cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing) + #tr_LmInvPsi2LmInvT = cublas.cublasDasum(self.cublas_handle, num_inducing, LmInvPsi2LmInvT_gpu.gpudata, num_inducing+1) + tr_LmInvPsi2LmInvT = float(strideSum(LmInvPsi2LmInvT_gpu, num_inducing+1).get()) +# print np.abs(vvt-vvt_gpu.get()).max() +# print np.abs(np.trace(LmInvPsi2LmInvT)-tr_LmInvPsi2LmInvT) + +# Psi2LLInvT = dtrtrs(LL,psi2_full)[0].T +# LmInvPsi2LLInvT= dtrtrs(Lm,Psi2LLInvT)[0] +# KmmInvPsi2LLInvT = dtrtrs(Lm,LmInvPsi2LLInvT,trans=True)[0] +# KmmInvPsi2P = dtrtrs(LL,KmmInvPsi2LLInvT.T, trans=True)[0].T + # + KmmInvPsi2LLInvT_gpu = LmInvPsi2LmInvT_gpu # Reuse GPU memory (size:MxM) + cublas.cublasDcopy(self.cublas_handle, psi2_gpu.size, psi2_gpu.gpudata, 1, KmmInvPsi2LLInvT_gpu.gpudata, 1) + cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing) + cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing) + cublas.cublasDtrsm(self.cublas_handle , 'L', 'L', 'T', 'N', num_inducing, num_inducing, np.float64(1.0), Lm_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing) + KmmInvPsi2P_gpu = self.gpuCache['KmmInvPsi2P_gpu'] + cublas.cublasDcopy(self.cublas_handle, KmmInvPsi2LLInvT_gpu.size, KmmInvPsi2LLInvT_gpu.gpudata, 1, KmmInvPsi2P_gpu.gpudata, 1) + cublas.cublasDtrsm(self.cublas_handle , 'r', 'L', 'N', 'N', num_inducing, num_inducing, np.float64(1.0), LL_gpu.gpudata, num_inducing, KmmInvPsi2P_gpu.gpudata, num_inducing) +# print np.abs(KmmInvPsi2P-KmmInvPsi2P_gpu.get()).max() + +# dL_dpsi2R = (output_dim*KmmInvPsi2P - vvt)/2. # dL_dpsi2 with R inside psi2 + # + dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] + cublas.cublasDcopy(self.cublas_handle, vvt_gpu.size, vvt_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1) + cublas.cublasDaxpy(self.cublas_handle, KmmInvPsi2P_gpu.size, np.float64(-output_dim), KmmInvPsi2P_gpu.gpudata, 1, dL_dpsi2R_gpu.gpudata, 1) + cublas.cublasDscal(self.cublas_handle, dL_dpsi2R_gpu.size, np.float64(-0.5), dL_dpsi2R_gpu.gpudata, 1) +# print np.abs(dL_dpsi2R_gpu.get()-dL_dpsi2R).max() + + #====================================================================== + # Compute log-likelihood + #====================================================================== + if het_noise: + logL_R = -np.log(beta).sum() + else: + logL_R = -num_data*np.log(beta) +# logL_old = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-np.trace(LmInvPsi2LmInvT))+YRY_full-bbt)/2.-output_dim*(-np.log(np.diag(Lm)).sum()+np.log(np.diag(LL)).sum()) + + logdetKmm = float(logDiagSum(Lm_gpu,num_inducing+1).get()) + logdetLambda = float(logDiagSum(LL_gpu,num_inducing+1).get()) + logL = -(output_dim*(num_data*log_2_pi+logL_R+psi0_full-tr_LmInvPsi2LmInvT)+YRY_full-bbt)/2.+output_dim*(logdetKmm-logdetLambda) +# print np.abs(logL_old - logL) + + #====================================================================== + # Compute dL_dKmm + #====================================================================== + +# dL_dKmm = -(output_dim*np.einsum('md,od->mo',KmmInvPsi2LLInvT,KmmInvPsi2LLInvT) + vvt)/2. + # + dL_dKmm_gpu = self.gpuCache['dL_dKmm_gpu'] + cublas.cublasDgemm(self.cublas_handle, 'N', 'T', num_inducing, num_inducing, num_inducing, np.float64(1.0), KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, KmmInvPsi2LLInvT_gpu.gpudata, num_inducing, np.float64(0.), dL_dKmm_gpu.gpudata, num_inducing) + cublas.cublasDaxpy(self.cublas_handle, dL_dKmm_gpu.size, np.float64(1./output_dim), vvt_gpu.gpudata, 1, dL_dKmm_gpu.gpudata, 1) + cublas.cublasDscal(self.cublas_handle, dL_dKmm_gpu.size, np.float64(-output_dim/2.), dL_dKmm_gpu.gpudata, 1) +# print np.abs(dL_dKmm - dL_dKmm_gpu.get()).max() + + #====================================================================== + # Compute the Posterior distribution of inducing points p(u|Y) + #====================================================================== + + post = Posterior(woodbury_inv=KmmInvPsi2P_gpu.get(), woodbury_vector=v_gpu.get(), K=Kmm_gpu.get(), mean=None, cov=None, K_chol=Lm_gpu.get()) + + return logL, dL_dKmm_gpu.get(), post + + def inference_minibatch(self, kern, X, Z, likelihood, Y): + """ + The second phase of inference: Computing the derivatives over a minibatch of Y + Compute: dL_dpsi0, dL_dpsi1, dL_dpsi2, dL_dthetaL + return a flag showing whether it reached the end of Y (isEnd) + """ + + num_data, output_dim = Y.shape + num_inducing = Z.shape[0] + + if isinstance(X, VariationalPosterior): + uncertain_inputs = True + else: + uncertain_inputs = False + + beta = 1./np.fmax(likelihood.variance, 1e-6) + het_noise = beta.size > 1 + + n_start = self.batch_pos + n_end = min(self.batchsize+n_start, num_data) + if n_end==num_data: + isEnd = True + self.batch_pos = 0 + else: + isEnd = False + self.batch_pos = n_end + + nSlice = n_end-n_start + X_slice = X[n_start:n_end] + + if kern.useGPU: + if uncertain_inputs: + psi0p_gpu = kern.psi0(Z, X_slice) + psi1p_gpu = kern.psi1(Z, X_slice) + psi2p_gpu = kern.psi2(Z, X_slice) + else: + psi0p_gpu = kern.Kdiag(X_slice) + psi1p_gpu = kern.K(X_slice, Z) + psi2p_gpu = self.gpuCache['psi2p_gpu'] + if psi2p_gpu.shape[0] > nSlice: + psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) + else: + if uncertain_inputs: + psi0 = kern.psi0(Z, X_slice) + psi1 = kern.psi1(Z, X_slice) + psi2 = kern.psi2(Z, X_slice) + else: + psi0 = kern.Kdiag(X_slice) + psi1 = kern.K(X_slice, Z) + + psi0p_gpu = self.gpuCache['psi0p_gpu'] + psi1p_gpu = self.gpuCache['psi1p_gpu'] + psi2p_gpu = self.gpuCache['psi2p_gpu'] + if psi0p_gpu.shape[0] > nSlice: + psi0p_gpu = psi0p_gpu[:nSlice] + psi1p_gpu = psi1p_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) + psi2p_gpu = psi2p_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) + psi0p_gpu.set(np.asfortranarray(psi0)) + psi1p_gpu.set(np.asfortranarray(psi1)) + if uncertain_inputs: + psi2p_gpu.set(np.asfortranarray(psi2)) + + #====================================================================== + # Prepare gpu memory + #====================================================================== + + dL_dpsi2R_gpu = self.gpuCache['dL_dpsi2R_gpu'] + v_gpu = self.gpuCache['v_gpu'] + betaYT_gpu = self.gpuCache['betaYT_gpu'] + beta_gpu = self.gpuCache['beta_gpu'] + dL_dpsi0_gpu = self.gpuCache['dL_dpsi0_gpu'] + dL_dpsi1_gpu = self.gpuCache['dL_dpsi1_gpu'] + dL_dpsi2_gpu = self.gpuCache['dL_dpsi2_gpu'] + dL_dthetaL_gpu = self.gpuCache['dL_dthetaL_gpu'] + psi2R_gpu = self.gpuCache['psi2_t_gpu'][:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) + betapsi1_gpu = self.gpuCache['betapsi1_gpu'] + thetaL_t_gpu = self.gpuCache['thetaL_t_gpu'] + betaYT2_gpu = self.gpuCache['betaYT2_gpu'] + + betaYT_gpu_slice = betaYT_gpu[:,n_start:n_end] + beta_gpu_slice = beta_gpu[n_start:n_end] + + # Adjust to the batch size + if dL_dpsi0_gpu.shape[0] > nSlice: + betaYT2_gpu = betaYT2_gpu[:,:nSlice] + dL_dpsi0_gpu = dL_dpsi0_gpu.ravel()[:nSlice] + dL_dpsi1_gpu = dL_dpsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) + dL_dpsi2_gpu = dL_dpsi2_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) + dL_dthetaL_gpu = dL_dthetaL_gpu.ravel()[:nSlice] + psi2R_gpu = psi2R_gpu.ravel()[:nSlice*num_inducing*num_inducing].reshape(nSlice,num_inducing,num_inducing) + thetaL_t_gpu = thetaL_t_gpu.ravel()[:nSlice] + betapsi1_gpu = betapsi1_gpu.ravel()[:nSlice*num_inducing].reshape(nSlice,num_inducing) + + mul_bcast(betapsi1_gpu,beta_gpu_slice,psi1p_gpu,beta_gpu_slice.size) + + #====================================================================== + # Compute dL_dpsi + #====================================================================== + + dL_dpsi0_gpu.fill(0.) + cublas.cublasDaxpy(self.cublas_handle, dL_dpsi0_gpu.size, output_dim/(-2.), beta_gpu_slice.gpudata, 1, dL_dpsi0_gpu.gpudata, 1) + + cublas.cublasDgemm(self.cublas_handle, 'T', 'T', nSlice, num_inducing, output_dim, 1.0, betaYT_gpu_slice.gpudata, output_dim, v_gpu.gpudata, num_inducing, 0., dL_dpsi1_gpu.gpudata, nSlice) + + if uncertain_inputs: + outer_prod(dL_dpsi2_gpu,beta_gpu_slice,dL_dpsi2R_gpu,beta_gpu_slice.size) + else: + cublas.cublasDgemm(self.cublas_handle, 'N', 'N', nSlice, num_inducing, output_dim, 1.0, betapsi1_gpu.gpudata, nSlice, dL_dpsi2R_gpu.gpudata, num_inducing, 1.0, dL_dpsi1_gpu.gpudata, nSlice) + + #====================================================================== + # Compute dL_dthetaL + #====================================================================== + + if not uncertain_inputs: + join_prod(psi2p_gpu,psi1p_gpu,psi1p_gpu,nSlice,num_inducing) + + mul_bcast_first(psi2R_gpu,dL_dpsi2R_gpu,psi2p_gpu,nSlice) + + + dL_dthetaL_gpu.fill(0.) + + cublas.cublasDcopy(self.cublas_handle, betaYT_gpu_slice.size, betaYT_gpu_slice.gpudata, 1, betaYT2_gpu.gpudata, 1) + mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT2_gpu,betaYT2_gpu.size) + cublas.cublasDscal(self.cublas_handle, betaYT2_gpu.size, 0.5, betaYT2_gpu.gpudata, 1) + sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim) + + cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/(-2.0), beta_gpu_slice.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) + cublas.cublasDcopy(self.cublas_handle, beta_gpu_slice.size, beta_gpu_slice.gpudata, 1, thetaL_t_gpu.gpudata, 1) + mul_bcast(thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu,thetaL_t_gpu.size) + mul_bcast(thetaL_t_gpu,thetaL_t_gpu,psi0p_gpu,thetaL_t_gpu.size) + cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, output_dim/2.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) + + thetaL_t_gpu.fill(0.) + sum_axis(thetaL_t_gpu, psi2R_gpu, nSlice, num_inducing*num_inducing) + mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size) + mul_bcast(thetaL_t_gpu,thetaL_t_gpu,beta_gpu_slice,thetaL_t_gpu.size) + cublas.cublasDaxpy(self.cublas_handle, dL_dthetaL_gpu.size, -1.0, thetaL_t_gpu.gpudata, 1, dL_dthetaL_gpu.gpudata, 1) + + cublas.cublasDgemm(self.cublas_handle, 'T', 'T', output_dim, nSlice, num_inducing, -1.0, v_gpu.gpudata, num_inducing, betapsi1_gpu.gpudata, nSlice, 0.0, betaYT2_gpu.gpudata, output_dim) + mul_bcast(betaYT2_gpu,betaYT2_gpu,betaYT_gpu_slice,betaYT2_gpu.size) + sum_axis(dL_dthetaL_gpu, betaYT2_gpu, 1, output_dim) + + if kern.useGPU: + dL_dpsi0 = dL_dpsi0_gpu + dL_dpsi1 = dL_dpsi1_gpu + else: + dL_dpsi0 = dL_dpsi0_gpu.get() + dL_dpsi1 = dL_dpsi1_gpu.get() + if uncertain_inputs: + if kern.useGPU: + dL_dpsi2 = dL_dpsi2_gpu + else: + dL_dpsi2 = dL_dpsi2_gpu.get() + if het_noise: + dL_dthetaL = dL_dthetaL_gpu.get() + else: + dL_dthetaL = gpuarray.sum(dL_dthetaL_gpu).get() + if uncertain_inputs: + grad_dict = {'dL_dpsi0':dL_dpsi0, + 'dL_dpsi1':dL_dpsi1, + 'dL_dpsi2':dL_dpsi2, + 'dL_dthetaL':dL_dthetaL} + else: + grad_dict = {'dL_dKdiag':dL_dpsi0, + 'dL_dKnm':dL_dpsi1, + 'dL_dthetaL':dL_dthetaL} + + return isEnd, (n_start,n_end), grad_dict + diff --git a/GPy/inference/latent_function_inference/var_dtc_parallel.py b/GPy/inference/latent_function_inference/var_dtc_parallel.py index bb69b88d..87236e2a 100644 --- a/GPy/inference/latent_function_inference/var_dtc_parallel.py +++ b/GPy/inference/latent_function_inference/var_dtc_parallel.py @@ -279,4 +279,55 @@ class VarDTC_minibatch(object): 'dL_dthetaL':dL_dthetaL} return isEnd, (n_start,n_end), grad_dict + + +def update_gradients(model): + model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) + het_noise = model.likelihood.variance.size > 1 + + if het_noise: + dL_dthetaL = np.empty((model.Y.shape[0],)) + else: + dL_dthetaL = 0 + + #gradients w.r.t. kernel + model.kern.update_gradients_full(dL_dKmm, model.Z, None) + kern_grad = model.kern.gradient.copy() + + #gradients w.r.t. Z + model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z) + + isEnd = False + while not isEnd: + isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) + if isinstance(model.X, VariationalPosterior): + X_slice = model.X[n_range[0]:n_range[1]] + + #gradients w.r.t. kernel + model.kern.update_gradients_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) + kern_grad += model.kern.gradient + + #gradients w.r.t. Z + model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations( + grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=X_slice) + + #gradients w.r.t. posterior parameters of X + X_grad = model.kern.gradients_qX_expectations(variational_posterior=X_slice, Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) + model.set_X_gradients(X_slice, X_grad) + + if het_noise: + dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] + else: + dL_dthetaL += grad_dict['dL_dthetaL'] + + # Set the gradients w.r.t. kernel + model.kern.gradient = kern_grad + + # Update Log-likelihood + model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) + # update for the KL divergence + model.variational_prior.update_gradients_KL(model.X) + + # dL_dthetaL + model.likelihood.update_gradients(dL_dthetaL) diff --git a/GPy/kern/_src/kern.py b/GPy/kern/_src/kern.py index de99bddb..f871e676 100644 --- a/GPy/kern/_src/kern.py +++ b/GPy/kern/_src/kern.py @@ -13,9 +13,10 @@ class Kern(Parameterized): #=========================================================================== # This adds input slice support. The rather ugly code for slicing can be # found in kernel_slice_operations - __metaclass__ = KernCallsViaSlicerMeta + #__metaclass__ = KernCallsViaSlicerMeta #=========================================================================== - def __init__(self, input_dim, active_dims, name, *a, **kw): + _support_GPU=False + def __init__(self, input_dim, active_dims, name, useGPU=False, *a, **kw): """ The base class for a kernel: a positive definite function which forms of a covariance function (kernel). @@ -39,6 +40,8 @@ class Kern(Parameterized): active_dim_size = len(self.active_dims) assert active_dim_size == self.input_dim, "input_dim={} does not match len(active_dim)={}, active_dims={}".format(self.input_dim, active_dim_size, self.active_dims) self._sliced_X = 0 + + self.useGPU = self._support_GPU and useGPU @Cache_this(limit=10) def _slice_X(self, X): diff --git a/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py new file mode 100644 index 00000000..f49dc52a --- /dev/null +++ b/GPy/kern/_src/psi_comp/ssrbf_psi_gpucomp.py @@ -0,0 +1,535 @@ +# Copyright (c) 2012, GPy authors (see AUTHORS.txt). +# Licensed under the BSD 3-clause license (see LICENSE.txt) + +""" +The package for the psi statistics computation on GPU +""" + +import numpy as np +from GPy.util.caching import Cache_this + +from ....util import gpu_init + +try: + import pycuda.gpuarray as gpuarray + from scikits.cuda import cublas + from pycuda.reduction import ReductionKernel + from pycuda.elementwise import ElementwiseKernel + from ....util import linalg_gpu + + + # The kernel form computing psi1 het_noise + comp_psi1 = ElementwiseKernel( + "double *psi1, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q", + "psi1[i] = comp_psi1_element(var, l, Z, mu, S, logGamma, log1Gamma, logpsi1denom, N, M, Q, i)", + "comp_psi1", + preamble=""" + #define IDX_NMQ(n,m,q) ((q*M+m)*N+n) + #define IDX_NQ(n,q) (q*N+n) + #define IDX_MQ(m,q) (q*M+m) + #define LOGEXPSUM(a,b) (a>=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b))) + + __device__ double comp_psi1_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx) + { + int n = idx%N; + int m = idx/N; + double psi1_exp=0; + for(int q=0;q=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b))) + + __device__ double comp_psi2_element(double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx) + { + // psi2 (n,m1,m2) + int m2 = idx/(M*N); + int m1 = (idx%(M*N))/N; + int n = idx%N; + + double psi2_exp=0; + for(int q=0;q=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b))) + + __device__ double comp_dpsi1_dvar_element(double *psi1_neq, double *psi1exp1, double *psi1exp2, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi1denom, int N, int M, int Q, int idx) + { + int n = idx%N; + int m = idx/N; + + double psi1_sum = 0; + for(int q=0;q=b?a+log(1.0+exp(b-a)):b+log(1.0+exp(a-b))) + + __device__ double comp_dpsi2_dvar_element(double *psi2_neq, double *psi2exp1, double *psi2exp2, double var, double *l, double *Z, double *mu, double *S, double *logGamma, double *log1Gamma, double *logpsi2denom, int N, int M, int Q, int idx) + { + // psi2 (n,m1,m2) + int m2 = idx/(M*N); + int m1 = (idx%(M*N))/N; + int n = idx%N; + + double psi2_sum=0; + for(int q=0;q reallocate + self._releaseMemory() + + if self.gpuCacheAll == None: + self.gpuCacheAll = { + 'l_gpu' :gpuarray.empty((Q,),np.float64,order='F'), + 'Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'), + 'mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'logGamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'log1Gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'logpsi1denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'logpsi2denom_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'psi0_gpu' :gpuarray.empty((N,),np.float64,order='F'), + 'psi1_gpu' :gpuarray.empty((N,M),np.float64,order='F'), + 'psi2_gpu' :gpuarray.empty((N,M,M),np.float64,order='F'), + # derivatives psi1 + 'psi1_neq_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'psi1exp1_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'psi1exp2_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'dpsi1_dvar_gpu' :gpuarray.empty((N,M),np.float64, order='F'), + 'dpsi1_dl_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'dpsi1_dZ_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'dpsi1_dgamma_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'dpsi1_dmu_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + 'dpsi1_dS_gpu' :gpuarray.empty((N,M,Q),np.float64, order='F'), + # derivatives psi2 + 'psi2_neq_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'psi2exp1_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'psi2exp2_gpu' :gpuarray.empty((M,M,Q),np.float64, order='F'), + 'dpsi2_dvar_gpu' :gpuarray.empty((N,M,M),np.float64, order='F'), + 'dpsi2_dl_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'dpsi2_dZ_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'dpsi2_dgamma_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'dpsi2_dmu_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + 'dpsi2_dS_gpu' :gpuarray.empty((N,M,M,Q),np.float64, order='F'), + # gradients + 'grad_l_gpu' :gpuarray.empty((Q,),np.float64,order='F'), + 'grad_Z_gpu' :gpuarray.empty((M,Q),np.float64,order='F'), + 'grad_mu_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'grad_S_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + 'grad_gamma_gpu' :gpuarray.empty((N,Q),np.float64,order='F'), + } + self.gpuCache = self.gpuCacheAll + elif self.gpuCacheAll['mu_gpu'].shape[0]==N: + self.gpuCache = self.gpuCacheAll + else: + # remap to a smaller cache + self.gpuCache = self.gpuCacheAll.copy() + Nlist=['mu_gpu','S_gpu','gamma_gpu','logGamma_gpu','log1Gamma_gpu','logpsi1denom_gpu','logpsi2denom_gpu','psi0_gpu','psi1_gpu','psi2_gpu', + 'psi1_neq_gpu','psi1exp1_gpu','psi1exp2_gpu','dpsi1_dvar_gpu','dpsi1_dl_gpu','dpsi1_dZ_gpu','dpsi1_dgamma_gpu','dpsi1_dmu_gpu', + 'dpsi1_dS_gpu','psi2_neq_gpu','psi2exp1_gpu','dpsi2_dvar_gpu','dpsi2_dl_gpu','dpsi2_dZ_gpu','dpsi2_dgamma_gpu','dpsi2_dmu_gpu','dpsi2_dS_gpu','grad_mu_gpu','grad_S_gpu','grad_gamma_gpu',] + oldN = self.gpuCacheAll['mu_gpu'].shape[0] + for v in Nlist: + u = self.gpuCacheAll[v] + self.gpuCache[v] = u.ravel()[:u.size/oldN*N].reshape(*((N,)+u.shape[1:])) + + def _releaseMemory(self): + if self.gpuCacheAll!=None: + [v.gpudata.free() for v in self.gpuCacheAll.values()] + self.gpuCacheAll = None + self.gpuCache = None + + def estimateMemoryOccupation(self, N, M, Q): + """ + Estimate the best batch size. + N - the number of total datapoints + M - the number of inducing points + Q - the number of hidden (input) dimensions + return: the constant memory size, the memory occupation of batchsize=1 + unit: GB + """ + return (2.*Q+2.*M*Q+M*M*Q)*8./1024./1024./1024., (1.+2.*M+10.*Q+2.*M*M+8.*M*Q+7.*M*M*Q)*8./1024./1024./1024. + + @Cache_this(limit=1,ignore_args=(0,)) + def psicomputations(self, variance, lengthscale, Z, mu, S, gamma): + """Compute Psi statitsitcs""" + if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1: + ARD = True + else: + ARD = False + + N = mu.shape[0] + M = Z.shape[0] + Q = mu.shape[1] + + self._initGPUCache(N,M,Q) + l_gpu = self.gpuCache['l_gpu'] + Z_gpu = self.gpuCache['Z_gpu'] + mu_gpu = self.gpuCache['mu_gpu'] + S_gpu = self.gpuCache['S_gpu'] + gamma_gpu = self.gpuCache['gamma_gpu'] + logGamma_gpu = self.gpuCache['logGamma_gpu'] + log1Gamma_gpu = self.gpuCache['log1Gamma_gpu'] + logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu'] + logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu'] + psi0_gpu = self.gpuCache['psi0_gpu'] + psi1_gpu = self.gpuCache['psi1_gpu'] + psi2_gpu = self.gpuCache['psi2_gpu'] + + if ARD: + l_gpu.set(np.asfortranarray(lengthscale**2)) + else: + l_gpu.fill(lengthscale*lengthscale) + Z_gpu.set(np.asfortranarray(Z)) + mu_gpu.set(np.asfortranarray(mu)) + S_gpu.set(np.asfortranarray(S)) + gamma_gpu.set(np.asfortranarray(gamma)) + linalg_gpu.log(gamma_gpu,logGamma_gpu) + linalg_gpu.logOne(gamma_gpu,log1Gamma_gpu) + comp_logpsidenom(logpsi1denom_gpu, S_gpu,l_gpu,1.0,N) + comp_logpsidenom(logpsi2denom_gpu, S_gpu,l_gpu,2.0,N) + + psi0_gpu.fill(variance) + comp_psi1(psi1_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q) + comp_psi2(psi2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q) + + return psi0_gpu, psi1_gpu, psi2_gpu + + @Cache_this(limit=1,ignore_args=(0,)) + def _psiDercomputations(self, variance, lengthscale, Z, mu, S, gamma): + """Compute the derivatives w.r.t. Psi statistics""" + N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1] + + self._initGPUCache(N,M,Q) + l_gpu = self.gpuCache['l_gpu'] + Z_gpu = self.gpuCache['Z_gpu'] + mu_gpu = self.gpuCache['mu_gpu'] + S_gpu = self.gpuCache['S_gpu'] + gamma_gpu = self.gpuCache['gamma_gpu'] + logGamma_gpu = self.gpuCache['logGamma_gpu'] + log1Gamma_gpu = self.gpuCache['log1Gamma_gpu'] + logpsi1denom_gpu = self.gpuCache['logpsi1denom_gpu'] + logpsi2denom_gpu = self.gpuCache['logpsi2denom_gpu'] + + psi1_neq_gpu = self.gpuCache['psi1_neq_gpu'] + psi1exp1_gpu = self.gpuCache['psi1exp1_gpu'] + psi1exp2_gpu = self.gpuCache['psi1exp2_gpu'] + dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu'] + dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu'] + dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu'] + dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu'] + dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu'] + dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu'] + + psi2_neq_gpu = self.gpuCache['psi2_neq_gpu'] + psi2exp1_gpu = self.gpuCache['psi2exp1_gpu'] + psi2exp2_gpu = self.gpuCache['psi2exp2_gpu'] + dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu'] + dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu'] + dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu'] + dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu'] + dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu'] + dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu'] + + #========================================================================================================== + # Assuming the l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, logGamma_gpu, log1Gamma_gpu, + # logpsi1denom_gpu, logpsi2denom_gpu has been synchonized. + #========================================================================================================== + + # psi1 derivatives + comp_dpsi1_dvar(dpsi1_dvar_gpu, psi1_neq_gpu, psi1exp1_gpu,psi1exp2_gpu, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi1denom_gpu, N, M, Q) + comp_psi1_der(dpsi1_dl_gpu,dpsi1_dmu_gpu,dpsi1_dS_gpu,dpsi1_dgamma_gpu, dpsi1_dZ_gpu, psi1_neq_gpu,psi1exp1_gpu,psi1exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q) + + # psi2 derivatives + comp_dpsi2_dvar(dpsi2_dvar_gpu, psi2_neq_gpu, psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, logGamma_gpu, log1Gamma_gpu, logpsi2denom_gpu, N, M, Q) + comp_psi2_der(dpsi2_dl_gpu,dpsi2_dmu_gpu,dpsi2_dS_gpu,dpsi2_dgamma_gpu, dpsi2_dZ_gpu, psi2_neq_gpu,psi2exp1_gpu,psi2exp2_gpu, variance, l_gpu, Z_gpu, mu_gpu, S_gpu, gamma_gpu, N, M, Q) + + def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma) + N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1] + + if isinstance(lengthscale, np.ndarray) and len(lengthscale)>1: + ARD = True + else: + ARD = False + + dpsi1_dvar_gpu = self.gpuCache['dpsi1_dvar_gpu'] + dpsi2_dvar_gpu = self.gpuCache['dpsi2_dvar_gpu'] + dpsi1_dl_gpu = self.gpuCache['dpsi1_dl_gpu'] + dpsi2_dl_gpu = self.gpuCache['dpsi2_dl_gpu'] + psi1_comb_gpu = self.gpuCache['psi1_neq_gpu'] + psi2_comb_gpu = self.gpuCache['psi2_neq_gpu'] + grad_l_gpu = self.gpuCache['grad_l_gpu'] + + # variance + variance.gradient = gpuarray.sum(dL_dpsi0).get() \ + + cublas.cublasDdot(self.cublas_handle, dL_dpsi1.size, dL_dpsi1.gpudata, 1, dpsi1_dvar_gpu.gpudata, 1) \ + + cublas.cublasDdot(self.cublas_handle, dL_dpsi2.size, dL_dpsi2.gpudata, 1, dpsi2_dvar_gpu.gpudata, 1) + + # lengscale + if ARD: + grad_l_gpu.fill(0.) + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size) + linalg_gpu.sum_axis(grad_l_gpu, psi1_comb_gpu, 1, N*M) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size) + linalg_gpu.sum_axis(grad_l_gpu, psi2_comb_gpu, 1, N*M*M) + lengthscale.gradient = grad_l_gpu.get() + else: + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dl_gpu, dL_dpsi1.size) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dl_gpu, dL_dpsi2.size) + lengthscale.gradient = gpuarray.sum(psi1_comb_gpu).get() + gpuarray.sum(psi2_comb_gpu).get() + + def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma) + N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1] + + dpsi1_dZ_gpu = self.gpuCache['dpsi1_dZ_gpu'] + dpsi2_dZ_gpu = self.gpuCache['dpsi2_dZ_gpu'] + psi1_comb_gpu = self.gpuCache['psi1_neq_gpu'] + psi2_comb_gpu = self.gpuCache['psi2_neq_gpu'] + grad_Z_gpu = self.gpuCache['grad_Z_gpu'] + + grad_Z_gpu.fill(0.) + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dZ_gpu, dL_dpsi1.size) + linalg_gpu.sum_axis(grad_Z_gpu, psi1_comb_gpu, 1, N) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dZ_gpu, dL_dpsi2.size) + linalg_gpu.sum_axis(grad_Z_gpu, psi2_comb_gpu, 1, N*M) + return grad_Z_gpu.get() + + def gradients_qX_expectations(self, dL_dpsi1, dL_dpsi2, variance, lengthscale, Z, variational_posterior): + mu = variational_posterior.mean + S = variational_posterior.variance + gamma = variational_posterior.binary_prob + self._psiDercomputations(variance, lengthscale, Z, mu, S, gamma) + N, M, Q = mu.shape[0],Z.shape[0], mu.shape[1] + + dpsi1_dmu_gpu = self.gpuCache['dpsi1_dmu_gpu'] + dpsi2_dmu_gpu = self.gpuCache['dpsi2_dmu_gpu'] + dpsi1_dS_gpu = self.gpuCache['dpsi1_dS_gpu'] + dpsi2_dS_gpu = self.gpuCache['dpsi2_dS_gpu'] + dpsi1_dgamma_gpu = self.gpuCache['dpsi1_dgamma_gpu'] + dpsi2_dgamma_gpu = self.gpuCache['dpsi2_dgamma_gpu'] + psi1_comb_gpu = self.gpuCache['psi1_neq_gpu'] + psi2_comb_gpu = self.gpuCache['psi2_neq_gpu'] + grad_mu_gpu = self.gpuCache['grad_mu_gpu'] + grad_S_gpu = self.gpuCache['grad_S_gpu'] + grad_gamma_gpu = self.gpuCache['grad_gamma_gpu'] + + # mu gradients + grad_mu_gpu.fill(0.) + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dmu_gpu, dL_dpsi1.size) + linalg_gpu.sum_axis(grad_mu_gpu, psi1_comb_gpu, N, M) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dmu_gpu, dL_dpsi2.size) + linalg_gpu.sum_axis(grad_mu_gpu, psi2_comb_gpu, N, M*M) + + # S gradients + grad_S_gpu.fill(0.) + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dS_gpu, dL_dpsi1.size) + linalg_gpu.sum_axis(grad_S_gpu, psi1_comb_gpu, N, M) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dS_gpu, dL_dpsi2.size) + linalg_gpu.sum_axis(grad_S_gpu, psi2_comb_gpu, N, M*M) + + # gamma gradients + grad_gamma_gpu.fill(0.) + linalg_gpu.mul_bcast(psi1_comb_gpu, dL_dpsi1, dpsi1_dgamma_gpu, dL_dpsi1.size) + linalg_gpu.sum_axis(grad_gamma_gpu, psi1_comb_gpu, N, M) + linalg_gpu.mul_bcast(psi2_comb_gpu, dL_dpsi2, dpsi2_dgamma_gpu, dL_dpsi2.size) + linalg_gpu.sum_axis(grad_gamma_gpu, psi2_comb_gpu, N, M*M) + + return grad_mu_gpu.get(), grad_S_gpu.get(), grad_gamma_gpu.get() diff --git a/GPy/kern/_src/rbf.py b/GPy/kern/_src/rbf.py index c5914d58..e0071fb9 100644 --- a/GPy/kern/_src/rbf.py +++ b/GPy/kern/_src/rbf.py @@ -9,6 +9,7 @@ from stationary import Stationary from GPy.util.caching import Cache_this from ...core.parameterization import variational from psi_comp import ssrbf_psi_comp +from psi_comp.ssrbf_psi_gpucomp import PSICOMP_SSRBF class RBF(Stationary): """ @@ -19,9 +20,15 @@ class RBF(Stationary): k(r) = \sigma^2 \exp \\bigg(- \\frac{1}{2} r^2 \\bigg) """ - def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf'): - super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name) + _support_GPU = True + def __init__(self, input_dim, variance=1., lengthscale=None, ARD=False, active_dims=None, name='rbf', useGPU=False): + super(RBF, self).__init__(input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=useGPU) self.weave_options = {} + self.group_spike_prob = False + + if self.useGPU: + self.psicomp = PSICOMP_SSRBF() + def K_of_r(self, r): return self.variance * np.exp(-0.5 * r**2) @@ -34,18 +41,28 @@ class RBF(Stationary): #---------------------------------------# def psi0(self, Z, variational_posterior): - return self.Kdiag(variational_posterior.mean) + if self.useGPU: + if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[0] + else: + return self.Kdiag(variational_posterior.mean) def psi1(self, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + if self.useGPU: + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[1] + else: + psi1, _, _, _, _, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) else: _, _, _, psi1 = self._psi1computations(Z, variational_posterior) return psi1 def psi2(self, Z, variational_posterior): if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + if self.useGPU: + return self.psicomp.psicomputations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob)[2] + else: + psi2, _, _, _, _, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) else: _, _, _, _, psi2 = self._psi2computations(Z, variational_posterior) return psi2 @@ -53,26 +70,30 @@ class RBF(Stationary): def update_gradients_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): # Spike-and-Slab GPLVM if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #contributions from psi0: - self.variance.gradient = np.sum(dL_dpsi0) - - #from psi1 - self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) - if self.ARD: - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + if self.useGPU: + self.psicomp.update_gradients_expectations(dL_dpsi0, dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) else: - self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() - - #from psi2 - self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() - if self.ARD: - self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) - else: - self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() - + + _, _dpsi1_dvariance, _, _, _, _, _dpsi1_dlengthscale = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _dpsi2_dvariance, _, _, _, _, _dpsi2_dlengthscale = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #contributions from psi0: + self.variance.gradient = np.sum(dL_dpsi0) + + #from psi1 + self.variance.gradient += np.sum(dL_dpsi1 * _dpsi1_dvariance) + if self.ARD: + self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + else: + self.lengthscale.gradient = (dL_dpsi1[:,:,None]*_dpsi1_dlengthscale).sum() + + #from psi2 + self.variance.gradient += (dL_dpsi2 * _dpsi2_dvariance).sum() + if self.ARD: + self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).reshape(-1,self.input_dim).sum(axis=0) + else: + self.lengthscale.gradient += (dL_dpsi2[:,:,:,None] * _dpsi2_dlengthscale).sum() + elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale**2 if l2.size != self.input_dim: @@ -107,16 +128,19 @@ class RBF(Stationary): def gradients_Z_expectations(self, dL_dpsi1, dL_dpsi2, Z, variational_posterior): # Spike-and-Slab GPLVM if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #psi1 - grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) - - #psi2 - grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) - - return grad + if self.useGPU: + return self.psicomp.gradients_Z_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + else: + _, _, _, _, _, _dpsi1_dZ, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _, _, _, _dpsi2_dZ, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #psi1 + grad = (dL_dpsi1[:, :, None] * _dpsi1_dZ).sum(axis=0) + + #psi2 + grad += (dL_dpsi2[:, :, :, None] * _dpsi2_dZ).sum(axis=0).sum(axis=1) + + return grad elif isinstance(variational_posterior, variational.NormalPosterior): l2 = self.lengthscale **2 @@ -140,22 +164,28 @@ class RBF(Stationary): def gradients_qX_expectations(self, dL_dpsi0, dL_dpsi1, dL_dpsi2, Z, variational_posterior): # Spike-and-Slab GPLVM if isinstance(variational_posterior, variational.SpikeAndSlabPosterior): - ndata = variational_posterior.mean.shape[0] - - _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) - - #psi1 - grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) - grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) - grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) - - #psi2 - grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) - grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) - - return grad_mu, grad_S, grad_gamma + if self.useGPU: + return self.psicomp.gradients_qX_expectations(dL_dpsi1, dL_dpsi2, self.variance, self.lengthscale, Z, variational_posterior) + else: + ndata = variational_posterior.mean.shape[0] + + _, _, _dpsi1_dgamma, _dpsi1_dmu, _dpsi1_dS, _, _ = ssrbf_psi_comp._psi1computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + _, _, _dpsi2_dgamma, _dpsi2_dmu, _dpsi2_dS, _, _ = ssrbf_psi_comp._psi2computations(self.variance, self.lengthscale, Z, variational_posterior.mean, variational_posterior.variance, variational_posterior.binary_prob) + + #psi1 + grad_mu = (dL_dpsi1[:, :, None] * _dpsi1_dmu).sum(axis=1) + grad_S = (dL_dpsi1[:, :, None] * _dpsi1_dS).sum(axis=1) + grad_gamma = (dL_dpsi1[:,:,None] * _dpsi1_dgamma).sum(axis=1) + + #psi2 + grad_mu += (dL_dpsi2[:, :, :, None] * _dpsi2_dmu).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_S += (dL_dpsi2[:, :, :, None] * _dpsi2_dS).reshape(ndata,-1,self.input_dim).sum(axis=1) + grad_gamma += (dL_dpsi2[:,:,:, None] * _dpsi2_dgamma).reshape(ndata,-1,self.input_dim).sum(axis=1) + + if self.group_spike_prob: + grad_gamma[:] = grad_gamma.mean(axis=0) + + return grad_mu, grad_S, grad_gamma elif isinstance(variational_posterior, variational.NormalPosterior): diff --git a/GPy/kern/_src/stationary.py b/GPy/kern/_src/stationary.py index b6fea5ef..37acbf2d 100644 --- a/GPy/kern/_src/stationary.py +++ b/GPy/kern/_src/stationary.py @@ -41,8 +41,8 @@ class Stationary(Kern): """ - def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name): - super(Stationary, self).__init__(input_dim, active_dims, name) + def __init__(self, input_dim, variance, lengthscale, ARD, active_dims, name, useGPU=False): + super(Stationary, self).__init__(input_dim, active_dims, name,useGPU=useGPU) self.ARD = ARD if not ARD: if lengthscale is None: diff --git a/GPy/likelihoods/__init__.py b/GPy/likelihoods/__init__.py index 5d5d692a..d7ad5753 100644 --- a/GPy/likelihoods/__init__.py +++ b/GPy/likelihoods/__init__.py @@ -15,8 +15,8 @@ except ImportError: if sympy_available: # These are likelihoods that rely on symbolic. from symbolic import Symbolic - from sstudent_t import SstudentT + #from sstudent_t import SstudentT from negative_binomial import Negative_binomial - from skew_normal import Skew_normal - from skew_exponential import Skew_exponential - from null_category import Null_category + #from skew_normal import Skew_normal + #from skew_exponential import Skew_exponential + #from null_category import Null_category diff --git a/GPy/models/bayesian_gplvm.py b/GPy/models/bayesian_gplvm.py index d623c8f1..03cd361c 100644 --- a/GPy/models/bayesian_gplvm.py +++ b/GPy/models/bayesian_gplvm.py @@ -7,7 +7,9 @@ from ..core import SparseGP from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg -from ..core.parameterization.variational import NormalPosterior, NormalPrior,VariationalPosterior +from ..core.parameterization.variational import NormalPosterior, NormalPrior, VariationalPosterior +from ..inference.latent_function_inference.var_dtc_parallel import update_gradients +from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU class BayesianGPLVM(SparseGP): """ @@ -65,6 +67,10 @@ class BayesianGPLVM(SparseGP): X.mean.gradient, X.variance.gradient = X_grad def parameters_changed(self): + if isinstance(self.inference_method, VarDTC_GPU): + update_gradients(self) + return + super(BayesianGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) @@ -153,57 +159,7 @@ class BayesianGPLVM(SparseGP): from ..plotting.matplot_dep import dim_reduction_plots return dim_reduction_plots.plot_steepest_gradient_map(self,*args,**kwargs) - - -def update_gradients(model): - model._log_marginal_likelihood, dL_dKmm, model.posterior = model.inference_method.inference_likelihood(model.kern, model.X, model.Z, model.likelihood, model.Y) - het_noise = model.likelihood.variance.size > 1 - - if het_noise: - dL_dthetaL = np.empty((model.Y.shape[0],)) - else: - dL_dthetaL = 0 - - #gradients w.r.t. kernel - model.kern.update_gradients_full(dL_dKmm, model.Z, None) - kern_grad = model.kern.gradient.copy() - - #gradients w.r.t. Z - model.Z.gradient[:,model.kern.active_dims] = model.kern.gradients_X(dL_dKmm, model.Z) - - isEnd = False - while not isEnd: - isEnd, n_range, grad_dict = model.inference_method.inference_minibatch(model.kern, model.X, model.Z, model.likelihood, model.Y) - if isinstance(model.X, VariationalPosterior): - - #gradients w.r.t. kernel - model.kern.update_gradients_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) - kern_grad += model.kern.gradient - - #gradients w.r.t. Z - model.Z.gradient[:,model.kern.active_dims] += model.kern.gradients_Z_expectations( - grad_dict['dL_dpsi1'], grad_dict['dL_dpsi2'], Z=model.Z, variational_posterior=model.X[n_range[0]:n_range[1]]) - - #gradients w.r.t. posterior parameters of X - X_grad = model.kern.gradients_qX_expectations(variational_posterior=model.X[n_range[0]:n_range[1]], Z=model.Z, dL_dpsi0=grad_dict['dL_dpsi0'], dL_dpsi1=grad_dict['dL_dpsi1'], dL_dpsi2=grad_dict['dL_dpsi2']) - model.set_X_gradients(model.X[n_range[0]:n_range[1]], X_grad) - - if het_noise: - dL_dthetaL[n_range[0]:n_range[1]] = grad_dict['dL_dthetaL'] - else: - dL_dthetaL += grad_dict['dL_dthetaL'] - - # Set the gradients w.r.t. kernel - model.kern.gradient = kern_grad - - # Update Log-likelihood - model._log_marginal_likelihood -= model.variational_prior.KL_divergence(model.X) - # update for the KL divergence - model.variational_prior.update_gradients_KL(model.X) - - # dL_dthetaL - model.likelihood.update_gradients(dL_dthetaL) def latent_cost_and_grad(mu_S, kern, Z, dL_dpsi0, dL_dpsi1, dL_dpsi2): """ diff --git a/GPy/models/ss_gplvm.py b/GPy/models/ss_gplvm.py index 1c2ecf4c..57be302a 100644 --- a/GPy/models/ss_gplvm.py +++ b/GPy/models/ss_gplvm.py @@ -11,6 +11,9 @@ from ..likelihoods import Gaussian from ..inference.optimization import SCG from ..util import linalg from ..core.parameterization.variational import SpikeAndSlabPrior, SpikeAndSlabPosterior +from ..inference.latent_function_inference.var_dtc_parallel import update_gradients +from ..inference.latent_function_inference.var_dtc_gpu import VarDTC_GPU + class SSGPLVM(SparseGP): """ @@ -25,11 +28,14 @@ class SSGPLVM(SparseGP): """ def __init__(self, Y, input_dim, X=None, X_variance=None, init='PCA', num_inducing=10, - Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', **kwargs): + Z=None, kernel=None, inference_method=None, likelihood=None, name='Spike-and-Slab GPLVM', group_spike=False, **kwargs): - if X == None: # The mean of variational approximation (mu) + if X == None: from ..util.initialization import initialize_latent - X = initialize_latent(init, input_dim, Y) + X, fracs = initialize_latent(init, input_dim, Y) + else: + fracs = np.ones(input_dim) + self.init = init if X_variance is None: # The variance of the variational approximation (S) @@ -38,6 +44,9 @@ class SSGPLVM(SparseGP): gamma = np.empty_like(X) # The posterior probabilities of the binary variable in the variational approximation gamma[:] = 0.5 + 0.01 * np.random.randn(X.shape[0], input_dim) + if group_spike: + gamma[:] = gamma.mean(axis=0) + if Z is None: Z = np.random.permutation(X.copy())[:num_inducing] assert Z.shape[1] == X.shape[1] @@ -46,18 +55,31 @@ class SSGPLVM(SparseGP): likelihood = Gaussian() if kernel is None: - kernel = kern.SSRBF(input_dim) - + kernel = kern.RBF(input_dim, lengthscale=fracs, ARD=True) # + kern.white(input_dim) + pi = np.empty((input_dim)) pi[:] = 0.5 self.variational_prior = SpikeAndSlabPrior(pi=pi) # the prior probability of the latent binary variable b X = SpikeAndSlabPosterior(X, X_variance, gamma) + + if group_spike: + kernel.group_spike_prob = True + self.variational_prior.group_spike_prob = True + SparseGP.__init__(self, X, Y, Z, kernel, likelihood, inference_method, name, **kwargs) self.add_parameter(self.X, index=0) self.add_parameter(self.variational_prior) + + def set_X_gradients(self, X, X_grad): + """Set the gradients of the posterior distribution of X in its specific form.""" + X.mean.gradient, X.variance.gradient, X.binary_prob.gradient = X_grad def parameters_changed(self): + if isinstance(self.inference_method, VarDTC_GPU): + update_gradients(self) + return + super(SSGPLVM, self).parameters_changed() self._log_marginal_likelihood -= self.variational_prior.KL_divergence(self.X) diff --git a/GPy/plotting/matplot_dep/__init__.py b/GPy/plotting/matplot_dep/__init__.py index e2706903..f493513a 100644 --- a/GPy/plotting/matplot_dep/__init__.py +++ b/GPy/plotting/matplot_dep/__init__.py @@ -15,3 +15,5 @@ import latent_space_visualizations import netpbmfile import inference_plots import maps +import img_plots +from ssgplvm import SSGPLVM_plot diff --git a/GPy/plotting/matplot_dep/img_plots.py b/GPy/plotting/matplot_dep/img_plots.py new file mode 100644 index 00000000..21dbd64f --- /dev/null +++ b/GPy/plotting/matplot_dep/img_plots.py @@ -0,0 +1,56 @@ +""" +The module contains the tools for ploting 2D image visualizations +""" + +import numpy as np +from matplotlib.cm import jet + +width_max = 15 +height_max = 12 + +def _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad): + width = (x_size*fig_ncols+pad*(fig_ncols-1)) + height = (y_size*fig_nrows+pad*(fig_nrows-1)) + if width > float(height)/height_max*width_max: + return (width_max, float(width_max)/width*height) + else: + return (float(height_max)/height*width, height_max) + +def plot_2D_images(figure, arr, symmetric=False, pad=None, zoom=None, mode=None, interpolation='nearest'): + ax = figure.add_subplot(111) + if len(arr.shape)==2: + arr = arr.reshape(*((1,)+arr.shape)) + fig_num = arr.shape[0] + y_size = arr.shape[1] + x_size = arr.shape[2] + fig_ncols = int(np.ceil(np.sqrt(fig_num))) + fig_nrows = int(np.ceil((float)(fig_num)/fig_ncols)) + if pad==None: + pad = max(int(min(y_size,x_size)/10),1) + + figsize = _calculateFigureSize(x_size, y_size, fig_ncols, fig_nrows, pad) + #figure.set_size_inches(figsize,forward=True) + #figure.subplots_adjust(left=0.05, bottom=0.05, right=0.95, top=0.95) + + if symmetric: + # symmetric around zero: fix zero as the middle color + mval = max(abs(arr.max()),abs(arr.min())) + arr = arr/(2.*mval)+0.5 + else: + minval,maxval = arr.min(),arr.max() + arr = (arr-minval)/(maxval-minval) + + if mode=='L': + arr_color = np.empty(arr.shape+(3,)) + arr_color[:] = arr.reshape(*(arr.shape+(1,))) + elif mode==None or mode=='jet': + arr_color = jet(arr) + + buf = np.ones((y_size*fig_nrows+pad*(fig_nrows-1), x_size*fig_ncols+pad*(fig_ncols-1), 3),dtype=arr.dtype) + + for y in xrange(fig_nrows): + for x in xrange(fig_ncols): + if y*fig_ncols+xnab',m1,m2) a=dim1, b=dim2 ) + join_prod = ElementwiseKernel("double *out, double *m1, double *m2, int dim1, int dim2", "out[i] = m1[(i%dim1)*dim1+(i%(dim1*dim2))/dim1]*m2[(i%dim1)*dim1+i/(dim1*dim2)]", "join_prod") + +except: + pass