diff options
author | Peter Hawkins <phawkins@google.com> | 2016-11-29 18:55:46 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2016-11-29 19:05:55 -0800 |
commit | bada4a5339d4567419e993b7736eb23a6f3535c4 (patch) | |
tree | d793b3a70518a2ab63fbc24c0e41d72eb7d7d97f /tensorflow/stream_executor/stream_executor_pimpl.h | |
parent | 347d3ef2a871d8212f39c7ea2b7defe63468dfbc (diff) |
StreamExecutor: Optimize kernel argument packing
Create a single class to hold all kernel arguments and optimize how they are added into this class.
Change: 140556725
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_pimpl.h')
-rw-r--r-- | tensorflow/stream_executor/stream_executor_pimpl.h | 10 |
1 file changed, 3 insertions, 7 deletions
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index 2b5a70f807..83fd27599e 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -392,7 +392,7 @@ class StreamExecutor { // implementation in StreamExecutorInterface::Launch(). bool Launch(Stream *stream, const ThreadDim &thread_dims, const BlockDim &block_dims, const KernelBase &kernel, - const std::vector<KernelArg> &args); + const KernelArgsArrayBase &args); // Gets-or-creates (creates with memoization) a FftSupport datatype that can // be used to execute FFT routines on the current platform. @@ -427,10 +427,6 @@ class StreamExecutor { // previously registered. bool UnregisterTraceListener(TraceListener* listener); - // Converts a DeviceMemory object into a KernelArg object for passing to the - // device driver for kernel launch. - KernelArg DeviceMemoryToKernelArg(const DeviceMemoryBase &gpu_mem) const; - private: template <typename BeginCallT, typename CompleteCallT, typename ReturnT, typename... BeginArgsT> @@ -758,9 +754,9 @@ inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims, // we pack the variadic parameters passed as ...args into the desired // tuple form and pass that packed form to the StreamExecutor::Launch() // implementation. - std::vector<KernelArg> kernel_args; - kernel_args.reserve(kernel.Arity()); + KernelArgsArray<sizeof...(args)> kernel_args; kernel.PackParams(&kernel_args, args...); + DCHECK(parent_ != nullptr); bool ok = parent_->Launch(this, thread_dims, block_dims, kernel, kernel_args); if (!ok) { |