about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/stream_executor_pimpl.h
diff options
context:
space:
mode:
author: Peter Hawkins <phawkins@google.com> 2016-11-29 18:55:46 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2016-11-29 19:05:55 -0800
commit bada4a5339d4567419e993b7736eb23a6f3535c4 (patch)
tree d793b3a70518a2ab63fbc24c0e41d72eb7d7d97f /tensorflow/stream_executor/stream_executor_pimpl.h
parent 347d3ef2a871d8212f39c7ea2b7defe63468dfbc (diff)
StreamExecutor: Optimize kernel argument packing
Create a single class to hold all kernel arguments and optimize how they are added into this class. Change: 140556725
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_pimpl.h')
-rw-r--r-- tensorflow/stream_executor/stream_executor_pimpl.h 10
1 file changed, 3 insertions, 7 deletions
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 2b5a70f807..83fd27599e 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -392,7 +392,7 @@ class StreamExecutor {
// implementation in StreamExecutorInterface::Launch().
bool Launch(Stream *stream, const ThreadDim &thread_dims,
const BlockDim &block_dims, const KernelBase &kernel,
- const std::vector<KernelArg> &args);
+ const KernelArgsArrayBase &args);
// Gets-or-creates (creates with memoization) a FftSupport datatype that can
// be used to execute FFT routines on the current platform.
@@ -427,10 +427,6 @@ class StreamExecutor {
// previously registered.
bool UnregisterTraceListener(TraceListener* listener);
- // Converts a DeviceMemory object into a KernelArg object for passing to the
- // device driver for kernel launch.
- KernelArg DeviceMemoryToKernelArg(const DeviceMemoryBase &gpu_mem) const;
-
private:
template <typename BeginCallT, typename CompleteCallT,
typename ReturnT, typename... BeginArgsT>
@@ -758,9 +754,9 @@ inline Stream &Stream::ThenLaunch(ThreadDim thread_dims, BlockDim block_dims,
// we pack the variadic parameters passed as ...args into the desired
// tuple form and pass that packed form to the StreamExecutor::Launch()
// implementation.
- std::vector<KernelArg> kernel_args;
- kernel_args.reserve(kernel.Arity());
+ KernelArgsArray<sizeof...(args)> kernel_args;
kernel.PackParams(&kernel_args, args...);
+ DCHECK(parent_ != nullptr);
bool ok =
parent_->Launch(this, thread_dims, block_dims, kernel, kernel_args);
if (!ok) {