diff options
Diffstat (limited to 'tensorflow/stream_executor/stream_executor.h')
-rw-r--r-- | tensorflow/stream_executor/stream_executor.h | 50 |
1 files changed, 50 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h new file mode 100644 index 0000000000..3bccaec5e3 --- /dev/null +++ b/tensorflow/stream_executor/stream_executor.h @@ -0,0 +1,50 @@ +// The StreamExecutor is a single-device abstraction for: +// +// * Loading/launching data-parallel kernels +// * Invoking pre-canned high-performance library routines (like matrix +// multiply) +// +// The appropriately-typed kernel and "loader spec" are automatically generated +// for the user within a namespace by the gcudacc compiler output, so typical +// use looks like so: +// +// namespace gpu = ::perftools::gputools; +// namespace gcudacc = ::platforms::gpus::gcudacc; +// +// gpu::StreamExecutor stream_exec{PlatformKind::kCuda}; +// gcudacc::kernel::MyKernel my_kernel{&stream_exec}; +// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(), +// &my_kernel); +// if (!ok) { ... } +// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>(); +// if (result == nullptr) { ... } +// int host_result; +// gpu::Stream my_stream{&stream_exec}; +// my_stream +// .Init() +// .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result) +// .ThenMemcpy(&host_result, result, sizeof(host_result)) +// .BlockHostUntilDone(); +// if (!my_stream.ok()) { ... } +// printf("%d\n", host_result); +// +// Since the device may operate asynchronously to the host, the +// Stream::BlockHostUntilDone() call forces the calling host thread to wait for +// the chain of commands specified for the Stream to complete execution. 
+ +#ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ +#define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ + +#include "tensorflow/stream_executor/device_description.h" // IWYU pragma: export +#include "tensorflow/stream_executor/device_memory.h" // IWYU pragma: export +#include "tensorflow/stream_executor/device_options.h" // IWYU pragma: export +#include "tensorflow/stream_executor/event.h" // IWYU pragma: export +#include "tensorflow/stream_executor/kernel.h" // IWYU pragma: export +#include "tensorflow/stream_executor/kernel_spec.h" // IWYU pragma: export +#include "tensorflow/stream_executor/launch_dim.h" // IWYU pragma: export +#include "tensorflow/stream_executor/platform.h" // IWYU pragma: export +#include "tensorflow/stream_executor/stream.h" // IWYU pragma: export +#include "tensorflow/stream_executor/stream_executor_pimpl.h" // IWYU pragma: export +#include "tensorflow/stream_executor/timer.h" // IWYU pragma: export + +#endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ |