Diffstat (limited to 'tensorflow/stream_executor/stream_executor.h')
-rw-r--r-- tensorflow/stream_executor/stream_executor.h | 50
1 file changed, 50 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h
new file mode 100644
index 0000000000..3bccaec5e3
--- /dev/null
+++ b/tensorflow/stream_executor/stream_executor.h
@@ -0,0 +1,50 @@
+// The StreamExecutor is a single-device abstraction for:
+//
+// * Loading/launching data-parallel kernels
+// * Invoking pre-canned high-performance library routines (like matrix
+// multiply)
+//
+// The appropriately-typed kernel and "loader spec" are automatically generated
+// for the user within a namespace by the gcudacc compiler output, so typical
+// use looks like this:
+//
+// namespace gpu = ::perftools::gputools;
+// namespace gcudacc = ::platforms::gpus::gcudacc;
+//
+// gpu::StreamExecutor stream_exec{PlatformKind::kCuda};
+// gcudacc::kernel::MyKernel my_kernel{&stream_exec};
+// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(),
+// &my_kernel);
+// if (!ok) { ... }
+// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>();
+// if (result == nullptr) { ... }
+// int host_result;
+// gpu::Stream my_stream{&stream_exec};
+// my_stream
+// .Init()
+// .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result)
+// .ThenMemcpy(&host_result, result, sizeof(host_result))
+// .BlockHostUntilDone();
+// if (!my_stream.ok()) { ... }
+// printf("%d\n", host_result);
+//
+// Since the device may operate asynchronously with respect to the host, the
+// Stream::BlockHostUntilDone() call forces the calling host thread to wait for
+// the chain of commands specified for the Stream to complete execution.
+
+#ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
+#define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
+
+#include "tensorflow/stream_executor/device_description.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/device_memory.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/device_options.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/event.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/kernel.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/kernel_spec.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/launch_dim.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/platform.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/stream.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/stream_executor_pimpl.h" // IWYU pragma: export
+#include "tensorflow/stream_executor/timer.h" // IWYU pragma: export
+
+#endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
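
To make the asynchronous semantics described in the header comment concrete, below is a minimal sketch of the copy-back-and-wait pattern on its own, without the gcudacc kernel machinery. It assumes only the API surface shown in the comment above (StreamExecutor constructed from PlatformKind::kCuda, DeviceMemory, AllocateZeroed, Stream::Init, ThenMemcpy, BlockHostUntilDone, ok); the enclosing main function and the printed values are illustrative, not part of the header.

#include <cstdio>

#include "tensorflow/stream_executor/stream_executor.h"

namespace gpu = ::perftools::gputools;

int main() {
  // Bind to the CUDA platform, as in the header comment.
  gpu::StreamExecutor stream_exec{gpu::PlatformKind::kCuda};

  // Zero-initialized single-int allocation on the device.
  gpu::DeviceMemory<int> device_value = stream_exec.AllocateZeroed<int>();

  int host_value = -1;
  gpu::Stream stream{&stream_exec};
  stream.Init()
      // Enqueue an asynchronous device-to-host copy of the value...
      .ThenMemcpy(&host_value, device_value, sizeof(host_value))
      // ...then block the calling host thread until everything enqueued
      // on this stream has finished executing.
      .BlockHostUntilDone();
  if (!stream.ok()) {
    return 1;
  }
  std::printf("device value: %d\n", host_value);  // Expected: 0.
  return 0;
}

The key point the sketch illustrates is that ThenMemcpy only enqueues work on the stream; host_value is not guaranteed to hold the copied result until BlockHostUntilDone() has returned and stream.ok() confirms the enqueued chain succeeded.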