blob: 3bccaec5e3ee6c24e622f5dc7b2afb52e740603d (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
|
// The StreamExecutor is a single-device abstraction for:
//
// * Loading/launching data-parallel kernels
// * Invoking pre-canned high-performance library routines (like matrix
//   multiply)
//
// The appropriately-typed kernel and "loader spec" are automatically generated
// for the user within a namespace by the gcudacc compiler output, so typical
// use looks like so:
//
// namespace gpu = ::perftools::gputools;
// namespace gcudacc = ::platforms::gpus::gcudacc;
//
// gpu::StreamExecutor stream_exec{PlatformKind::kCuda};
// gcudacc::kernel::MyKernel my_kernel{&stream_exec};
// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(),
// &my_kernel);
// if (!ok) { ... }
// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>();
// if (result == nullptr) { ... }
// int host_result;
// gpu::Stream my_stream{&stream_exec};
// my_stream
//     .Init()
//     .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result)
//     .ThenMemcpy(&host_result, result, sizeof(host_result))
//     .BlockHostUntilDone();
// if (!my_stream.ok()) { ... }
// printf("%d\n", host_result);
//
// Because the device may operate asynchronously with respect to the host, the
// Stream::BlockHostUntilDone() call forces the calling host thread to wait
// until the chain of commands specified for the Stream has completed execution.
#ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
#define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
#include "tensorflow/stream_executor/device_description.h" // IWYU pragma: export
#include "tensorflow/stream_executor/device_memory.h" // IWYU pragma: export
#include "tensorflow/stream_executor/device_options.h" // IWYU pragma: export
#include "tensorflow/stream_executor/event.h" // IWYU pragma: export
#include "tensorflow/stream_executor/kernel.h" // IWYU pragma: export
#include "tensorflow/stream_executor/kernel_spec.h" // IWYU pragma: export
#include "tensorflow/stream_executor/launch_dim.h" // IWYU pragma: export
#include "tensorflow/stream_executor/platform.h" // IWYU pragma: export
#include "tensorflow/stream_executor/stream.h" // IWYU pragma: export
#include "tensorflow/stream_executor/stream_executor_pimpl.h" // IWYU pragma: export
#include "tensorflow/stream_executor/timer.h" // IWYU pragma: export
#endif // TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
|