// Types to express dimensionality of a kernel launch. Blocks and threads // are (up to) 3-dimensional. // // A thread is conceptually like a SIMD lane. Some number, typically 32 // (though that fact should not be relied on) SIMD lanes are tied together with // a single PC in a unit called a warp. There is a maximum number of threads // that can execute in a shared-context entity called a block. Presently, that // number is 1024 -- again, something that should not be relied on from this // comment, but checked via perftools::gputools::DeviceDescription. // // For additional information, see // http://docs.nvidia.com/cuda/kepler-tuning-guide/#device-utilization-and-occupancy // // Because of that modest thread-per-block limit, a kernel can be launched with // multiple blocks. Each block is indivisibly scheduled onto a single core. // Blocks can also be used in a multi-dimensional configuration, and the block // count has much less modest limits -- typically they're similar to the maximum // amount of addressable memory. #ifndef TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_ #define TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_ #include "tensorflow/stream_executor/platform/port.h" #include "tensorflow/stream_executor/lib/strcat.h" #include "tensorflow/stream_executor/platform/port.h" namespace perftools { namespace gputools { // Basic type that represents a 3-dimensional index space. struct Dim3D { uint64 x, y, z; Dim3D(uint64 x, uint64 y, uint64 z) : x(x), y(y), z(z) {} }; // Thread dimensionality for use in a kernel launch. See file comment for // details. struct ThreadDim : public Dim3D { explicit ThreadDim(uint64 x = 1, uint64 y = 1, uint64 z = 1) : Dim3D(x, y, z) {} // Returns a string representation of the thread dimensionality. string ToString() const { return port::StrCat("ThreadDim{", x, ", ", y, ", ", z, "}"); } }; // Block dimensionality for use in a kernel launch. See file comment for // details. struct BlockDim : public Dim3D { explicit BlockDim(uint64 x = 1, uint64 y = 1, uint64 z = 1) : Dim3D(x, y, z) {} // Returns a string representation of the block dimensionality. string ToString() const { return port::StrCat("BlockDim{", x, ", ", y, ", ", z, "}"); } }; } // namespace gputools } // namespace perftools #endif // TENSORFLOW_STREAM_EXECUTOR_LAUNCH_DIM_H_