diff options
Diffstat (limited to 'tensorflow/stream_executor/kernel_spec.cc')
-rw-r--r-- | tensorflow/stream_executor/kernel_spec.cc | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/kernel_spec.cc b/tensorflow/stream_executor/kernel_spec.cc new file mode 100644 index 0000000000..e3b4b0d951 --- /dev/null +++ b/tensorflow/stream_executor/kernel_spec.cc @@ -0,0 +1,236 @@ +#include "tensorflow/stream_executor/kernel_spec.h" + + +namespace perftools { +namespace gputools { + +KernelLoaderSpec::KernelLoaderSpec(port::StringPiece kernelname) + : kernelname_(kernelname.ToString()) {} + +OnDiskKernelLoaderSpec::OnDiskKernelLoaderSpec(port::StringPiece filename, + port::StringPiece kernelname) + : KernelLoaderSpec(kernelname), filename_(filename.ToString()) {} + +CudaPtxOnDisk::CudaPtxOnDisk(port::StringPiece filename, + port::StringPiece kernelname) + : OnDiskKernelLoaderSpec(filename, kernelname) {} + +CudaCubinOnDisk::CudaCubinOnDisk(port::StringPiece filename, + port::StringPiece kernelname) + : OnDiskKernelLoaderSpec(filename, kernelname) {} + +CudaCubinInMemory::CudaCubinInMemory(const char *bytes, + port::StringPiece kernelname) + : KernelLoaderSpec(kernelname), bytes_(bytes) {} + +bool CompareComputeCapability(const std::tuple<int, int> &lhs, + const std::tuple<int, int> &rhs) { + return std::get<0>(lhs) < std::get<0>(rhs) || + (std::get<0>(lhs) == std::get<0>(rhs) && + std::get<1>(lhs) < std::get<1>(rhs)); +} + +const std::tuple<int, int> CudaPtxInMemory::kMinimumCapability{1, 0}; + +CudaPtxInMemory::CudaPtxInMemory(port::StringPiece ptx, + port::StringPiece kernel_name, + bool ptx_compressed) + : KernelLoaderSpec(kernel_name), + ptx_by_compute_capability_(CompareComputeCapability) { + if (ptx_compressed) { + // Lazy decompression. Put an empty string in decompressed_ptx_ showing that + // the original ptx is compressed. + decompressed_ptx_[ptx.data()] = ""; + } + ptx_by_compute_capability_[kMinimumCapability] = ptx.data(); +} + +CudaPtxInMemory::CudaPtxInMemory( + const std::initializer_list<CudaPtxInMemory::PtxSpec> &spec_list, + port::StringPiece kernel_name, bool ptx_compressed) + : KernelLoaderSpec(kernel_name), + ptx_by_compute_capability_(CompareComputeCapability) { + for (const auto &spec : spec_list) { + int major, minor; + port::StringPiece ptx; + std::tie(major, minor, ptx) = spec; + if (ptx_compressed) { + // Lazy decompression. Put an empty string in decompressed_ptx_ showing + // that the original ptx is compressed. + decompressed_ptx_[ptx.data()] = ""; + } + ptx_by_compute_capability_[std::tuple<int, int>{major, minor}] = ptx.data(); + } +} + +string CudaPtxInMemory::DecompressPtx(const char *ptx) { + // Get the length of the PTX string from the beginning of the buffer. + uint64 ptx_length = *reinterpret_cast<const uint64 *>(ptx); + // Get the PTX string from the buffer with offset and length. + string compressed_ptx(ptx + sizeof(uint64), + ptx + sizeof(uint64) + ptx_length); + string decompressed_ptx; + // Decompress the PTX string with bzip2. + LOG(FATAL) << "bzip2 decompression is not supported yet."; + return decompressed_ptx; +} + +const char *CudaPtxInMemory::default_text() const { + if (ptx_by_compute_capability_.empty()) { + return nullptr; + } + + mutex_lock lock{mu_}; + + auto ptx = ptx_by_compute_capability_.begin()->second; + // Check if there is an entry in decompressed ptx table. + auto decompressed_ptx_iter = decompressed_ptx_.find(ptx); + if (decompressed_ptx_iter != decompressed_ptx_.end()) { + // If the decompressed string is empty, which means the ptx hasn't been + // decompressed, decompress it here. + if (decompressed_ptx_iter->second.size() == 0) { + decompressed_ptx_iter->second = DecompressPtx(ptx); + } + return decompressed_ptx_iter->second.c_str(); + } + return ptx; +} + +const char *CudaPtxInMemory::original_default_text() const { + if (ptx_by_compute_capability_.empty()) { + return nullptr; + } + + return ptx_by_compute_capability_.begin()->second; +} + +const char *CudaPtxInMemory::text(int compute_capability_major, + int compute_capability_minor) const { + std::tuple<int, int> capability{compute_capability_major, + compute_capability_minor}; + + auto ptx_iter = ptx_by_compute_capability_.find(capability); + if (ptx_iter == ptx_by_compute_capability_.end()) { + return nullptr; + } + + mutex_lock lock{mu_}; + + // Check if there is an entry in decompressed ptx table. + auto decompressed_ptx_iter = decompressed_ptx_.find(ptx_iter->second); + if (decompressed_ptx_iter != decompressed_ptx_.end()) { + // If the decompressed string is empty, which means the ptx hasn't been + // decompressed, decompress it here. + if (decompressed_ptx_iter->second.size() == 0) { + decompressed_ptx_iter->second = DecompressPtx(ptx_iter->second); + } + return decompressed_ptx_iter->second.c_str(); + } + return ptx_iter->second; +} + +const char *CudaPtxInMemory::original_text(int compute_capability_major, + int compute_capability_minor) const { + std::tuple<int, int> capability{compute_capability_major, + compute_capability_minor}; + + auto ptx_iter = ptx_by_compute_capability_.find(capability); + if (ptx_iter == ptx_by_compute_capability_.end()) { + return nullptr; + } + + return ptx_iter->second; +} + +OpenCLTextOnDisk::OpenCLTextOnDisk(port::StringPiece filename, + port::StringPiece kernelname) + : OnDiskKernelLoaderSpec(filename, kernelname) {} + +OpenCLTextInMemory::OpenCLTextInMemory(port::StringPiece text, + port::StringPiece kernelname) + : KernelLoaderSpec(kernelname), text_(text.ToString()) {} + +OpenCLBinaryOnDisk::OpenCLBinaryOnDisk(port::StringPiece filename, + port::StringPiece kernelname) + : OnDiskKernelLoaderSpec(filename, kernelname) {} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextOnDisk( + port::StringPiece filename, port::StringPiece kernelname) { + CHECK(ocl_text_on_disk_ == nullptr); + ocl_text_on_disk_.reset(new OpenCLTextOnDisk{filename, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLBinaryOnDisk( + port::StringPiece filename, port::StringPiece kernelname) { + CHECK(ocl_binary_on_disk_ == nullptr); + ocl_binary_on_disk_.reset(new OpenCLBinaryOnDisk{filename, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddOpenCLTextInMemory( + port::StringPiece filename, port::StringPiece kernelname) { + CHECK(ocl_text_in_memory_ == nullptr); + ocl_text_in_memory_.reset(new OpenCLTextInMemory{filename, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxOnDisk( + port::StringPiece filename, port::StringPiece kernelname) { + CHECK(cuda_ptx_on_disk_ == nullptr); + cuda_ptx_on_disk_.reset(new CudaPtxOnDisk{filename, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinInMemory( + const char *bytes, port::StringPiece kernelname) { + CHECK(cuda_cubin_in_memory_ == nullptr); + cuda_cubin_in_memory_.reset(new CudaCubinInMemory{bytes, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCubinOnDisk( + port::StringPiece filename, port::StringPiece kernelname) { + CHECK(cuda_cubin_on_disk_ == nullptr); + cuda_cubin_on_disk_.reset(new CudaCubinOnDisk{filename, kernelname}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory( + port::StringPiece ptx, port::StringPiece kernelname) { + CHECK(cuda_ptx_in_memory_ == nullptr); + cuda_ptx_in_memory_.reset( + new CudaPtxInMemory{ptx, kernelname, false /* ptx_compressed */}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory( + port::StringPiece ptx, port::StringPiece kernelname) { + CHECK(cuda_ptx_in_memory_ == nullptr); + cuda_ptx_in_memory_.reset( + new CudaPtxInMemory{ptx, kernelname, true /* ptx_compressed */}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaPtxInMemory( + std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list, + port::StringPiece kernelname) { + CHECK(cuda_ptx_in_memory_ == nullptr); + cuda_ptx_in_memory_.reset( + new CudaPtxInMemory{spec_list, kernelname, false /* ptx_compressed */}); + return this; +} + +MultiKernelLoaderSpec *MultiKernelLoaderSpec::AddCudaCompressedPtxInMemory( + std::initializer_list<CudaPtxInMemory::PtxSpec> spec_list, + port::StringPiece kernelname) { + CHECK(cuda_ptx_in_memory_ == nullptr); + cuda_ptx_in_memory_.reset( + new CudaPtxInMemory{spec_list, kernelname, true /* ptx_compressed */}); + return this; +} + +MultiKernelLoaderSpec::MultiKernelLoaderSpec(size_t arity) : arity_(arity) {} + +} // namespace gputools +} // namespace perftools |