diff options
Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc | 64 |
1 file changed, 22 insertions, 42 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc index 3c8018a030..79b3f1efec 100644 --- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc +++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc @@ -22,7 +22,7 @@ limitations under the License. #include "llvm/IR/DataLayout.h" #include "tensorflow/compiler/xla/literal.h" #include "tensorflow/compiler/xla/literal_util.h" -#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h" +#include "tensorflow/compiler/xla/service/gpu/nvptx_compiler.h" #include "tensorflow/compiler/xla/service/gpu/outfeed_manager.h" #include "tensorflow/compiler/xla/shape_util.h" #include "tensorflow/compiler/xla/status_macros.h" @@ -36,15 +36,14 @@ limitations under the License. #include "tensorflow/core/platform/stream_executor_no_cuda.h" namespace xla { +namespace gpu { // TODO(b/30467474) Once GPU infeed implementation settles, consider // folding back the cpu and gpu infeed implementations into a generic // one if possible. 
-GpuTransferManager::GpuTransferManager() - : GenericTransferManager( - se::cuda::kCudaPlatformId, - /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout) - .getPointerSize(0 /* default address space */)) {} +GpuTransferManager::GpuTransferManager(se::Platform::Id id, + unsigned pointer_size) + : GenericTransferManager(id, pointer_size) {} Status GpuTransferManager::TransferLiteralToInfeed( se::StreamExecutor* executor, const LiteralSlice& literal) { @@ -52,48 +51,28 @@ Status GpuTransferManager::TransferLiteralToInfeed( VLOG(2) << "Transferring literal to infeed with shape: " << ShapeUtil::HumanString(shape); - if (!ShapeUtil::IsTuple(shape)) { - int64 size = GetByteSizeRequirement(shape); - return TransferBufferToInfeed(executor, size, literal.untyped_data()); - } - // For a tuple, we transfer each of its elements to the device and // enqueue the resulting destination device addresses with the // infeed manager. - std::vector<gpu::InfeedBuffer*> buffers; - auto cleanup = tensorflow::gtl::MakeCleanup([buffers]() { - for (gpu::InfeedBuffer* b : buffers) { - b->Done(); - } - }); + ShapeTree<InfeedBuffer> buffer_tree(shape); TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus( shape, [&](const Shape& literal_subshape, const ShapeIndex& index) { if (ShapeUtil::IsArray(literal_subshape)) { int64 tuple_element_size = GetByteSizeRequirement(literal_subshape); TF_ASSIGN_OR_RETURN( - gpu::InfeedBuffer * buffer, + *buffer_tree.mutable_element(index), TransferBufferToInfeedInternal(executor, tuple_element_size, literal.untyped_data(index))); - buffers.push_back(buffer); } return Status::OK(); })); - cleanup.release(); - return EnqueueBuffersToInfeed(executor, buffers); -} - -Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor, - int64 size, - const void* source) { - TF_ASSIGN_OR_RETURN(gpu::InfeedBuffer * buffer, - TransferBufferToInfeedInternal(executor, size, source)); - return EnqueueBuffersToInfeed(executor, {buffer}); + return 
EnqueueBuffersToInfeed(executor, std::move(buffer_tree)); } Status GpuTransferManager::EnqueueBuffersToInfeed( - se::StreamExecutor* executor, std::vector<gpu::InfeedBuffer*> buffers) { + se::StreamExecutor* executor, ShapeTree<InfeedBuffer> buffers) { gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager(); se::Stream* stream = infeed_manager->GetStream(executor); @@ -103,21 +82,18 @@ Status GpuTransferManager::EnqueueBuffersToInfeed( // possible. Status block_status = stream->BlockHostUntilDone(); if (!block_status.ok()) { - for (gpu::InfeedBuffer* b : buffers) { - b->Done(); - } return InternalError("Failed to complete data transfer on stream %p: %s", stream, block_status.error_message().c_str()); } - infeed_manager->EnqueueBuffers(buffers); + infeed_manager->EnqueueDestination(std::move(buffers)); VLOG(2) << "Infeed data transferred"; return Status::OK(); } -StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal( +StatusOr<InfeedBuffer> GpuTransferManager::TransferBufferToInfeedInternal( se::StreamExecutor* executor, int64 size, const void* source) { if (size > std::numeric_limits<int32>::max()) { return InvalidArgument("Infeed shape is too large: needs %lld bytes", size); @@ -133,12 +109,12 @@ StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal( return InternalError("Failed to obtain a stream"); } - gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size); - stream->ThenMemcpy(buffer->device_memory(), source, size); + InfeedBuffer buffer(executor, size); + stream->ThenMemcpy(buffer.device_memory(), source, size); VLOG(2) << "Queued infeed data on stream " << stream; - return buffer; + return std::move(buffer); } static std::unique_ptr<Literal> ShapeTreeToLiteral( @@ -191,22 +167,26 @@ Status GpuTransferManager::TransferLiteralFromOutfeed( // Give the tree of buffers to the outfeed mananger. The device will fill it // while we're waiting for it below. 
gpu::OutfeedManager* outfeed_manager = gpu::GetOrCreateOutfeedManager(); - outfeed_manager->EnqueueOutfeedDestination(&outfeed_buffers); + outfeed_manager->EnqueueDestination(&outfeed_buffers); // Now turn the tree of buffers back into a literal. *literal = std::move(*ShapeTreeToLiteral(&outfeed_buffers)); return Status::OK(); } +} // namespace gpu } // namespace xla -static std::unique_ptr<xla::TransferManager> CreateGpuTransferManager() { - return xla::MakeUnique<xla::GpuTransferManager>(); +static std::unique_ptr<xla::TransferManager> CreateNVPTXTransferManager() { + return xla::MakeUnique<xla::gpu::GpuTransferManager>( + /*id=*/stream_executor::cuda::kCudaPlatformId, + /*pointer_size=*/llvm::DataLayout(xla::gpu::NVPTXCompiler::kDataLayout) + .getPointerSize(0 /* default address space */)); } static bool InitModule() { xla::TransferManager::RegisterTransferManager( - stream_executor::cuda::kCudaPlatformId, &CreateGpuTransferManager); + stream_executor::cuda::kCudaPlatformId, &CreateNVPTXTransferManager); return true; } static bool module_initialized = InitModule(); |