Diffstat (limited to 'tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc')
-rw-r--r--  tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc | 64
1 file changed, 22 insertions(+), 42 deletions(-)
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
index 3c8018a030..79b3f1efec 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_transfer_manager.cc
@@ -22,7 +22,7 @@ limitations under the License.
#include "llvm/IR/DataLayout.h"
#include "tensorflow/compiler/xla/literal.h"
#include "tensorflow/compiler/xla/literal_util.h"
-#include "tensorflow/compiler/xla/service/gpu/gpu_compiler.h"
+#include "tensorflow/compiler/xla/service/gpu/nvptx_compiler.h"
#include "tensorflow/compiler/xla/service/gpu/outfeed_manager.h"
#include "tensorflow/compiler/xla/shape_util.h"
#include "tensorflow/compiler/xla/status_macros.h"
@@ -36,15 +36,14 @@ limitations under the License.
#include "tensorflow/core/platform/stream_executor_no_cuda.h"
namespace xla {
+namespace gpu {
// TODO(b/30467474) Once GPU infeed implementation settles, consider
// folding back the cpu and gpu infeed implementations into a generic
// one if possible.
-GpuTransferManager::GpuTransferManager()
- : GenericTransferManager(
- se::cuda::kCudaPlatformId,
- /*pointer_size=*/llvm::DataLayout(gpu::GpuCompiler::kDataLayout)
- .getPointerSize(0 /* default address space */)) {}
+GpuTransferManager::GpuTransferManager(se::Platform::Id id,
+ unsigned pointer_size)
+ : GenericTransferManager(id, pointer_size) {}
Status GpuTransferManager::TransferLiteralToInfeed(
se::StreamExecutor* executor, const LiteralSlice& literal) {
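The hunk above stops hard-coding the CUDA platform id and the pointer size in the constructor; the caller now supplies both (see the registration code at the bottom of this diff). A minimal sketch of where such a pointer size comes from, using an illustrative layout string rather than the real NVPTXCompiler::kDataLayout:

    // Sketch: deriving a pointer size from an LLVM data layout string.
    // The layout string here is illustrative, not NVPTXCompiler::kDataLayout.
    #include <cstdio>
    #include "llvm/IR/DataLayout.h"

    int main() {
      // "p:64:64:64" declares 64-bit pointers in address space 0.
      llvm::DataLayout layout("e-p:64:64:64-i64:64");
      unsigned pointer_size = layout.getPointerSize(/*AS=*/0);  // 8 bytes
      std::printf("pointer size: %u bytes\n", pointer_size);
      return 0;
    }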
@@ -52,48 +51,28 @@ Status GpuTransferManager::TransferLiteralToInfeed(
VLOG(2) << "Transferring literal to infeed with shape: "
<< ShapeUtil::HumanString(shape);
- if (!ShapeUtil::IsTuple(shape)) {
- int64 size = GetByteSizeRequirement(shape);
- return TransferBufferToInfeed(executor, size, literal.untyped_data());
- }
-
// For a tuple, we transfer each of its elements to the device and
// enqueue the resulting destination device addresses with the
// infeed manager.
- std::vector<gpu::InfeedBuffer*> buffers;
- auto cleanup = tensorflow::gtl::MakeCleanup([buffers]() {
- for (gpu::InfeedBuffer* b : buffers) {
- b->Done();
- }
- });
+ ShapeTree<InfeedBuffer> buffer_tree(shape);
TF_RETURN_IF_ERROR(ShapeUtil::ForEachSubshapeWithStatus(
shape, [&](const Shape& literal_subshape, const ShapeIndex& index) {
if (ShapeUtil::IsArray(literal_subshape)) {
int64 tuple_element_size = GetByteSizeRequirement(literal_subshape);
TF_ASSIGN_OR_RETURN(
- gpu::InfeedBuffer * buffer,
+ *buffer_tree.mutable_element(index),
TransferBufferToInfeedInternal(executor, tuple_element_size,
literal.untyped_data(index)));
- buffers.push_back(buffer);
}
return Status::OK();
}));
- cleanup.release();
- return EnqueueBuffersToInfeed(executor, buffers);
-}
-
-Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
- int64 size,
- const void* source) {
- TF_ASSIGN_OR_RETURN(gpu::InfeedBuffer * buffer,
- TransferBufferToInfeedInternal(executor, size, source));
- return EnqueueBuffersToInfeed(executor, {buffer});
+ return EnqueueBuffersToInfeed(executor, std::move(buffer_tree));
}
Status GpuTransferManager::EnqueueBuffersToInfeed(
- se::StreamExecutor* executor, std::vector<gpu::InfeedBuffer*> buffers) {
+ se::StreamExecutor* executor, ShapeTree<InfeedBuffer> buffers) {
gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
se::Stream* stream = infeed_manager->GetStream(executor);
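The hunk above swaps the manually cleaned-up std::vector<gpu::InfeedBuffer*> for a ShapeTree<InfeedBuffer> of move-only values, filled per subshape via mutable_element(index), so an early error return releases any already-transferred buffers automatically. A self-contained sketch of that ownership pattern, with a stand-in MiniBuffer type in place of the real InfeedBuffer and a plain vector in place of ShapeTree:

    // Sketch of the ownership change: a move-only RAII buffer means an early
    // error return releases everything without a manual cleanup list.
    // MiniBuffer is a stand-in, not the XLA InfeedBuffer type.
    #include <cstdlib>
    #include <utility>
    #include <vector>

    class MiniBuffer {
     public:
      explicit MiniBuffer(size_t size) : data_(std::malloc(size)) {}
      MiniBuffer(MiniBuffer&& other) : data_(other.data_) { other.data_ = nullptr; }
      MiniBuffer& operator=(MiniBuffer&& other) {
        std::swap(data_, other.data_);
        return *this;
      }
      MiniBuffer(const MiniBuffer&) = delete;  // move-only, like InfeedBuffer
      ~MiniBuffer() { std::free(data_); }      // destructor frees; no Done() call

     private:
      void* data_;
    };

    bool TransferAll(const std::vector<size_t>& sizes) {
      std::vector<MiniBuffer> buffers;  // stands in for ShapeTree<InfeedBuffer>
      for (size_t s : sizes) {
        if (s == 0) return false;       // early exit: buffers cleans itself up
        buffers.emplace_back(s);
      }
      // ... hand std::move(buffers) to the consumer ...
      return true;
    }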
@@ -103,21 +82,18 @@ Status GpuTransferManager::EnqueueBuffersToInfeed(
// possible.
Status block_status = stream->BlockHostUntilDone();
if (!block_status.ok()) {
- for (gpu::InfeedBuffer* b : buffers) {
- b->Done();
- }
return InternalError("Failed to complete data transfer on stream %p: %s",
stream, block_status.error_message().c_str());
}
- infeed_manager->EnqueueBuffers(buffers);
+ infeed_manager->EnqueueDestination(std::move(buffers));
VLOG(2) << "Infeed data transferred";
return Status::OK();
}
-StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
+StatusOr<InfeedBuffer> GpuTransferManager::TransferBufferToInfeedInternal(
se::StreamExecutor* executor, int64 size, const void* source) {
if (size > std::numeric_limits<int32>::max()) {
return InvalidArgument("Infeed shape is too large: needs %lld bytes", size);
@@ -133,12 +109,12 @@ StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
return InternalError("Failed to obtain a stream");
}
- gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size);
- stream->ThenMemcpy(buffer->device_memory(), source, size);
+ InfeedBuffer buffer(executor, size);
+ stream->ThenMemcpy(buffer.device_memory(), source, size);
VLOG(2) << "Queued infeed data on stream " << stream;
- return buffer;
+ return std::move(buffer);
}
static std::unique_ptr<Literal> ShapeTreeToLiteral(
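TransferBufferToInfeedInternal now returns the buffer by value inside the StatusOr instead of as a raw heap pointer. A sketch of the same return pattern, using absl::StatusOr and std::unique_ptr as stand-ins for xla::StatusOr and InfeedBuffer:

    // Sketch: returning a move-only value through StatusOr. absl::StatusOr
    // and std::unique_ptr stand in for xla::StatusOr and InfeedBuffer.
    #include <memory>
    #include <utility>
    #include "absl/status/status.h"
    #include "absl/status/statusor.h"

    absl::StatusOr<std::unique_ptr<int>> MakeBuffer(int size) {
      if (size <= 0) {
        return absl::InvalidArgumentError("size must be positive");
      }
      auto buffer = std::make_unique<int>(size);
      // Spelling out std::move, as the patch does, guarantees the move-only
      // local is moved (never copied) into the StatusOr return value.
      return std::move(buffer);
    }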
@@ -191,22 +167,26 @@ Status GpuTransferManager::TransferLiteralFromOutfeed(
// Give the tree of buffers to the outfeed manager. The device will fill it
// while we're waiting for it below.
gpu::OutfeedManager* outfeed_manager = gpu::GetOrCreateOutfeedManager();
- outfeed_manager->EnqueueOutfeedDestination(&outfeed_buffers);
+ outfeed_manager->EnqueueDestination(&outfeed_buffers);
// Now turn the tree of buffers back into a literal.
*literal = std::move(*ShapeTreeToLiteral(&outfeed_buffers));
return Status::OK();
}
+} // namespace gpu
} // namespace xla
-static std::unique_ptr<xla::TransferManager> CreateGpuTransferManager() {
- return xla::MakeUnique<xla::GpuTransferManager>();
+static std::unique_ptr<xla::TransferManager> CreateNVPTXTransferManager() {
+ return xla::MakeUnique<xla::gpu::GpuTransferManager>(
+ /*id=*/stream_executor::cuda::kCudaPlatformId,
+ /*pointer_size=*/llvm::DataLayout(xla::gpu::NVPTXCompiler::kDataLayout)
+ .getPointerSize(0 /* default address space */));
}
static bool InitModule() {
xla::TransferManager::RegisterTransferManager(
- stream_executor::cuda::kCudaPlatformId, &CreateGpuTransferManager);
+ stream_executor::cuda::kCudaPlatformId, &CreateNVPTXTransferManager);
return true;
}
static bool module_initialized = InitModule();
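The module-initializer idiom at the end registers a per-platform factory at static-initialization time. A sketch of that registration pattern with an illustrative registry; the real xla::TransferManager::RegisterTransferManager API differs:

    // Sketch of the registration pattern used above: a factory keyed by
    // platform id, installed by a static initializer. The registry type is
    // illustrative, not the real xla::TransferManager API.
    #include <functional>
    #include <map>
    #include <memory>

    struct TransferManagerLike { virtual ~TransferManagerLike() = default; };
    using PlatformId = int;
    using Factory = std::function<std::unique_ptr<TransferManagerLike>()>;

    std::map<PlatformId, Factory>& Registry() {
      // Leaked on purpose so it survives static destruction order issues.
      static auto* registry = new std::map<PlatformId, Factory>();
      return *registry;
    }

    constexpr PlatformId kCudaLikePlatformId = 1;  // stand-in for kCudaPlatformId

    static std::unique_ptr<TransferManagerLike> CreateManager() {
      return std::make_unique<TransferManagerLike>();
    }

    static bool InitModule() {
      Registry().emplace(kCudaLikePlatformId, &CreateManager);
      return true;
    }
    // The one-time side effect rides on a static initializer, exactly as in
    // the patch's module_initialized line.
    static bool module_initialized = InitModule();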