[XLA] Add transfer buffer to infeed.

Mirroring the TransferBufferToDevice interface, add a TransferBufferToInfeed interface to the transfer managers.

PiperOrigin-RevId: 159152897
author: Jacques Pienaar <jpienaar@google.com> 2017-06-15 14:16:59 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-06-15 14:20:48 -0700
commit: b52debb4e63cce1e0733d6d34975d4efb9934680 (patch)
tree: 6cfc65a41e8c98b938272f2df6436e1b9a48f8dd /tensorflow
parent: a203bbb98c7c72d7a68064098ec051bba20219cd (diff)
7 files changed, 55 insertions, 9 deletions
diff --git a/tensorflow/compiler/xla/service/cpu_transfer_manager.cc b/tensorflow/compiler/xla/service/cpu_transfer_manager.cc
index 2d9d9c7de6..5e431687c4 100644
--- a/tensorflow/compiler/xla/service/cpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/cpu_transfer_manager.cc
@@ -75,20 +75,27 @@ Status CpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
                          ShapeUtil::HumanString(literal.shape()).c_str());
   }
 
-  cpu::runtime::InfeedManager* infeed_manager =
-      cpu::runtime::GetInfeedManager();
-
   int64 size = GetByteSizeRequirement(shape);
   if (size > std::numeric_limits<int32>::max()) {
     return Unimplemented("Infeed shape is too large: %s needs %lld bytes",
                          ShapeUtil::HumanString(literal.shape()).c_str(), size);
   }
+
+  return TransferBufferToInfeed(executor, size,
+                                LiteralUtil::InternalData(literal));
+}
+
+Status CpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
+                                                  int64 size,
+                                                  const void* source) {
   int32 size_32 = static_cast<int32>(size);
   CpuInfeedBuffer* queued_buffer = new CpuInfeedBuffer(size_32);
-  TF_RETURN_IF_ERROR(TransferBufferToDevice(
-      executor, /*size=*/size, /*source=*/LiteralUtil::InternalData(literal),
-      queued_buffer->device_memory()));
+  TF_RETURN_IF_ERROR(TransferBufferToDevice(executor, /*size=*/size,
+                                            /*source=*/source,
+                                            queued_buffer->device_memory()));
 
+  cpu::runtime::InfeedManager* infeed_manager =
+      cpu::runtime::GetInfeedManager();
   infeed_manager->EnqueueBuffer(queued_buffer);
 
   return Status::OK();
diff --git a/tensorflow/compiler/xla/service/cpu_transfer_manager.h b/tensorflow/compiler/xla/service/cpu_transfer_manager.h
index 727462252d..5d10b62a17 100644
--- a/tensorflow/compiler/xla/service/cpu_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/cpu_transfer_manager.h
@@ -37,6 +37,8 @@ class CpuTransferManager : public GenericTransferManager {
 
   Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor,
                                  const Literal& literal) override;
+  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
+                                int64 size, const void* source) override;
 
  private:
   TF_DISALLOW_COPY_AND_ASSIGN(CpuTransferManager);
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.cc b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
index eb8b93330f..b1c7eadf6a 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.cc
@@ -159,7 +159,13 @@ Status GenericTransferManager::TransferLiteralToDevice(
 
 Status GenericTransferManager::TransferLiteralToInfeed(
     se::StreamExecutor* executor, const Literal& literal) {
-  return Unimplemented("Infeed is not supported on GPU (b/30467474)");
+  return Unimplemented("Generic transfer to Infeed");
+}
+
+Status GenericTransferManager::TransferBufferToInfeed(
+    perftools::gputools::StreamExecutor* executor, int64 size,
+    const void* source) {
+  return Unimplemented("Generic transfer to Infeed");
 }
 
 Status GenericTransferManager::TransferLiteralFromOutfeed(
diff --git a/tensorflow/compiler/xla/service/generic_transfer_manager.h b/tensorflow/compiler/xla/service/generic_transfer_manager.h
index 2fbdb94f06..48c061d28e 100644
--- a/tensorflow/compiler/xla/service/generic_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/generic_transfer_manager.h
@@ -54,6 +54,8 @@ class GenericTransferManager : public TransferManager {
 
   Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor,
                                  const Literal& literal) override;
+  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
+                                int64 size, const void* source) override;
 
   Status TransferLiteralFromOutfeed(
       perftools::gputools::StreamExecutor* executor, const Shape& literal_shape,
diff --git a/tensorflow/compiler/xla/service/gpu_transfer_manager.cc b/tensorflow/compiler/xla/service/gpu_transfer_manager.cc
index 4971de74ae..3e4e590d3b 100644
--- a/tensorflow/compiler/xla/service/gpu_transfer_manager.cc
+++ b/tensorflow/compiler/xla/service/gpu_transfer_manager.cc
@@ -89,6 +89,12 @@ Status GpuTransferManager::TransferLiteralToInfeed(se::StreamExecutor* executor,
   return Status::OK();
 }
 
+Status GpuTransferManager::TransferBufferToInfeed(se::StreamExecutor* executor,
+                                                  int64 size,
+                                                  const void* source) {
+  return TransferBufferToInfeedInternal(executor, size, source).status();
+}
+
 StatusOr<gpu::InfeedBuffer*>
 GpuTransferManager::TransferLiteralToInfeedInternal(
     se::StreamExecutor* executor, const Literal& literal) {
@@ -107,6 +113,12 @@ GpuTransferManager::TransferLiteralToInfeedInternal(
                          ShapeUtil::HumanString(literal.shape()).c_str());
   }
 
+  return TransferBufferToInfeedInternal(executor, size,
+                                        LiteralUtil::InternalData(literal));
+}
+
+StatusOr<gpu::InfeedBuffer*> GpuTransferManager::TransferBufferToInfeedInternal(
+    se::StreamExecutor* executor, int64 size, const void* source) {
   gpu::InfeedManager* infeed_manager = gpu::GetOrCreateInfeedManager();
   se::Stream* stream = infeed_manager->GetStream(executor);
   if (stream == nullptr) {
@@ -114,8 +126,7 @@ GpuTransferManager::TransferLiteralToInfeedInternal(
   }
 
   gpu::InfeedBuffer* buffer = new gpu::InfeedBuffer(executor, size);
-  stream->ThenMemcpy(buffer->device_memory(),
-                     LiteralUtil::InternalData(literal), size);
+  stream->ThenMemcpy(buffer->device_memory(), source, size);
 
   VLOG(2) << "Queued infeed data on stream " << stream;
 
diff --git a/tensorflow/compiler/xla/service/gpu_transfer_manager.h b/tensorflow/compiler/xla/service/gpu_transfer_manager.h
index 6b736b9b9a..4fc6c911a4 100644
--- a/tensorflow/compiler/xla/service/gpu_transfer_manager.h
+++ b/tensorflow/compiler/xla/service/gpu_transfer_manager.h
@@ -38,6 +38,8 @@ class GpuTransferManager : public GenericTransferManager {
 
   Status TransferLiteralToInfeed(perftools::gputools::StreamExecutor* executor,
                                  const Literal& literal) override;
+  Status TransferBufferToInfeed(perftools::gputools::StreamExecutor* executor,
+                                int64 size, const void* source) override;
 
  private:
   // Internal helper function for TransferLiteralToInfeed(). Input
@@ -45,6 +47,11 @@ class GpuTransferManager : public GenericTransferManager {
   StatusOr<gpu::InfeedBuffer*> TransferLiteralToInfeedInternal(
       perftools::gputools::StreamExecutor* executor, const Literal& literal);
 
+  // Internal helper for TransferBufferToInfeed() and TransferLiteralToInfeed().
+  StatusOr<gpu::InfeedBuffer*> TransferBufferToInfeedInternal(
+      perftools::gputools::StreamExecutor* executor, int64 size,
+      const void* source);
+
   TF_DISALLOW_COPY_AND_ASSIGN(GpuTransferManager);
 };
 
diff --git a/tensorflow/compiler/xla/service/transfer_manager.h b/tensorflow/compiler/xla/service/transfer_manager.h
index 15f6b7bfb4..c79ffa9cd7 100644
--- a/tensorflow/compiler/xla/service/transfer_manager.h
+++ b/tensorflow/compiler/xla/service/transfer_manager.h
@@ -65,6 +65,17 @@ class TransferManager {
       perftools::gputools::StreamExecutor* executor,
       const Literal& literal) = 0;
 
+  // Transfers a memory block of the given size from 'source' buffer to the
+  // Infeed interface of the device using the given executor.
+  //
+  // size is the size to transfer from source in bytes.
+  //
+  // source is the source data that must be in the target-dependent layout that
+  // the Infeed HLO used in the computation expects.
+  virtual Status TransferBufferToInfeed(
+      perftools::gputools::StreamExecutor* executor, int64 size,
+      const void* source) = 0;
+
   // Transfers the given literal from the Outfeed interface of the device,
   // using the given executor.
   virtual Status TransferLiteralFromOutfeed(
author	Jacques Pienaar <jpienaar@google.com>	2017-06-15 14:16:59 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-06-15 14:20:48 -0700
commit	b52debb4e63cce1e0733d6d34975d4efb9934680 (patch)
tree	6cfc65a41e8c98b938272f2df6436e1b9a48f8dd /tensorflow
parent	a203bbb98c7c72d7a68064098ec051bba20219cd (diff)