author    | 2017-11-16 14:03:51 -0800
committer | 2017-11-16 14:09:31 -0800
commit    | 22d948d2739ecaadfb4091302f2050ba9cf0d0c1 (patch)
tree      | e1d1568d8456b39c944e10dfd6d6c7da056c6821 /tensorflow/compiler/xla/service/shaped_buffer.cc
parent    | e2a60582bf28fa29c871736d10edad06e660776d (diff)
Add methods on TransferManager which transfer to/from device memory specified by a ShapedBuffer rather than a DeviceMemoryBase. This is part of a broader replacement of DeviceMemoryBase with ShapedBuffer in several XLA interfaces. With this change, TransferManager no longer has to allocate memory to transfer tuples to the device. The existing methods using DeviceMemoryBase will be removed in a follow-up CL.
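As a rough illustration of the direction, a transfer round-trip against the new ShapedBuffer-based entry points might look like the sketch below; the exact method names and signatures are assumptions based on this description, not quotes from the patch:

```cpp
// Sketch only: host<->device transfer keyed off a ShapedBuffer. The
// TransferLiteralToDevice/TransferLiteralFromDevice signatures shown here
// are assumptions based on the commit description.
#include "tensorflow/compiler/xla/literal_util.h"
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/status_macros.h"

namespace se = ::perftools::gputools;

xla::StatusOr<std::unique_ptr<xla::Literal>> RoundTrip(
    se::StreamExecutor* executor, xla::TransferManager* transfer_manager,
    const xla::Literal& literal, const xla::ShapedBuffer& device_buffer) {
  // Host -> device: each leaf buffer (and tuple index table) described by the
  // ShapedBuffer is populated in place, so no scratch allocation is needed.
  TF_RETURN_IF_ERROR(transfer_manager->TransferLiteralToDevice(
      executor, literal, device_buffer));
  // Device -> host: read the buffers described by the same ShapedBuffer back
  // into a freshly allocated Literal.
  return transfer_manager->TransferLiteralFromDevice(executor, device_buffer);
}
```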
Various related changes:
* Make the transfer_manager_test an xla_test so that it runs on all platforms.
* Make several of the TransferManager methods protected.
* Change ScopedShapedBuffer::Allocate to only allocate device memory buffers, and not fill in the tuple index table. The index table is filled in by the transfer manager. This is a cleaner separation of concerns.
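A minimal sketch of the resulting division of labor, assuming GetByteSizeRequirement remains callable here and that the transfer manager exposes an index-table-writing entry point (the name WriteTupleIndexTables below is hypothetical; the diff itself only shows the new shape_size_fn parameter):

```cpp
// Sketch only: the two-step flow implied by this change. Allocate() reserves
// one device buffer per subshape (tuple index tables included) using the
// caller-supplied size function; the TransferManager then fills in the tuple
// index tables. "WriteTupleIndexTables" is a hypothetical name for that step.
#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/service/transfer_manager.h"
#include "tensorflow/compiler/xla/status_macros.h"

namespace se = ::perftools::gputools;

xla::StatusOr<std::unique_ptr<xla::ScopedShapedBuffer>> AllocateForTransfer(
    const xla::Shape& shape, xla::DeviceMemoryAllocator* allocator,
    xla::TransferManager* transfer_manager, se::StreamExecutor* executor) {
  // Step 1: pure allocation; no device writes happen here anymore.
  TF_ASSIGN_OR_RETURN(
      std::unique_ptr<xla::ScopedShapedBuffer> buffer,
      xla::ScopedShapedBuffer::Allocate(
          shape, allocator, /*device_ordinal=*/0,
          [&](const xla::Shape& subshape) {
            return transfer_manager->GetByteSizeRequirement(subshape);
          }));
  // Step 2: the transfer manager writes each tuple's array of element
  // addresses to device memory.
  TF_RETURN_IF_ERROR(
      transfer_manager->WriteTupleIndexTables(executor, *buffer));
  return std::move(buffer);
}
```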
PiperOrigin-RevId: 176015628
Diffstat (limited to 'tensorflow/compiler/xla/service/shaped_buffer.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/shaped_buffer.cc | 79
1 file changed, 35 insertions(+), 44 deletions(-)
diff --git a/tensorflow/compiler/xla/service/shaped_buffer.cc b/tensorflow/compiler/xla/service/shaped_buffer.cc
index a57ebf59e7..a7539a1a11 100644
--- a/tensorflow/compiler/xla/service/shaped_buffer.cc
+++ b/tensorflow/compiler/xla/service/shaped_buffer.cc
@@ -21,17 +21,19 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/layout_util.h"
 #include "tensorflow/compiler/xla/ptr_util.h"
-#include "tensorflow/compiler/xla/service/transfer_manager.h"
 #include "tensorflow/compiler/xla/shape_util.h"
 #include "tensorflow/compiler/xla/status_macros.h"
 #include "tensorflow/compiler/xla/types.h"
 #include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
 #include "tensorflow/core/platform/logging.h"
 
 namespace se = ::perftools::gputools;
 
 namespace xla {
 
+using ::tensorflow::strings::Appendf;
+
 /* static */ StatusOr<std::unique_ptr<ShapedBuffer>>
 ShapedBuffer::MakeArrayShapedBuffer(const Shape& shape,
                                     const se::Platform* platform,
@@ -80,10 +82,33 @@ se::DeviceMemoryBase* ShapedBuffer::mutable_buffer(const ShapeIndex& index) {
   return &buffers_[shape_index_to_buffer_entry_.element(index)];
 }
 
+string ShapedBuffer::ToString() const {
+  string s = "ShapedBuffer(" + platform_->Name() + "):\n";
+  ShapeUtil::ForEachSubshape(
+      shape(), [this, &s](const Shape& subshape, const ShapeIndex& index) {
+        string shape_str;
+        if (ShapeUtil::IsTuple(subshape)) {
+          shape_str = "tuple";
+        } else {
+          shape_str = ShapeUtil::HumanStringWithLayout(subshape);
+        }
+        const se::DeviceMemoryBase& memory = buffer(index);
+        Appendf(&s, "  %s%p (%lld bytes) : %s\n",
+                string(index.size() * 2, ' ').c_str(), memory.opaque(),
+                memory.size(), shape_str.c_str());
+      });
+  return s;
+}
+
+std::ostream& operator<<(std::ostream& out, const ShapedBuffer& buffer) {
+  out << buffer.ToString();
+  return out;
+}
+
 /* static */ StatusOr<std::unique_ptr<ScopedShapedBuffer>>
-ScopedShapedBuffer::Allocate(const Shape& shape,
-                             DeviceMemoryAllocator* allocator,
-                             int device_ordinal) {
+ScopedShapedBuffer::Allocate(
+    const Shape& shape, DeviceMemoryAllocator* allocator, int device_ordinal,
+    const std::function<int64(const Shape&)>& shape_size_fn) {
   if (!LayoutUtil::HasLayout(shape)) {
     return InvalidArgument("Shape must have a layout: %s",
                            ShapeUtil::HumanStringWithLayout(shape).c_str());
@@ -93,51 +118,17 @@ ScopedShapedBuffer::Allocate(const Shape& shape,
       WrapUnique(new ScopedShapedBuffer(shape, allocator, device_ordinal));
 
   // Allocate an appropriate sized buffer for each element in the shape
-  // including the tuple pointer arrays. Gather tuple element addresses in
-  // 'element_addresses'. These will be written in the respective tuple's array
-  // of pointers on the device.
-  TF_ASSIGN_OR_RETURN(TransferManager * transfer_manager,
-                      TransferManager::GetForPlatform(allocator->platform()));
-  ShapeTree<std::vector<se::DeviceMemoryBase>> element_addresses(shape);
+  // including the tuple pointer arrays.
   for (auto& pair : shaped_buffer->shape_index_to_buffer_entry_) {
     const ShapeIndex& index = pair.first;
     size_t& buffer_entry = pair.second;
-    TF_ASSIGN_OR_RETURN(
-        se::DeviceMemoryBase memory_base,
-        shaped_buffer->allocator_->Allocate(
-            shaped_buffer->device_ordinal(),
-            transfer_manager->GetByteSizeRequirement(
-                ShapeUtil::GetSubshape(shaped_buffer->shape(), index))));
+    TF_ASSIGN_OR_RETURN(se::DeviceMemoryBase memory_base,
+                        shaped_buffer->allocator_->Allocate(
+                            shaped_buffer->device_ordinal(),
+                            shape_size_fn(ShapeUtil::GetSubshape(
+                                shaped_buffer->shape(), index))));
     shaped_buffer->buffers_.push_back(memory_base);
     buffer_entry = shaped_buffer->buffers_.size() - 1;
-
-    // If this is a tuple element, then push the address on to the
-    // vector of tuple element addresses.
-    if (!index.empty()) {
-      ShapeIndex parent_index = index;
-      parent_index.pop_back();
-      element_addresses.mutable_element(parent_index)->push_back(memory_base);
-    }
-  }
-
-  // Fill in the tuple pointer arrays with the addresses of their respective
-  // elements.
-  TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
-                      allocator->platform()->ExecutorForDevice(
-                          shaped_buffer->device_ordinal()));
-  for (const auto& pair : element_addresses) {
-    const ShapeIndex& index = pair.first;
-    const std::vector<se::DeviceMemoryBase>& addresses = pair.second;
-    const Shape& subshape = ShapeUtil::GetSubshape(shape, index);
-
-    if (addresses.empty()) {
-      TF_RET_CHECK(!ShapeUtil::IsTuple(subshape) ||
-                   ShapeUtil::TupleElementCount(subshape) == 0);
-      continue;
-    }
-    TF_RET_CHECK(ShapeUtil::IsTuple(subshape));
-    TF_RETURN_IF_ERROR(transfer_manager->WriteTuplePointersToDevice(
-        executor, addresses, subshape, shaped_buffer->mutable_buffer(index)));
   }
 
   return std::move(shaped_buffer);
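For a concrete sense of the new ToString()/operator&lt;&lt;, a sketch of calling them on a freshly allocated tuple-shaped buffer; the size function, addresses, and sizes in the sample output are invented for illustration:

```cpp
// Sketch only: exercising the new Allocate() signature and the operator<<
// added in the diff above. ShapeUtil::ByteSizeOf stands in for whatever size
// function the caller (typically a TransferManager) would supply.
#include <iostream>

#include "tensorflow/compiler/xla/service/shaped_buffer.h"
#include "tensorflow/compiler/xla/shape_util.h"

void DumpBuffer(const xla::Shape& shape,
                xla::DeviceMemoryAllocator* allocator) {
  auto buffer_or = xla::ScopedShapedBuffer::Allocate(
      shape, allocator, /*device_ordinal=*/0, [](const xla::Shape& subshape) {
        return xla::ShapeUtil::ByteSizeOf(subshape, /*pointer_size=*/8);
      });
  if (!buffer_or.ok()) return;
  // operator<< forwards to ToString(). For a (f32[2,2], f32[]) tuple this
  // prints something like (addresses invented):
  //   ShapedBuffer(CUDA):
  //     0x7f5c34000000 (16 bytes) : tuple
  //       0x7f5c34001000 (16 bytes) : f32[2,2]{1,0}
  //       0x7f5c34002000 (4 bytes) : f32[]
  std::cout << *buffer_or.ValueOrDie();
}
```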