[XLA:HLO] Run HeapSimulator on whole-module if all computations are sequential.

Previously the HeapSimulator was only run on a per-computation basis. This meant that if you had many sub-computations in your module (e.g. many While loops), the temporary buffers inside the conditions and bodies of the loops all occupied distinct memory ranges. This is overly pessimistic if all computations in the module are sequential. This CL changes the HeapSimulator to also run a whole-module simulation, calling Alloc and Free on sub-computation buffers at the appropriate nested spot, right next to the calling instruction. The BufferAssigner is updated to take advantage of this when possible, as is MinimumMemoryForSequence. Change: 154908856
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-05-02 17:21:15 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-05-02 18:30:05 -0700
commit: 5ad12420e78d0aa756fd2a41945468e826e267c2 (patch)
tree: 400a2866984e554e7ba1ac0de02682822ef0d6dc /tensorflow/compiler/xla/service/heap_simulator.h
parent: 58196d4bf923d6fa2500e84d9d22ed8227ba305c (diff)
1 files changed, 27 insertions, 5 deletions
diff --git a/tensorflow/compiler/xla/service/heap_simulator.h b/tensorflow/compiler/xla/service/heap_simulator.h
index 0ce2906767..3d98046261 100644
--- a/tensorflow/compiler/xla/service/heap_simulator.h
+++ b/tensorflow/compiler/xla/service/heap_simulator.h
@@ -23,6 +23,7 @@ limitations under the License.
 
 #include "tensorflow/compiler/xla/service/hlo_computation.h"
 #include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/service/hlo_ordering.h"
 #include "tensorflow/compiler/xla/service/logical_buffer.h"
 #include "tensorflow/compiler/xla/service/tuple_points_to_analysis.h"
 #include "tensorflow/compiler/xla/statusor.h"
@@ -63,17 +64,32 @@ class HeapSimulator {
   };
 
   // Run the heap simulation with the given algorithm, assuming the given
-  // sequential ordering of instructions.  The 'instruction_sequence' must
-  // contain a topologically-consistent total ordering of all instructions in
-  // the computation.  The result is invalid if instructions are not run in
-  // exactly this sequence.
+  // module_sequence, which must contain a topologically-consistent total
+  // ordering of all instructions within each computation. The result is invalid
+  // if instructions are not run in exactly this sequence.
+  //
+  // Running heap simulation on the whole module tends to save memory, compared
+  // to running on a per-computation basis, since we can re-use buffer space for
+  // called sub-computations.
   //
   // If 'buffers_to_assign' is provided, only those buffers are assigned
   // offsets, otherwise all buffers defined by the instructions are assigned.
   static StatusOr<Result> Run(
+      std::unique_ptr<HeapAlgorithm> algorithm, const HloModule& module,
+      const SequentialHloOrdering::HloModuleSequence& module_sequence,
+      const TuplePointsToAnalysis& points_to_analysis,
+      const LogicalBuffer::SizeFunction& size_fn,
+      const tensorflow::gtl::FlatSet<const LogicalBuffer*>* buffers_to_assign =
+          nullptr);
+
+  // Same as above, but runs on a single computation. The 'instruction_sequence'
+  // must contain a topologically-consistent total ordering of all instructions
+  // in the computation. The result is invalid if instructions are not run in
+  // exactly this sequence.
+  static StatusOr<Result> Run(
       std::unique_ptr<HeapAlgorithm> algorithm,
-      const std::vector<const HloInstruction*>& instruction_sequence,
       const HloComputation& computation,
+      const std::vector<const HloInstruction*>& instruction_sequence,
       const TuplePointsToAnalysis& points_to_analysis,
       const LogicalBuffer::SizeFunction& size_fn,
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>* buffers_to_assign =
@@ -86,6 +102,12 @@ class HeapSimulator {
       const tensorflow::gtl::FlatSet<const LogicalBuffer*>* buffers_to_assign);
   ~HeapSimulator();
 
+  Status RunComputation(
+      const HloComputation& computation,
+      const std::vector<const HloInstruction*>& instruction_sequence,
+      const TuplePointsToAnalysis& points_to_analysis,
+      const SequentialHloOrdering::HloModuleSequence* module_sequence);
+
   bool IgnoreBuffer(const LogicalBuffer* buffer) const;
   void Alloc(const LogicalBuffer* buffer);
   void Free(const LogicalBuffer* buffer);
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-05-02 17:21:15 -0800
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-05-02 18:30:05 -0700
commit	5ad12420e78d0aa756fd2a41945468e826e267c2 (patch)
tree	400a2866984e554e7ba1ac0de02682822ef0d6dc /tensorflow/compiler/xla/service/heap_simulator.h
parent	58196d4bf923d6fa2500e84d9d22ed8227ba305c (diff)