Allow cost estimates to differ per backend and include the estimates in the HLO profile. Add a summary table showing which categories have the most remaining opportunity for optimization.

PiperOrigin-RevId: 163780413
author: Bjarke Hammersholt Roune <broune@google.com> 2017-07-31 18:48:42 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-07-31 18:52:42 -0700
commit: b882d686ff00f73425a846c47e29a7c336435f25 (patch)
tree: 65ce9501716aac37d9bb26bc58960ed64b19b4ef /tensorflow/compiler/xla/service/gpu/gpu_executable.cc
parent: 14b73676136f6044ae372513e56189b5528a3b6e (diff)
1 files changed, 7 insertions, 2 deletions
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
index 7f9e60460c..a195d0b8e4 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_executable.cc
@@ -112,10 +112,11 @@ GpuExecutable::GpuExecutable(
     std::unique_ptr<HloModule> hlo_module,
     std::unique_ptr<BufferAssignment> assignment,
     HloCostAnalysis::ShapeSizeFunction shape_size_function)
-    : Executable(std::move(hlo_module), std::move(shape_size_function)),
+    : Executable(std::move(hlo_module)),
       ptx_(ptx),
       thunk_schedule_(std::move(thunk_schedule)),
-      assignment_(std::move(assignment)) {}
+      assignment_(std::move(assignment)),
+      shape_size_function_(std::move(shape_size_function)) {}
 
 Status GpuExecutable::ExecuteThunks(
     const ServiceExecutableRunOptions* run_options,
@@ -356,5 +357,9 @@ const PointsToSet& GpuExecutable::GetRootPointsToSet() const {
       module().entry_computation()->root_instruction());
 }
 
+std::unique_ptr<HloCostAnalysis> GpuExecutable::CreateCostAnalysis() const {
+  return MakeUnique<HloCostAnalysis>(shape_size_function_);
+}
+
 }  // namespace gpu
 }  // namespace xla
author	Bjarke Hammersholt Roune <broune@google.com>	2017-07-31 18:48:42 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-07-31 18:52:42 -0700
commit	b882d686ff00f73425a846c47e29a7c336435f25 (patch)
tree	65ce9501716aac37d9bb26bc58960ed64b19b4ef /tensorflow/compiler/xla/service/gpu/gpu_executable.cc
parent	14b73676136f6044ae372513e56189b5528a3b6e (diff)