aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/compiler/xla/service/hlo_execution_profile.cc
diff options
context:
space:
mode:
authorGravatar Peter Hawkins <phawkins@google.com>2017-01-09 12:04:37 -0800
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2017-01-09 12:26:35 -0800
commit 1e67c90e2caceeff82d09793d1ef5fa0300d219b (patch)
tree 6567ea8b0fa01fcfcd608b7e4c636865d33c7032 /tensorflow/compiler/xla/service/hlo_execution_profile.cc
parent 7ad7e4dfae4344d6b955b5eb61dc4b6bb792f1b3 (diff)
Initial open-source release of XLA: Accelerated Linear Algebra.
XLA is a compiler-based linear algebra execution engine that targets CPUs, GPUs and custom accelerators. XLA is still experimental; we are releasing it early to get the community involved. Change: 143990941
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_execution_profile.cc')
-rw-r--r-- tensorflow/compiler/xla/service/hlo_execution_profile.cc 87
1 files changed, 87 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
new file mode 100644
index 0000000000..edba55f6cd
--- /dev/null
+++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc
@@ -0,0 +1,87 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/compiler/xla/service/hlo_execution_profile.h"
+
+#include <algorithm>
+#include <utility>
+#include <vector>
+
+#include "tensorflow/compiler/xla/service/hlo_instruction.h"
+#include "tensorflow/compiler/xla/types.h"
+#include "tensorflow/compiler/xla/util.h"
+#include "tensorflow/core/lib/strings/numbers.h"
+#include "tensorflow/core/lib/strings/strcat.h"
+#include "tensorflow/core/lib/strings/stringprintf.h"
+
+namespace xla {
+
+// Records `cycles_taken` as the profile result for `hlo`. The entry is written
+// through operator[], so a value stored earlier for the same instruction
+// pointer is replaced.
+void HloExecutionProfile::AddProfileResult(const HloInstruction* hlo,
+                                           uint64 cycles_taken) {
+  auto& recorded_cycles = hlo_to_cycles_taken_[hlo];
+  recorded_cycles = cycles_taken;
+}
+
+// Returns the cycle count recorded for `hlo`, or 0 when no result has been
+// recorded for that instruction.
+uint64 HloExecutionProfile::GetProfileResult(const HloInstruction& hlo) const {
+  const auto found = hlo_to_cycles_taken_.find(&hlo);
+  const bool has_result = found != hlo_to_cycles_taken_.end();
+  return has_result ? found->second : 0;
+}
+
+// Renders the recorded profile as text: a header line carrying the total
+// elapsed time, a "[total]" row, and then one row per recorded instruction in
+// order of decreasing cycle count.
+//
+// Each row shows the cycle count, its share of the total cycles, the elapsed
+// time in microseconds derived from the device clock rate, a flop-rate string
+// (or "<none>" when the flop count is not positive), and the instruction text.
+//
+// device_description: provides clock_rate_ghz(), used to convert cycles to
+//   time.
+// cost_analysis: provides hlo_to_flop_count() for each recorded instruction.
+string HloExecutionProfile::ToString(
+    const DeviceDescription& device_description,
+    const HloCostAnalysis& cost_analysis) const {
+  using Item = std::pair<const HloInstruction*, uint64>;
+  std::vector<Item> items(hlo_to_cycles_taken_.begin(),
+                          hlo_to_cycles_taken_.end());
+  // Largest cycle counts first.
+  std::sort(items.begin(), items.end(), [](const Item& lhs, const Item& rhs) {
+    return lhs.second > rhs.second;
+  });
+
+  string result;
+  const int64 total_cycles = total_cycles_executed();
+  const double clock_rate_ghz = device_description.clock_rate_ghz();
+
+  auto append_item = [&result, total_cycles, clock_rate_ghz](
+                         int64 cycles, int64 flops, const string& name) {
+    // Cycles divided by a rate in GHz yields nanoseconds.
+    const double nsecs = cycles / clock_rate_ghz;
+    // A zero total would make the ratio 0/0 or x/0 and print "nan"/"inf" in
+    // the percentage column; report 0% instead.
+    const double percent =
+        total_cycles != 0 ? cycles / static_cast<double>(total_cycles) * 100
+                          : 0.0;
+    // The temporary returned by HumanReadableNumFlops lives until the end of
+    // the enclosing full expression, so its c_str() stays valid for Printf.
+    tensorflow::strings::StrAppend(
+        &result,
+        tensorflow::strings::Printf(
+            "%15lld cycles (%6.2f%%) :: %12.1f usec @ f_nom :: %18s :: %s",
+            cycles, percent, nsecs / 1e3,
+            flops <= 0 ? "<none>" : HumanReadableNumFlops(flops, nsecs).c_str(),
+            name.c_str()));
+  };
+
+  // Header: total elapsed time, converted from nanoseconds to seconds.
+  tensorflow::strings::StrAppend(
+      &result,
+      tensorflow::strings::Printf("HLO execution profile: (%s @ f_nom)\n\t",
+                                  tensorflow::strings::HumanReadableElapsedTime(
+                                      total_cycles / clock_rate_ghz / 1e9)
+                                      .c_str()));
+  // A flop count of -1 makes the row print "<none>" in the flops column.
+  append_item(total_cycles, -1, "[total]");
+  for (const auto& item : items) {
+    tensorflow::strings::StrAppend(&result, "\n\t");
+    // A null instruction key has no cost-analysis entry and no printable form.
+    auto flops = item.first == nullptr
+                     ? -1
+                     : cost_analysis.hlo_to_flop_count(*item.first);
+    string display = item.first == nullptr ? "<none>" : item.first->ToString();
+    append_item(item.second, flops, display);
+  }
+  return result;
+}
+
+} // namespace xla