diff options
author | Peter Hawkins <phawkins@google.com> | 2017-01-09 12:04:37 -0800 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-01-09 12:26:35 -0800 |
commit | 1e67c90e2caceeff82d09793d1ef5fa0300d219b (patch) | |
tree | 6567ea8b0fa01fcfcd608b7e4c636865d33c7032 /tensorflow/compiler/xla/service/hlo_execution_profile.cc | |
parent | 7ad7e4dfae4344d6b955b5eb61dc4b6bb792f1b3 (diff) |
Initial open-source release of XLA: Accelerated Linear Algebra.
XLA is a compiler-based linear algebra execution engine that targets CPUs, GPUs and custom accelerators.
XLA is still experimental; we are releasing it early to get the community involved.
Change: 143990941
Diffstat (limited to 'tensorflow/compiler/xla/service/hlo_execution_profile.cc')
-rw-r--r-- | tensorflow/compiler/xla/service/hlo_execution_profile.cc | 87 |
1 files changed, 87 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/hlo_execution_profile.cc b/tensorflow/compiler/xla/service/hlo_execution_profile.cc new file mode 100644 index 0000000000..edba55f6cd --- /dev/null +++ b/tensorflow/compiler/xla/service/hlo_execution_profile.cc @@ -0,0 +1,87 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/compiler/xla/service/hlo_execution_profile.h" + +#include <algorithm> +#include <utility> +#include <vector> + +#include "tensorflow/compiler/xla/service/hlo_instruction.h" +#include "tensorflow/compiler/xla/types.h" +#include "tensorflow/compiler/xla/util.h" +#include "tensorflow/core/lib/strings/numbers.h" +#include "tensorflow/core/lib/strings/strcat.h" +#include "tensorflow/core/lib/strings/stringprintf.h" + +namespace xla { + +void HloExecutionProfile::AddProfileResult(const HloInstruction* hlo, + uint64 cycles_taken) { + hlo_to_cycles_taken_[hlo] = cycles_taken; +} + +uint64 HloExecutionProfile::GetProfileResult(const HloInstruction& hlo) const { + auto iter = hlo_to_cycles_taken_.find(&hlo); + if (iter == hlo_to_cycles_taken_.end()) { + return 0; + } + return iter->second; +} + +string HloExecutionProfile::ToString( + const DeviceDescription& device_description, + const HloCostAnalysis& cost_analysis) const { + using Item = std::pair<const HloInstruction*, uint64>; + std::vector<Item> items(hlo_to_cycles_taken_.begin(), + hlo_to_cycles_taken_.end()); + auto custom_less = [](const Item& lhs, const Item& rhs) { + return lhs.second > rhs.second; + }; + std::sort(items.begin(), items.end(), custom_less); + string result; + const int64 total_cycles = total_cycles_executed(); + double clock_rate_ghz = device_description.clock_rate_ghz(); + auto append_item = [&result, total_cycles, clock_rate_ghz]( + int64 cycles, int64 flops, const string& name) { + double nsecs = cycles / clock_rate_ghz; + tensorflow::strings::StrAppend( + &result, + tensorflow::strings::Printf( + "%15lld cycles (%6.2f%%) :: %12.1f usec @ f_nom :: %18s :: %s", + cycles, cycles / static_cast<double>(total_cycles) * 100, + nsecs / 1e3, + flops <= 0 ? "<none>" : HumanReadableNumFlops(flops, nsecs).c_str(), + name.c_str())); + }; + tensorflow::strings::StrAppend( + &result, + tensorflow::strings::Printf("HLO execution profile: (%s @ f_nom)\n\t", + tensorflow::strings::HumanReadableElapsedTime( + total_cycles / clock_rate_ghz / 1e9) + .c_str())); + append_item(total_cycles, -1, "[total]"); + for (const auto& item : items) { + tensorflow::strings::StrAppend(&result, "\n\t"); + auto flops = item.first == nullptr + ? -1 + : cost_analysis.hlo_to_flop_count(*item.first); + string display = item.first == nullptr ? "<none>" : item.first->ToString(); + append_item(item.second, flops, display); + } + return result; +} + +} // namespace xla |