diff options
author | 2017-03-09 15:50:51 -0800 | |
---|---|---|
committer | 2017-03-09 16:11:05 -0800 | |
commit | 7cdda60b60a6df4bc08ae8fb2895559f0c70c789 (patch) | |
tree | 282eb22930be751f9a71866c04ca298d99d67054 | |
parent | 00d0347ccebc3e29ffe541703b5a2f929b89da36 (diff) |
Add a sample program to evaluate the clock cycle profiler, and support clock cycle profiling on 64-bit Android
Change: 149704577
6 files changed, 126 insertions, 16 deletions
diff --git a/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD new file mode 100644 index 0000000000..621466c9ed --- /dev/null +++ b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD @@ -0,0 +1,63 @@ +# Description: +# contains parts of TensorFlow that are experimental or unstable and which are not supported. + +licenses(["notice"]) # Apache 2.0 + +load( + "//tensorflow:tensorflow.bzl", + "tf_copts", +) + +exports_files(["LICENSE"]) + +package(default_visibility = ["//visibility:public"]) + +filegroup( + name = "all_files", + srcs = glob( + ["**/*"], + exclude = [ + "**/METADATA", + "**/OWNERS", + ], + ), + visibility = ["//tensorflow:__subpackages__"], +) + +cc_binary( + name = "clock_cycle_profiling", + testonly = 1, + srcs = ["clock_cycle_profiling_main.cc"], + copts = tf_copts(), + linkopts = select({ + "//tensorflow:android": [ + "-pie", + "-s", + "-landroid", + "-ljnigraphics", + "-llog", + "-lm", + "-z defs", + "-s", + "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export + ], + "//conditions:default": [], + }), + linkstatic = 1, + visibility = ["//visibility:public"], + deps = select({ + "//tensorflow:android": [ + "//tensorflow/core:android_tensorflow_lib", + "//tensorflow/core:android_tensorflow_test_lib", + ], + "//conditions:default": [ + "//tensorflow/core:core_cpu", + "//tensorflow/core:lib", + "//tensorflow/core:framework", + "//tensorflow/core:framework_internal", + "//tensorflow/core:protos_all_cc", + "//tensorflow/core:tensorflow", + "//tensorflow/core:test", + ], + }), +) diff --git a/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc b/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc new file mode 100644 index 0000000000..a87ef953b7 --- /dev/null +++ b/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc @@ -0,0 +1,53 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#include "tensorflow/core/platform/logging.h" +#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h" +#include "tensorflow/core/platform/profile_utils/cpu_utils.h" + +int main(int argc, char** argv) { + static constexpr int LOOP_COUNT = 1000000; + +#if defined(__ANDROID_API__) +#if defined(__aarch64__) + LOG(INFO) << "android arm 64 bit"; +#endif +#if defined(__ARM_ARCH_7A__) + LOG(INFO) << "android arm 32 bit"; +#endif + LOG(INFO) << "Android API = " << __ANDROID_API__; + if (__ANDROID_API__ < 21) { + LOG(INFO) << "Cpu utils requires API level 21 or above."; + return 0; + } +#endif + + tensorflow::profile_utils::CpuUtils::EnableClockCycleProfiling(true); + + tensorflow::ClockCycleProfiler prof_global; + tensorflow::ClockCycleProfiler prof_internal; + + prof_global.Start(); + for (int i = 0; i < LOOP_COUNT; ++i) { + prof_internal.Start(); + prof_internal.Stop(); + } + prof_global.Stop(); + + prof_global.DumpStatistics("prof_global"); + prof_internal.DumpStatistics("prof_internal"); + + return 0; +} diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc index 8f9fa1dc53..fb1955edde 100644 --- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc +++ 
b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc @@ -15,7 +15,8 @@ limitations under the License. #include "tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h" -#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) +#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \ + (defined(__ARM_ARCH_7A__) || defined(__aarch64__)) #include <asm/unistd.h> #include <linux/perf_event.h> @@ -126,5 +127,5 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile( } // namespace profile_utils } // namespace tensorflow -// defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) -#endif +#endif // defined(__ANDROID__) && (__ANDROID_API__ >= 21) && + // (defined(__ARM_ARCH_7A__) || defined(__aarch64__)) diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h index f4ac7964f3..8604b01c53 100644 --- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h +++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h @@ -22,7 +22,8 @@ limitations under the License. 
#include "tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h" #include "tensorflow/core/platform/types.h" -#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) +#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \ + (defined(__ARM_ARCH_7A__) || defined(__aarch64__)) struct perf_event_attr; @@ -60,7 +61,7 @@ class AndroidArmV7ACpuUtilsHelper : public ICpuUtilsHelper { } // profile_utils } // tensorflow -// defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) -#endif +#endif // defined(__ANDROID__) && (__ANDROID_API__ >= 21) && + // (defined(__ARM_ARCH_7A__) || defined(__aarch64__)) #endif // TENSORFLOW_PLATFORM_PROFILEUTILS_ANDROID_ARMV7A_CPU_UTILS_HELPER_H__ diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.cc b/tensorflow/core/platform/profile_utils/cpu_utils.cc index 9d1e464cb3..22400565d6 100644 --- a/tensorflow/core/platform/profile_utils/cpu_utils.cc +++ b/tensorflow/core/platform/profile_utils/cpu_utils.cc @@ -56,12 +56,7 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr; /* static */ int64 CpuUtils::GetCycleCounterFrequencyImpl() { // TODO(satok): do not switch by macro here #if defined(__ANDROID__) -#if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) - // This profiling tool only supports Ver 21 or upper on Android return GetCpuUtilsHelperSingletonInstance().CalculateCpuFrequency(); -#else // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) - return INVALID_FREQUENCY; -#endif #elif defined(__linux__) double bogomips; FILE* fp = popen("grep '^bogomips' /proc/cpuinfo | head -1", "r"); @@ -108,7 +103,8 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr; if (cpu_utils_helper_instance_ != nullptr) { LOG(FATAL) << "cpu_utils_helper_instance_ is already instantiated."; } -#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) +#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \ + (defined(__ARM_ARCH_7A__) || 
defined(__aarch64__)) cpu_utils_helper_instance_ = new AndroidArmV7ACpuUtilsHelper(); #else cpu_utils_helper_instance_ = new DefaultCpuUtilsHelper(); diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.h b/tensorflow/core/platform/profile_utils/cpu_utils.h index 2d80f2e89c..19471ec858 100644 --- a/tensorflow/core/platform/profile_utils/cpu_utils.h +++ b/tensorflow/core/platform/profile_utils/cpu_utils.h @@ -53,11 +53,7 @@ class CpuUtils { // is less than 2 ^ 61. static inline uint64 GetCurrentClockCycle() { #if defined(__ANDROID__) -#if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) return GetCpuUtilsHelperSingletonInstance().GetCurrentClockCycle(); -#else // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) - return DUMMY_CYCLE_CLOCK; -#endif // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21) // ---------------------------------------------------------------- #elif defined(__x86_64__) || defined(__amd64__) uint64_t high, low; |