about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author: A. Unique TensorFlower <gardener@tensorflow.org>, 2017-03-09 15:50:51 -0800
committer: TensorFlower Gardener <gardener@tensorflow.org>, 2017-03-09 16:11:05 -0800
commit: 7cdda60b60a6df4bc08ae8fb2895559f0c70c789 (patch)
tree: 282eb22930be751f9a71866c04ca298d99d67054
parent: 00d0347ccebc3e29ffe541703b5a2f929b89da36 (diff)
Add a sample program to evaluate the clock cycle profiler, and support clock cycle profiling on 64-bit Android
Change: 149704577
-rw-r--r-- tensorflow/contrib/hvx/clock_cycle_profiling/BUILD | 63
-rw-r--r-- tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc | 53
-rw-r--r-- tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc | 7
-rw-r--r-- tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h | 7
-rw-r--r-- tensorflow/core/platform/profile_utils/cpu_utils.cc | 8
-rw-r--r-- tensorflow/core/platform/profile_utils/cpu_utils.h | 4
6 files changed, 126 insertions, 16 deletions
diff --git a/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD
new file mode 100644
index 0000000000..621466c9ed
--- /dev/null
+++ b/tensorflow/contrib/hvx/clock_cycle_profiling/BUILD
@@ -0,0 +1,63 @@
+# Description:
+# Sample program for evaluating the clock cycle profiler; experimental and not supported.
+
+licenses(["notice"]) # Apache 2.0
+
+load(
+ "//tensorflow:tensorflow.bzl",
+ "tf_copts",
+)
+
+exports_files(["LICENSE"])
+
+package(default_visibility = ["//visibility:public"])
+
+filegroup(
+ name = "all_files",  # Collects the files matched by the glob below.
+ srcs = glob(
+ ["**/*"],  # Everything under this package, recursively...
+ exclude = [  # ...minus the bookkeeping files named here.
+ "**/METADATA",
+ "**/OWNERS",
+ ],
+ ),
+ visibility = ["//tensorflow:__subpackages__"],  # Narrower than the package default (public).
+)
+
+cc_binary(
+ name = "clock_cycle_profiling",  # Binary built from the single source file below.
+ testonly = 1,  # Presumably needed because deps include test libraries; confirm.
+ srcs = ["clock_cycle_profiling_main.cc"],
+ copts = tf_copts(),
+ linkopts = select({  # Extra linker flags are applied to Android builds only.
+ "//tensorflow:android": [
+ "-pie",
+ "-s",
+ "-landroid",
+ "-ljnigraphics",
+ "-llog",
+ "-lm",
+ "-z defs",
+ "-s",  # NOTE(review): "-s" is already listed above; this repeat looks redundant.
+ "-Wl,--exclude-libs,ALL", # Exclude syms in all libs from auto export
+ ],
+ "//conditions:default": [],  # No extra linker flags on other platforms.
+ }),
+ linkstatic = 1,
+ visibility = ["//visibility:public"],
+ deps = select({  # Dependency set differs between Android and everything else.
+ "//tensorflow:android": [
+ "//tensorflow/core:android_tensorflow_lib",
+ "//tensorflow/core:android_tensorflow_test_lib",
+ ],
+ "//conditions:default": [  # Non-Android builds list the core targets individually.
+ "//tensorflow/core:core_cpu",
+ "//tensorflow/core:lib",
+ "//tensorflow/core:framework",
+ "//tensorflow/core:framework_internal",
+ "//tensorflow/core:protos_all_cc",
+ "//tensorflow/core:tensorflow",
+ "//tensorflow/core:test",
+ ],
+ }),
+)
diff --git a/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc b/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc
new file mode 100644
index 0000000000..a87ef953b7
--- /dev/null
+++ b/tensorflow/contrib/hvx/clock_cycle_profiling/clock_cycle_profiling_main.cc
@@ -0,0 +1,53 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include "tensorflow/core/platform/logging.h"
+#include "tensorflow/core/platform/profile_utils/clock_cycle_profiler.h"
+#include "tensorflow/core/platform/profile_utils/cpu_utils.h"
+
+int main(int argc, char** argv) {  // argc/argv are not used.
+ static constexpr int LOOP_COUNT = 1000000;  // Start()/Stop() pairs timed below.
+ // Sample driver: runs LOOP_COUNT Start()/Stop() pairs and dumps the statistics.
+#if defined(__ANDROID_API__)  // Android builds: log architecture and API level.
+#if defined(__aarch64__)
+ LOG(INFO) << "android arm 64 bit";
+#endif  // defined(__aarch64__)
+#if defined(__ARM_ARCH_7A__)
+ LOG(INFO) << "android arm 32 bit";
+#endif  // defined(__ARM_ARCH_7A__)
+ LOG(INFO) << "Android API = " << __ANDROID_API__;
+ if (__ANDROID_API__ < 21) {  // Below API 21: log the requirement and exit with 0.
+ LOG(INFO) << "Cpu utils requires API level 21 or above.";
+ return 0;
+ }
+#endif  // defined(__ANDROID_API__)
+ // Enable clock cycle profiling before the profilers below are started.
+ tensorflow::profile_utils::CpuUtils::EnableClockCycleProfiling(true);
+
+ tensorflow::ClockCycleProfiler prof_global;
+ tensorflow::ClockCycleProfiler prof_internal;
+ // prof_global brackets the whole loop; prof_internal runs once per iteration.
+ prof_global.Start();
+ for (int i = 0; i < LOOP_COUNT; ++i) {
+ prof_internal.Start();  // Nothing runs between Start() and Stop().
+ prof_internal.Stop();
+ }
+ prof_global.Stop();
+ // Dump each profiler's statistics; the string is presumably an output label.
+ prof_global.DumpStatistics("prof_global");
+ prof_internal.DumpStatistics("prof_internal");
+
+ return 0;
+}
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
index 8f9fa1dc53..fb1955edde 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.cc
@@ -15,7 +15,8 @@ limitations under the License.
#include "tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h"
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \
+ (defined(__ARM_ARCH_7A__) || defined(__aarch64__))
#include <asm/unistd.h>
#include <linux/perf_event.h>
@@ -126,5 +127,5 @@ int64 AndroidArmV7ACpuUtilsHelper::ReadCpuFrequencyFile(
} // namespace profile_utils
} // namespace tensorflow
-// defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
-#endif
+#endif // defined(__ANDROID__) && (__ANDROID_API__ >= 21) &&
+ // (defined(__ARM_ARCH_7A__) || defined(__aarch64__))
diff --git a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h
index f4ac7964f3..8604b01c53 100644
--- a/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h
+++ b/tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h
@@ -22,7 +22,8 @@ limitations under the License.
#include "tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h"
#include "tensorflow/core/platform/types.h"
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \
+ (defined(__ARM_ARCH_7A__) || defined(__aarch64__))
struct perf_event_attr;
@@ -60,7 +61,7 @@ class AndroidArmV7ACpuUtilsHelper : public ICpuUtilsHelper {
} // profile_utils
} // tensorflow
-// defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
-#endif
+#endif // defined(__ANDROID__) && (__ANDROID_API__ >= 21) &&
+ // (defined(__ARM_ARCH_7A__) || defined(__aarch64__))
#endif // TENSORFLOW_PLATFORM_PROFILEUTILS_ANDROID_ARMV7A_CPU_UTILS_HELPER_H__
diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.cc b/tensorflow/core/platform/profile_utils/cpu_utils.cc
index 9d1e464cb3..22400565d6 100644
--- a/tensorflow/core/platform/profile_utils/cpu_utils.cc
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.cc
@@ -56,12 +56,7 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr;
/* static */ int64 CpuUtils::GetCycleCounterFrequencyImpl() {
// TODO(satok): do not switch by macro here
#if defined(__ANDROID__)
-#if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
- // This profiling tool only supports Ver 21 or upper on Android
return GetCpuUtilsHelperSingletonInstance().CalculateCpuFrequency();
-#else // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
- return INVALID_FREQUENCY;
-#endif
#elif defined(__linux__)
double bogomips;
FILE* fp = popen("grep '^bogomips' /proc/cpuinfo | head -1", "r");
@@ -108,7 +103,8 @@ static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr;
if (cpu_utils_helper_instance_ != nullptr) {
LOG(FATAL) << "cpu_utils_helper_instance_ is already instantiated.";
}
-#if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
+#if defined(__ANDROID__) && (__ANDROID_API__ >= 21) && \
+ (defined(__ARM_ARCH_7A__) || defined(__aarch64__))
cpu_utils_helper_instance_ = new AndroidArmV7ACpuUtilsHelper();
#else
cpu_utils_helper_instance_ = new DefaultCpuUtilsHelper();
diff --git a/tensorflow/core/platform/profile_utils/cpu_utils.h b/tensorflow/core/platform/profile_utils/cpu_utils.h
index 2d80f2e89c..19471ec858 100644
--- a/tensorflow/core/platform/profile_utils/cpu_utils.h
+++ b/tensorflow/core/platform/profile_utils/cpu_utils.h
@@ -53,11 +53,7 @@ class CpuUtils {
// is less than 2 ^ 61.
static inline uint64 GetCurrentClockCycle() {
#if defined(__ANDROID__)
-#if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
return GetCpuUtilsHelperSingletonInstance().GetCurrentClockCycle();
-#else // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
- return DUMMY_CYCLE_CLOCK;
-#endif // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
// ----------------------------------------------------------------
#elif defined(__x86_64__) || defined(__amd64__)
uint64_t high, low;