summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Benjamin Barenblat <bbaren@google.com>2021-04-21 10:18:44 -0400
committerGravatar Benjamin Barenblat <bbaren@google.com>2021-04-21 10:18:44 -0400
commit297410a9906290b441144816f3c112dfeefb7099 (patch)
tree229aa2e40823b79f2a885a71ef2aad60a0751d09
parenta9940c9c7fd5ec3377d120d373e0deadce01e05f (diff)
Add patch to make symbolizer compute Thumb function bounds correctly
-rw-r--r--debian/changelog1
-rw-r--r--debian/patches/series1
-rw-r--r--debian/patches/thumb-function-bounds.diff96
3 files changed, 98 insertions, 0 deletions
diff --git a/debian/changelog b/debian/changelog
index ca40b912..10359f55 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -4,6 +4,7 @@ abseil (0~20210324.0-1) UNRELEASED; urgency=medium
* Correct debian/watch search URLs to avoid picking up rc versions.
* Mangle upstream version in debian/watch to match manual mangling in
debian/changelog.
+ * Compute Thumb function bounds correctly. (Closes: #987314)
* Reenable unit tests on arm64.
-- Benjamin Barenblat <bbaren@debian.org> Thu, 08 Apr 2021 10:28:01 -0400
diff --git a/debian/patches/series b/debian/patches/series
index 663e1655..e963b998 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -2,3 +2,4 @@ configure.diff
std-hash.diff
latomic.diff
cordrepring-typo.diff
+thumb-function-bounds.diff
diff --git a/debian/patches/thumb-function-bounds.diff b/debian/patches/thumb-function-bounds.diff
new file mode 100644
index 00000000..1fd8c729
--- /dev/null
+++ b/debian/patches/thumb-function-bounds.diff
@@ -0,0 +1,96 @@
+From: Benjamin Barenblat <bbaren@google.com>
+Subject: Correct Thumb function bound computation in the symbolizer
+Forwarded: yes
+Applied-Upstream: https://github.com/abseil/abseil-cpp/commit/1ae9b71c474628d60eb251a3f62967fe64151bb2
+
+On 32-bit ARM, all functions are aligned to multiples of two bytes, and
+the lowest-order bit in a function’s address is ignored by the CPU when
+computing branch targets. That bit is still present in instructions and
+ELF symbol tables, though; it’s repurposed to indicate whether the
+function contains ARM or Thumb code. If the symbolizer doesn’t ignore
+that bit, it will believe Thumb functions have boundaries that are off
+by one byte, so instruct the symbolizer to null out the lowest-order bit
+after retrieving it from the symbol table.
+
+The author works at Google. Upstream applied this patch as Piper
+revision 369254082 and exported it to GitHub; the Applied-Upstream URL
+above points to the exported commit.
+
+--- a/absl/debugging/symbolize_elf.inc
++++ b/absl/debugging/symbolize_elf.inc
+@@ -701,6 +701,16 @@
+ const char *start_address =
+ ComputeOffset(original_start_address, relocation);
+
++#ifdef __arm__
++ // ARM functions are always aligned to multiples of two bytes; the
++ // lowest-order bit in start_address is ignored by the CPU and indicates
++ // whether the function contains ARM (0) or Thumb (1) code. We don't care
++ // about what encoding is being used; we just want the real start address
++ // of the function.
++ start_address = reinterpret_cast<const char *>(
++ reinterpret_cast<uintptr_t>(start_address) & ~1);
++#endif
++
+ if (deref_function_descriptor_pointer &&
+ InSection(original_start_address, opd)) {
+ // The opd section is mapped into memory. Just dereference
+--- a/absl/debugging/symbolize_test.cc
++++ b/absl/debugging/symbolize_test.cc
+@@ -477,6 +477,46 @@
+ #endif
+ }
+
++#if defined(__arm__) && ABSL_HAVE_ATTRIBUTE(target)
++// Test that we correctly identify bounds of Thumb functions on ARM.
++//
++// Thumb functions have the lowest-order bit set in their addresses in the ELF
++// symbol table. This requires some extra logic to properly compute function
++// bounds. To test this logic, nudge a Thumb function right up against an ARM
++// function and try to symbolize the ARM function.
++//
++// A naive implementation will simply use the Thumb function's entry point as
++// written in the symbol table and will therefore treat the Thumb function as
++// extending one byte further in the instruction stream than it actually does.
++// When asked to symbolize the start of the ARM function, it will identify an
++// overlap between the Thumb and ARM functions, and it will return the name of
++// the Thumb function.
++//
++// A correct implementation, on the other hand, will null out the lowest-order
++// bit in the Thumb function's entry point. It will correctly compute the end of
++// the Thumb function, it will find no overlap between the Thumb and ARM
++// functions, and it will return the name of the ARM function.
++
++__attribute__((target("thumb"))) int ArmThumbOverlapThumb(int x) {
++ return x * x * x;
++}
++
++__attribute__((target("arm"))) int ArmThumbOverlapArm(int x) {
++ return x * x * x;
++}
++
++void ABSL_ATTRIBUTE_NOINLINE TestArmThumbOverlap() {
++#if defined(ABSL_HAVE_ATTRIBUTE_NOINLINE)
++ const char *symbol = TrySymbolize((void *)&ArmThumbOverlapArm);
++ ABSL_RAW_CHECK(symbol != nullptr, "TestArmThumbOverlap failed");
++ ABSL_RAW_CHECK(strcmp("ArmThumbOverlapArm()", symbol) == 0,
++ "TestArmThumbOverlap failed");
++ std::cout << "TestArmThumbOverlap passed" << std::endl;
++#endif
++}
++
++#endif // defined(__arm__) && ABSL_HAVE_ATTRIBUTE(target)
++
+ #elif defined(_WIN32)
+ #if !defined(ABSL_CONSUME_DLL)
+
+@@ -551,6 +591,9 @@
+ TestWithPCInsideInlineFunction();
+ TestWithPCInsideNonInlineFunction();
+ TestWithReturnAddress();
++#if defined(__arm__) && ABSL_HAVE_ATTRIBUTE(target)
++ TestArmThumbOverlap();
++#endif
+ #endif
+
+ return RUN_ALL_TESTS();