diff options
author | Chris Mihelich <cmihelic@google.com> | 2024-05-06 05:56:26 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2024-05-06 05:57:30 -0700 |
commit | a28ee5b51c9ea41707d9a5d2d358ad77850264c4 (patch) | |
tree | 98b00df448483279f43bd48d015067e8dbdbcb21 | |
parent | c1e1b47d989978cde8c5a2a219df425b785a0c47 (diff) |
Recognize some simple Rust mangled names in Demangle.
PiperOrigin-RevId: 631018414
Change-Id: Ice8efa0af4cb1f72b5d62fbbea4cb12cbead8634
-rw-r--r-- | CMake/AbseilDll.cmake | 2 | ||||
-rw-r--r-- | absl/debugging/BUILD.bazel | 24 | ||||
-rw-r--r-- | absl/debugging/CMakeLists.txt | 15 | ||||
-rw-r--r-- | absl/debugging/internal/demangle.cc | 6 | ||||
-rw-r--r-- | absl/debugging/internal/demangle.h | 2 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust.cc | 432 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust.h | 47 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_rust_test.cc | 216 | ||||
-rw-r--r-- | absl/debugging/internal/demangle_test.cc | 9 |
9 files changed, 751 insertions, 2 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake index 6d871552..2a40532a 100644 --- a/CMake/AbseilDll.cmake +++ b/CMake/AbseilDll.cmake @@ -124,6 +124,8 @@ set(ABSL_INTERNAL_DLL_FILES "debugging/internal/address_is_readable.h" "debugging/internal/demangle.cc" "debugging/internal/demangle.h" + "debugging/internal/demangle_rust.cc" + "debugging/internal/demangle_rust.h" "debugging/internal/elf_mem_image.cc" "debugging/internal/elf_mem_image.h" "debugging/internal/examine_stack.cc" diff --git a/absl/debugging/BUILD.bazel b/absl/debugging/BUILD.bazel index 5baff7a1..22494554 100644 --- a/absl/debugging/BUILD.bazel +++ b/absl/debugging/BUILD.bazel @@ -219,8 +219,14 @@ cc_library( cc_library( name = "demangle_internal", - srcs = ["internal/demangle.cc"], - hdrs = ["internal/demangle.h"], + srcs = [ + "internal/demangle.cc", + "internal/demangle_rust.cc", + ], + hdrs = [ + "internal/demangle.h", + "internal/demangle_rust.h", + ], copts = ABSL_DEFAULT_COPTS, linkopts = ABSL_DEFAULT_LINKOPTS, visibility = [ @@ -235,6 +241,20 @@ cc_library( ) cc_test( + name = "demangle_rust_test", + srcs = ["internal/demangle_rust_test.cc"], + copts = ABSL_TEST_COPTS, + linkopts = ABSL_DEFAULT_LINKOPTS, + deps = [ + ":demangle_internal", + "//absl/base:config", + "//absl/base:core_headers", + "@com_google_googletest//:gtest", + "@com_google_googletest//:gtest_main", + ], +) + +cc_test( name = "demangle_test", srcs = ["internal/demangle_test.cc"], copts = ABSL_TEST_COPTS, diff --git a/absl/debugging/CMakeLists.txt b/absl/debugging/CMakeLists.txt index 65e2af88..027a6be2 100644 --- a/absl/debugging/CMakeLists.txt +++ b/absl/debugging/CMakeLists.txt @@ -196,8 +196,10 @@ absl_cc_library( demangle_internal HDRS "internal/demangle.h" + "internal/demangle_rust.h" SRCS "internal/demangle.cc" + "internal/demangle_rust.cc" COPTS ${ABSL_DEFAULT_COPTS} DEPS @@ -208,6 +210,19 @@ absl_cc_library( absl_cc_test( NAME + demangle_rust_test + SRCS + "internal/demangle_rust_test.cc" + COPTS + ${ABSL_TEST_COPTS} + DEPS + absl::demangle_internal + absl::config + GTest::gmock_main +) + +absl_cc_test( + NAME demangle_test SRCS "internal/demangle_test.cc" diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc index 82420c8b..71d4eb0a 100644 --- a/absl/debugging/internal/demangle.cc +++ b/absl/debugging/internal/demangle.cc @@ -19,6 +19,7 @@ #include "absl/debugging/internal/demangle.h" +#include <cstddef> #include <cstdint> #include <cstdio> #include <cstdlib> @@ -26,6 +27,7 @@ #include <string> #include "absl/base/config.h" +#include "absl/debugging/internal/demangle_rust.h" #if ABSL_INTERNAL_HAS_CXA_DEMANGLE #include <cxxabi.h> @@ -2110,6 +2112,10 @@ static bool Overflowed(const State *state) { // The demangler entry point. bool Demangle(const char* mangled, char* out, size_t out_size) { + if (mangled[0] == '_' && mangled[1] == 'R') { + return DemangleRustSymbolEncoding(mangled, out, out_size); + } + State state; InitState(&state, mangled, out, out_size); return ParseTopLevelMangledName(&state) && !Overflowed(&state) && diff --git a/absl/debugging/internal/demangle.h b/absl/debugging/internal/demangle.h index 146d1150..e75d1473 100644 --- a/absl/debugging/internal/demangle.h +++ b/absl/debugging/internal/demangle.h @@ -56,6 +56,8 @@ namespace debugging_internal { // // See the unit test for more examples. // +// Support for Rust mangled names is in development; see demangle_rust.h. +// // Note: we might want to write demanglers for ABIs other than Itanium // C++ ABI in the future. bool Demangle(const char* mangled, char* out, size_t out_size); diff --git a/absl/debugging/internal/demangle_rust.cc b/absl/debugging/internal/demangle_rust.cc new file mode 100644 index 00000000..7086cab2 --- /dev/null +++ b/absl/debugging/internal/demangle_rust.cc @@ -0,0 +1,432 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/demangle_rust.h" + +#include <cstddef> +#include <cstdint> +#include <cstring> +#include <limits> + +#include "absl/base/attributes.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +namespace { + +// Same step limit as the C++ demangler in demangle.cc uses. +constexpr int kMaxReturns = 1 << 17; + +bool IsDigit(char c) { return '0' <= c && c <= '9'; } +bool IsLower(char c) { return 'a' <= c && c <= 'z'; } +bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; } +bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); } +bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; } + +// Parser for Rust symbol mangling v0, whose grammar is defined here: +// +// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary +class RustSymbolParser { + public: + // Prepares to demangle the given encoding, a Rust symbol name starting with + // _R, into the output buffer [out, out_end). The caller is expected to + // continue by calling the new object's Parse function. + RustSymbolParser(const char* encoding, char* out, char* const out_end) + : encoding_(encoding), out_(out), out_end_(out_end) { + if (out_ != out_end_) *out_ = '\0'; + } + + // Parses the constructor's encoding argument, writing output into the range + // [out, out_end). Returns true on success and false for input whose + // structure was not recognized or exceeded implementation limits, such as by + // nesting structures too deep. In either case *this should not be used + // again. + ABSL_MUST_USE_RESULT bool Parse() && { + // Recursively parses the grammar production named by callee, then resumes + // execution at the next statement. + // + // Recursive-descent parsing is a beautifully readable translation of a + // grammar, but it risks stack overflow if implemented by naive recursion on + // the C++ call stack. So we simulate recursion by goto and switch instead, + // keeping a bounded stack of "return addresses" in the stack_ member. + // + // The callee argument is a statement label. We goto that label after + // saving the "return address" on stack_. The next continue statement in + // the for loop below "returns" from this "call". + // + // The caller argument names the return point. Each value of caller must + // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the + // definition of enum ReturnAddress. The switch implements the control + // transfer from the end of a "called" subroutine back to the statement + // after the "call". + // + // Note that not all the grammar productions have to be packed into the + // switch, but only those which appear in a cycle in the grammar. Anything + // acyclic can be written as ordinary functions and function calls, e.g., + // ParseIdentifier. +#define ABSL_DEMANGLER_RECURSE(callee, caller) \ + do { \ + if (depth_ == data_stack_pointer_) return false; \ + /* The next continue will switch on this saved value ... */ \ + stack_[depth_++] = caller; \ + goto callee; \ + /* ... and will land here, resuming the suspended code. */ \ + case caller: {} \ + } while (0) + + // Parse the encoding, counting completed recursive calls to guard against + // excessively complex input and infinite-loop bugs. + int iter = 0; + goto whole_encoding; + for (; iter < kMaxReturns && depth_ > 0; ++iter) { + // This switch resumes the code path most recently suspended by + // ABSL_DEMANGLER_RECURSE. + switch (static_cast<ReturnAddress>(stack_[--depth_])) { + // + // symbol-name -> + // _R decimal-number? path instantiating-crate? vendor-specific-suffix? + whole_encoding: + if (!Eat('_') || !Eat('R')) return false; + // decimal-number? is always empty today, so proceed to path, which + // can't start with a decimal digit. + ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate); + if (IsAlpha(Peek())) { + ++silence_depth_; // Print nothing more from here on. + ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix); + } + switch (Take()) { + case '.': case '$': case '\0': return true; + } + return false; // unexpected trailing content + + // path -> crate-root | inherent-impl | trait-impl | trait-definition | + // nested-path | generic-args | backref + path: + switch (Take()) { + case 'C': goto crate_root; + case 'M': return false; // inherent-impl not yet implemented + case 'X': return false; // trait-impl not yet implemented + case 'Y': return false; // trait-definition not yet implemented + case 'N': goto nested_path; + case 'I': return false; // generic-args not yet implemented + case 'B': return false; // backref not yet implemented + default: return false; + } + + // crate-root -> C identifier (C consumed above) + crate_root: + if (!ParseIdentifier()) return false; + continue; + + // nested-path -> N namespace path identifier (N consumed above) + // namespace -> lower | upper + nested_path: + // Uppercase namespaces must be saved on the stack so we can print + // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed. + if (IsUpper(Peek())) { + if (!PushByte(static_cast<std::uint8_t>(Take()))) return false; + ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace); + if (!Emit("::")) return false; + if (!ParseIdentifier(static_cast<char>(PopByte()))) return false; + continue; + } + + // Lowercase namespaces, however, are never represented in the output; + // they all emit just ::name. + if (IsLower(Take())) { + ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace); + if (!Emit("::")) return false; + if (!ParseIdentifier()) return false; + continue; + } + + // Neither upper or lower + return false; + } + } + + return false; // hit iteration limit or a bug in our stack handling + } + + private: + // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls. + enum ReturnAddress : std::uint8_t { + kInstantiatingCrate, + kVendorSpecificSuffix, + kIdentifierInUppercaseNamespace, + kIdentifierInLowercaseNamespace, + }; + + // Element count for the stack_ array. A larger kStackSize accommodates more + // deeply nested names at the cost of a larger footprint on the C++ call + // stack. + enum { kStackSize = 256 }; + + // Returns the next input character without consuming it. + char Peek() const { return encoding_[pos_]; } + + // Consumes and returns the next input character. + char Take() { return encoding_[pos_++]; } + + // If the next input character is the given character, consumes it and returns + // true; otherwise returns false without consuming a character. + ABSL_MUST_USE_RESULT bool Eat(char want) { + if (encoding_[pos_] != want) return false; + ++pos_; + return true; + } + + // Provided there is enough remaining output space, appends c to the output, + // writing a fresh NUL terminator afterward, and returns true. Returns false + // if the output buffer had less than two bytes free. + ABSL_MUST_USE_RESULT bool EmitChar(char c) { + if (silence_depth_ > 0) return true; + if (out_end_ - out_ < 2) return false; + *out_++ = c; + *out_ = '\0'; + return true; + } + + // Provided there is enough remaining output space, appends the C string token + // to the output, followed by a NUL character, and returns true. Returns + // false if not everything fit into the output buffer. + ABSL_MUST_USE_RESULT bool Emit(const char* token) { + if (silence_depth_ > 0) return true; + const std::size_t token_length = std::strlen(token); + const std::size_t bytes_to_copy = token_length + 1; // token and final NUL + if (static_cast<std::size_t>(out_end_ - out_) < bytes_to_copy) return false; + std::memcpy(out_, token, bytes_to_copy); + out_ += token_length; + return true; + } + + // Provided there is enough remaining output space, appends the decimal form + // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the + // output, followed by a NUL character, and returns true. Returns false if + // not everything fit into the output buffer. + ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) { + if (disambiguator < 0) return EmitChar('?'); // parsed but too large + if (disambiguator == 0) return EmitChar('0'); + // Convert disambiguator to decimal text. Three digits per byte is enough + // because 999 > 256. The bound will remain correct even if future + // maintenance changes the type of the disambiguator variable. + char digits[3 * sizeof(disambiguator)] = {}; + std::size_t leading_digit_index = sizeof(digits) - 1; + for (; disambiguator > 0; disambiguator /= 10) { + digits[--leading_digit_index] = + static_cast<char>('0' + disambiguator % 10); + } + return Emit(digits + leading_digit_index); + } + + // Consumes an optional disambiguator (s123_) from the input. + // + // On success returns true and fills value with the encoded value if it was + // not too big, otherwise with -1. If the optional disambiguator was omitted, + // value is 0. On parse failure returns false and sets value to -1. + ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) { + value = -1; + + // disambiguator = s base-62-number + // + // Disambiguators are optional. An omitted disambiguator is zero. + if (!Eat('s')) { + value = 0; + return true; + } + int base_62_value = 0; + if (!ParseBase62Number(base_62_value)) return false; + value = base_62_value < 0 ? -1 : base_62_value + 1; + return true; + } + + // Consumes a base-62 number like _ or 123_ from the input. + // + // On success returns true and fills value with the encoded value if it was + // not too big, otherwise with -1. On parse failure returns false and sets + // value to -1. + ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) { + value = -1; + + // base-62-number = (digit | lower | upper)* _ + // + // An empty base-62 digit sequence means 0. + if (Eat('_')) { + value = 0; + return true; + } + + // A nonempty digit sequence denotes its base-62 value plus 1. + int encoded_number = 0; + bool overflowed = false; + while (IsAlpha(Peek()) || IsDigit(Peek())) { + const char c = Take(); + if (encoded_number >= std::numeric_limits<int>::max()/62) { + // If we are close to overflowing an int, keep parsing but stop updating + // encoded_number and remember to return -1 at the end. The point is to + // avoid undefined behavior while parsing crate-root disambiguators, + // which are large in practice but not shown in demangling, while + // successfully computing closure and shim disambiguators, which are + // typically small and are printed out. + overflowed = true; + } else { + int digit; + if (IsDigit(c)) { + digit = c - '0'; + } else if (IsLower(c)) { + digit = c - 'a' + 10; + } else { + digit = c - 'A' + 36; + } + encoded_number = 62 * encoded_number + digit; + } + } + + if (!Eat('_')) return false; + if (!overflowed) value = encoded_number + 1; + return true; + } + + // Consumes an identifier from the input, returning true on success. + // + // A nonzero uppercase_namespace specifies the character after the N in a + // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to + // write out the name with the conventional decoration for that namespace. + ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') { + // identifier -> disambiguator? undisambiguated-identifier + int disambiguator = 0; + if (!ParseDisambiguator(disambiguator)) return false; + + // undisambiguated-identifier -> u? decimal-number _? bytes + const bool is_punycoded = Eat('u'); + if (!IsDigit(Peek())) return false; + int num_bytes = 0; + if (!ParseDecimalNumber(num_bytes)) return false; + (void)Eat('_'); // optional separator, needed if a digit follows + + // Emit the beginnings of braced forms like {shim:vtable#0}. + if (uppercase_namespace == '\0') { + if (is_punycoded && !Emit("{Punycode ")) return false; + } else { + switch (uppercase_namespace) { + case 'C': + if (!Emit("{closure")) return false; + break; + case 'S': + if (!Emit("{shim")) return false; + break; + default: + if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false; + break; + } + if (num_bytes > 0 && !Emit(":")) return false; + } + + // Emit the name itself. + for (int i = 0; i < num_bytes; ++i) { + const char c = Take(); + if (!IsIdentifierChar(c) && + // The spec gives toolchains the choice of Punycode or raw UTF-8 for + // identifiers containing code points above 0x7f, so accept bytes with + // the high bit set if this is not a u... encoding. + (is_punycoded || (c & 0x80) == 0)) { + return false; + } + if (!EmitChar(c)) return false; + } + + // Emit the endings of braced forms: "#42}" or "}". + if (uppercase_namespace != '\0') { + if (!EmitChar('#')) return false; + if (!EmitDisambiguator(disambiguator)) return false; + } + if (uppercase_namespace != '\0' || is_punycoded) { + if (!EmitChar('}')) return false; + } + + return true; + } + + // Consumes a decimal number like 0 or 123 from the input. On success returns + // true and fills value with the encoded value. If the encoded value is too + // large or otherwise unparsable, returns false and sets value to -1. + ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) { + value = -1; + if (!IsDigit(Peek())) return false; + int encoded_number = Take() - '0'; + if (encoded_number == 0) { + // Decimal numbers are never encoded with extra leading zeroes. + value = 0; + return true; + } + while (IsDigit(Peek()) && + // avoid overflow + encoded_number < std::numeric_limits<int>::max()/10) { + encoded_number = 10 * encoded_number + (Take() - '0'); + } + if (IsDigit(Peek())) return false; // too big + value = encoded_number; + return true; + } + + // Pushes byte onto the data stack (the right side of stack_) and returns + // true if stack_ is not full, else returns false. + ABSL_MUST_USE_RESULT bool PushByte(std::uint8_t byte) { + if (depth_ == data_stack_pointer_) return false; + stack_[--data_stack_pointer_] = byte; + return true; + } + + // Pops the last pushed data byte from stack_. Requires that the data stack + // is not empty (data_stack_pointer_ < kStackSize). + std::uint8_t PopByte() { return stack_[data_stack_pointer_++]; } + + // Call and data stacks reside in stack_. The leftmost depth_ elements + // contain ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE. The elements + // from index data_stack_pointer_ to the right edge of stack_ contain bytes + // pushed by PushByte. + std::uint8_t stack_[kStackSize] = {}; + int data_stack_pointer_ = kStackSize; + int depth_ = 0; + + // Anything parsed while silence_depth_ > 0 contributes nothing to the + // demangled output. For constructs omitted from the demangling, such as + // impl-path and the contents of generic-args, we will increment + // silence_depth_ on the way in and decrement silence_depth_ on the way out. + int silence_depth_ = 0; + + // Input: encoding_ points just after the _R in a Rust mangled symbol, and + // encoding_[pos_] is the next input character to be scanned. + int pos_ = 0; + const char* encoding_ = nullptr; + + // Output: *out_ is where the next output character should be written, and + // out_end_ points past the last byte of available space. + char* out_ = nullptr; + char* out_end_ = nullptr; +}; + +} // namespace + +bool DemangleRustSymbolEncoding(const char* mangled, char* out, + std::size_t out_size) { + return RustSymbolParser(mangled, out, out + out_size).Parse(); +} + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/demangle_rust.h b/absl/debugging/internal/demangle_rust.h new file mode 100644 index 00000000..8e9060b4 --- /dev/null +++ b/absl/debugging/internal/demangle_rust.h @@ -0,0 +1,47 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ +#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ + +#include <cstddef> + +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { + +// Demangle the Rust encoding `mangled`. On success, return true and write the +// demangled symbol name to `out`. Otherwise, return false, leaving unspecified +// contents in `out`. For example, calling DemangleRustSymbolEncoding with +// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`, +// provided `out_size` is large enough for that value and its trailing NUL. +// +// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++ +// call-stack space. It is suitable for symbolizing stack traces in a signal +// handler. +// +// The demangling logic is under development. In this version of Abseil, +// DemangleRustSymbolEncoding parses a few simple kinds of symbol names, but +// nothing having backreferences in the input or angle brackets in the +// demangling, and it emits raw Punycode instead of the UTF-8 represented by it. +bool DemangleRustSymbolEncoding(const char* mangled, char* out, + std::size_t out_size); + +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl + +#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_ diff --git a/absl/debugging/internal/demangle_rust_test.cc b/absl/debugging/internal/demangle_rust_test.cc new file mode 100644 index 00000000..2841576e --- /dev/null +++ b/absl/debugging/internal/demangle_rust_test.cc @@ -0,0 +1,216 @@ +// Copyright 2024 The Abseil Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "absl/debugging/internal/demangle_rust.h" + +#include <cstddef> +#include <string> + +#include "gtest/gtest.h" +#include "absl/base/config.h" + +namespace absl { +ABSL_NAMESPACE_BEGIN +namespace debugging_internal { +namespace { + +// If DemangleRustSymbolEncoding(mangled, <buffer with room for buffer_size +// chars>, buffer_size) returns true and seems not to have overrun its output +// buffer, returns the string written by DemangleRustSymbolEncoding; otherwise +// returns an error message. +std::string ResultOfDemangling(const char* mangled, std::size_t buffer_size) { + // Fill the buffer with something other than NUL so we test whether Demangle + // appends trailing NUL as expected. + std::string buffer(buffer_size + 1, '~'); + constexpr char kCanaryCharacter = 0x7f; // arbitrary unlikely value + buffer[buffer_size] = kCanaryCharacter; + if (!DemangleRustSymbolEncoding(mangled, &buffer[0], buffer_size)) { + return "Failed parse"; + } + if (buffer[buffer_size] != kCanaryCharacter) { + return "Buffer overrun by output: " + buffer.substr(0, buffer_size + 1) + + "..."; + } + return buffer.data(); // Not buffer itself: this trims trailing padding. +} + +// Tests that DemangleRustSymbolEncoding converts mangled into plaintext given +// enough output buffer space but returns false and avoids overrunning a buffer +// that is one byte too short. +// +// The lambda wrapping allows ASSERT_EQ to branch out the first time an +// expectation is not satisfied, preventing redundant errors for the same bug. +// +// We test first with excess space so that if the algorithm just computes the +// wrong answer, it will be clear from the error log that the bounds checks are +// unlikely to be the code at fault. +#define EXPECT_DEMANGLING(mangled, plaintext) \ + do { \ + [] { \ + constexpr std::size_t plenty_of_space = sizeof(plaintext) + 128; \ + constexpr std::size_t just_enough_space = sizeof(plaintext); \ + constexpr std::size_t one_byte_too_few = sizeof(plaintext) - 1; \ + const char* expected_plaintext = plaintext; \ + const char* expected_error = "Failed parse"; \ + ASSERT_EQ(ResultOfDemangling(mangled, plenty_of_space), \ + expected_plaintext); \ + ASSERT_EQ(ResultOfDemangling(mangled, just_enough_space), \ + expected_plaintext); \ + ASSERT_EQ(ResultOfDemangling(mangled, one_byte_too_few), \ + expected_error); \ + }(); \ + } while (0) + +// Tests that DemangleRustSymbolEncoding rejects the given input (typically, a +// truncation of a real Rust symbol name). +#define EXPECT_DEMANGLING_FAILS(mangled) \ + do { \ + constexpr std::size_t plenty_of_space = 1024; \ + const char* expected_error = "Failed parse"; \ + EXPECT_EQ(ResultOfDemangling(mangled, plenty_of_space), expected_error); \ + } while (0) + +// Piping grep -C 1 _R demangle_test.cc into your favorite c++filt +// implementation allows you to verify that the goldens below are reasonable. + +TEST(DemangleRust, EmptyDemangling) { + EXPECT_TRUE(DemangleRustSymbolEncoding("_RC0", nullptr, 0)); +} + +TEST(DemangleRust, FunctionAtCrateLevel) { + EXPECT_DEMANGLING("_RNvC10crate_name9func_name", "crate_name::func_name"); + EXPECT_DEMANGLING( + "_RNvCs09azAZ_10crate_name9func_name", "crate_name::func_name"); +} + +TEST(DemangleRust, TruncationsOfFunctionAtCrateLevel) { + EXPECT_DEMANGLING_FAILS("_R"); + EXPECT_DEMANGLING_FAILS("_RN"); + EXPECT_DEMANGLING_FAILS("_RNvC"); + EXPECT_DEMANGLING_FAILS("_RNvC10"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_nam"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9"); + EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9func_nam"); + EXPECT_DEMANGLING_FAILS("_RNvCs"); + EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ"); + EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ_"); +} + +TEST(DemangleRust, VendorSuffixes) { + EXPECT_DEMANGLING("_RNvC10crate_name9func_name.!@#", "crate_name::func_name"); + EXPECT_DEMANGLING("_RNvC10crate_name9func_name$!@#", "crate_name::func_name"); +} + +TEST(DemangleRust, UnicodeIdentifiers) { + EXPECT_DEMANGLING("_RNvC7ice_cap17Eyjafjallajökull", + "ice_cap::Eyjafjallajökull"); + EXPECT_DEMANGLING("_RNvC7ice_caps_u19Eyjafjallajkull_jtb", + "ice_cap::{Punycode Eyjafjallajkull_jtb}"); +} + +TEST(DemangleRust, FunctionInModule) { + EXPECT_DEMANGLING("_RNvNtCs09azAZ_10crate_name11module_name9func_name", + "crate_name::module_name::func_name"); +} + +TEST(DemangleRust, FunctionInFunction) { + EXPECT_DEMANGLING( + "_RNvNvCs09azAZ_10crate_name15outer_func_name15inner_func_name", + "crate_name::outer_func_name::inner_func_name"); +} + +TEST(DemangleRust, ClosureInFunction) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_name0", + "crate_name::func_name::{closure#0}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_name0Cs123_12client_crate", + "crate_name::func_name::{closure#0}"); +} + +TEST(DemangleRust, ClosureNumbering) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names_0Cs123_12client_crate", + "crate_name::func_name::{closure#1}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names0_0Cs123_12client_crate", + "crate_name::func_name::{closure#2}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names9_0Cs123_12client_crate", + "crate_name::func_name::{closure#11}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesa_0Cs123_12client_crate", + "crate_name::func_name::{closure#12}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesz_0Cs123_12client_crate", + "crate_name::func_name::{closure#37}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesA_0Cs123_12client_crate", + "crate_name::func_name::{closure#38}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesZ_0Cs123_12client_crate", + "crate_name::func_name::{closure#63}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names10_0Cs123_12client_crate", + "crate_name::func_name::{closure#64}"); + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_namesg6_0Cs123_12client_crate", + "crate_name::func_name::{closure#1000}"); +} + +TEST(DemangleRust, ClosureNumberOverflowingInt) { + EXPECT_DEMANGLING( + "_RNCNvCs09azAZ_10crate_name9func_names1234567_0Cs123_12client_crate", + "crate_name::func_name::{closure#?}"); +} + +TEST(DemangleRust, UnexpectedlyNamedClosure) { + EXPECT_DEMANGLING( + "_RNCNvCs123_10crate_name9func_name12closure_nameCs456_12client_crate", + "crate_name::func_name::{closure:closure_name#0}"); + EXPECT_DEMANGLING( + "_RNCNvCs123_10crate_name9func_names2_12closure_nameCs456_12client_crate", + "crate_name::func_name::{closure:closure_name#4}"); +} + +TEST(DemangleRust, ItemNestedInsideClosure) { + EXPECT_DEMANGLING( + "_RNvNCNvCs123_10crate_name9func_name015inner_func_nameCs_12client_crate", + "crate_name::func_name::{closure#0}::inner_func_name"); +} + +TEST(DemangleRust, Shim) { + EXPECT_DEMANGLING( + "_RNSNvCs123_10crate_name9func_name6vtableCs456_12client_crate", + "crate_name::func_name::{shim:vtable#0}"); +} + +TEST(DemangleRust, UnknownUppercaseNamespace) { + EXPECT_DEMANGLING( + "_RNXNvCs123_10crate_name9func_name14mystery_objectCs456_12client_crate", + "crate_name::func_name::{X:mystery_object#0}"); +} + +TEST(DemangleRust, NestedUppercaseNamespaces) { + EXPECT_DEMANGLING( + "_RNCNXNYCs123_10crate_names0_1ys1_1xs2_0Cs456_12client_crate", + "crate_name::{Y:y#2}::{X:x#3}::{closure#4}"); +} + + +} // namespace +} // namespace debugging_internal +ABSL_NAMESPACE_END +} // namespace absl diff --git a/absl/debugging/internal/demangle_test.cc b/absl/debugging/internal/demangle_test.cc index 2771d4d0..b6318302 100644 --- a/absl/debugging/internal/demangle_test.cc +++ b/absl/debugging/internal/demangle_test.cc @@ -300,6 +300,15 @@ TEST(Demangle, AbiTags) { EXPECT_STREQ("C[abi:bar][abi:foo]()", tmp); } +// Test one Rust symbol to exercise Demangle's delegation path. Rust demangling +// itself is more thoroughly tested in demangle_rust_test.cc. +TEST(Demangle, DelegatesToDemangleRustSymbolEncoding) { + char tmp[80]; + + EXPECT_TRUE(Demangle("_RNvC8my_crate7my_func", tmp, sizeof(tmp))); + EXPECT_STREQ("my_crate::my_func", tmp); +} + // Tests that verify that Demangle footprint is within some limit. // They are not to be run under sanitizers as the sanitizers increase // stack consumption by about 4x. |