summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGravatar Chris Mihelich <cmihelic@google.com>2024-05-06 05:56:26 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2024-05-06 05:57:30 -0700
commita28ee5b51c9ea41707d9a5d2d358ad77850264c4 (patch)
tree98b00df448483279f43bd48d015067e8dbdbcb21
parentc1e1b47d989978cde8c5a2a219df425b785a0c47 (diff)
Recognize some simple Rust mangled names in Demangle.
PiperOrigin-RevId: 631018414 Change-Id: Ice8efa0af4cb1f72b5d62fbbea4cb12cbead8634
-rw-r--r--CMake/AbseilDll.cmake2
-rw-r--r--absl/debugging/BUILD.bazel24
-rw-r--r--absl/debugging/CMakeLists.txt15
-rw-r--r--absl/debugging/internal/demangle.cc6
-rw-r--r--absl/debugging/internal/demangle.h2
-rw-r--r--absl/debugging/internal/demangle_rust.cc432
-rw-r--r--absl/debugging/internal/demangle_rust.h47
-rw-r--r--absl/debugging/internal/demangle_rust_test.cc216
-rw-r--r--absl/debugging/internal/demangle_test.cc9
9 files changed, 751 insertions, 2 deletions
diff --git a/CMake/AbseilDll.cmake b/CMake/AbseilDll.cmake
index 6d871552..2a40532a 100644
--- a/CMake/AbseilDll.cmake
+++ b/CMake/AbseilDll.cmake
@@ -124,6 +124,8 @@ set(ABSL_INTERNAL_DLL_FILES
"debugging/internal/address_is_readable.h"
"debugging/internal/demangle.cc"
"debugging/internal/demangle.h"
+ "debugging/internal/demangle_rust.cc"
+ "debugging/internal/demangle_rust.h"
"debugging/internal/elf_mem_image.cc"
"debugging/internal/elf_mem_image.h"
"debugging/internal/examine_stack.cc"
diff --git a/absl/debugging/BUILD.bazel b/absl/debugging/BUILD.bazel
index 5baff7a1..22494554 100644
--- a/absl/debugging/BUILD.bazel
+++ b/absl/debugging/BUILD.bazel
@@ -219,8 +219,14 @@ cc_library(
cc_library(
name = "demangle_internal",
- srcs = ["internal/demangle.cc"],
- hdrs = ["internal/demangle.h"],
+ srcs = [
+ "internal/demangle.cc",
+ "internal/demangle_rust.cc",
+ ],
+ hdrs = [
+ "internal/demangle.h",
+ "internal/demangle_rust.h",
+ ],
copts = ABSL_DEFAULT_COPTS,
linkopts = ABSL_DEFAULT_LINKOPTS,
visibility = [
@@ -235,6 +241,20 @@ cc_library(
)
cc_test(
+ name = "demangle_rust_test",
+ srcs = ["internal/demangle_rust_test.cc"],
+ copts = ABSL_TEST_COPTS,
+ linkopts = ABSL_DEFAULT_LINKOPTS,
+ deps = [
+ ":demangle_internal",
+ "//absl/base:config",
+ "//absl/base:core_headers",
+ "@com_google_googletest//:gtest",
+ "@com_google_googletest//:gtest_main",
+ ],
+)
+
+cc_test(
name = "demangle_test",
srcs = ["internal/demangle_test.cc"],
copts = ABSL_TEST_COPTS,
diff --git a/absl/debugging/CMakeLists.txt b/absl/debugging/CMakeLists.txt
index 65e2af88..027a6be2 100644
--- a/absl/debugging/CMakeLists.txt
+++ b/absl/debugging/CMakeLists.txt
@@ -196,8 +196,10 @@ absl_cc_library(
demangle_internal
HDRS
"internal/demangle.h"
+ "internal/demangle_rust.h"
SRCS
"internal/demangle.cc"
+ "internal/demangle_rust.cc"
COPTS
${ABSL_DEFAULT_COPTS}
DEPS
@@ -208,6 +210,19 @@ absl_cc_library(
absl_cc_test(
NAME
+ demangle_rust_test
+ SRCS
+ "internal/demangle_rust_test.cc"
+ COPTS
+ ${ABSL_TEST_COPTS}
+ DEPS
+ absl::demangle_internal
+ absl::config
+ GTest::gmock_main
+)
+
+absl_cc_test(
+ NAME
demangle_test
SRCS
"internal/demangle_test.cc"
diff --git a/absl/debugging/internal/demangle.cc b/absl/debugging/internal/demangle.cc
index 82420c8b..71d4eb0a 100644
--- a/absl/debugging/internal/demangle.cc
+++ b/absl/debugging/internal/demangle.cc
@@ -19,6 +19,7 @@
#include "absl/debugging/internal/demangle.h"
+#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
@@ -26,6 +27,7 @@
#include <string>
#include "absl/base/config.h"
+#include "absl/debugging/internal/demangle_rust.h"
#if ABSL_INTERNAL_HAS_CXA_DEMANGLE
#include <cxxabi.h>
@@ -2110,6 +2112,10 @@ static bool Overflowed(const State *state) {
// The demangler entry point.
bool Demangle(const char* mangled, char* out, size_t out_size) {
+ if (mangled[0] == '_' && mangled[1] == 'R') {
+ return DemangleRustSymbolEncoding(mangled, out, out_size);
+ }
+
State state;
InitState(&state, mangled, out, out_size);
return ParseTopLevelMangledName(&state) && !Overflowed(&state) &&
diff --git a/absl/debugging/internal/demangle.h b/absl/debugging/internal/demangle.h
index 146d1150..e75d1473 100644
--- a/absl/debugging/internal/demangle.h
+++ b/absl/debugging/internal/demangle.h
@@ -56,6 +56,8 @@ namespace debugging_internal {
//
// See the unit test for more examples.
//
+// Support for Rust mangled names is in development; see demangle_rust.h.
+//
// Note: we might want to write demanglers for ABIs other than Itanium
// C++ ABI in the future.
bool Demangle(const char* mangled, char* out, size_t out_size);
diff --git a/absl/debugging/internal/demangle_rust.cc b/absl/debugging/internal/demangle_rust.cc
new file mode 100644
index 00000000..7086cab2
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust.cc
@@ -0,0 +1,432 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/demangle_rust.h"
+
+#include <cstddef>
+#include <cstdint>
+#include <cstring>
+#include <limits>
+
+#include "absl/base/attributes.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+namespace {
+
+// Same step limit as the C++ demangler in demangle.cc uses.
+constexpr int kMaxReturns = 1 << 17;
+
+bool IsDigit(char c) { return '0' <= c && c <= '9'; }
+bool IsLower(char c) { return 'a' <= c && c <= 'z'; }
+bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; }
+bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); }
+bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; }
+
+// Parser for Rust symbol mangling v0, whose grammar is defined here:
+//
+// https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary
+class RustSymbolParser {
+ public:
+ // Prepares to demangle the given encoding, a Rust symbol name starting with
+ // _R, into the output buffer [out, out_end). The caller is expected to
+ // continue by calling the new object's Parse function.
+ RustSymbolParser(const char* encoding, char* out, char* const out_end)
+ : encoding_(encoding), out_(out), out_end_(out_end) {
+ if (out_ != out_end_) *out_ = '\0';
+ }
+
+ // Parses the constructor's encoding argument, writing output into the range
+ // [out, out_end). Returns true on success and false for input whose
+ // structure was not recognized or exceeded implementation limits, such as by
+ // nesting structures too deep. In either case *this should not be used
+ // again.
+ ABSL_MUST_USE_RESULT bool Parse() && {
+ // Recursively parses the grammar production named by callee, then resumes
+ // execution at the next statement.
+ //
+ // Recursive-descent parsing is a beautifully readable translation of a
+ // grammar, but it risks stack overflow if implemented by naive recursion on
+ // the C++ call stack. So we simulate recursion by goto and switch instead,
+ // keeping a bounded stack of "return addresses" in the stack_ member.
+ //
+ // The callee argument is a statement label. We goto that label after
+ // saving the "return address" on stack_. The next continue statement in
+ // the for loop below "returns" from this "call".
+ //
+ // The caller argument names the return point. Each value of caller must
+ // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the
+ // definition of enum ReturnAddress. The switch implements the control
+ // transfer from the end of a "called" subroutine back to the statement
+ // after the "call".
+ //
+ // Note that not all the grammar productions have to be packed into the
+ // switch, but only those which appear in a cycle in the grammar. Anything
+ // acyclic can be written as ordinary functions and function calls, e.g.,
+ // ParseIdentifier.
+#define ABSL_DEMANGLER_RECURSE(callee, caller) \
+ do { \
+ if (depth_ == data_stack_pointer_) return false; \
+ /* The next continue will switch on this saved value ... */ \
+ stack_[depth_++] = caller; \
+ goto callee; \
+ /* ... and will land here, resuming the suspended code. */ \
+ case caller: {} \
+ } while (0)
+
+ // Parse the encoding, counting completed recursive calls to guard against
+ // excessively complex input and infinite-loop bugs.
+ int iter = 0;
+ goto whole_encoding;
+ for (; iter < kMaxReturns && depth_ > 0; ++iter) {
+ // This switch resumes the code path most recently suspended by
+ // ABSL_DEMANGLER_RECURSE.
+ switch (static_cast<ReturnAddress>(stack_[--depth_])) {
+ //
+ // symbol-name ->
+ // _R decimal-number? path instantiating-crate? vendor-specific-suffix?
+ whole_encoding:
+ if (!Eat('_') || !Eat('R')) return false;
+ // decimal-number? is always empty today, so proceed to path, which
+ // can't start with a decimal digit.
+ ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate);
+ if (IsAlpha(Peek())) {
+ ++silence_depth_; // Print nothing more from here on.
+ ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix);
+ }
+ switch (Take()) {
+ case '.': case '$': case '\0': return true;
+ }
+ return false; // unexpected trailing content
+
+ // path -> crate-root | inherent-impl | trait-impl | trait-definition |
+ // nested-path | generic-args | backref
+ path:
+ switch (Take()) {
+ case 'C': goto crate_root;
+ case 'M': return false; // inherent-impl not yet implemented
+ case 'X': return false; // trait-impl not yet implemented
+ case 'Y': return false; // trait-definition not yet implemented
+ case 'N': goto nested_path;
+ case 'I': return false; // generic-args not yet implemented
+ case 'B': return false; // backref not yet implemented
+ default: return false;
+ }
+
+ // crate-root -> C identifier (C consumed above)
+ crate_root:
+ if (!ParseIdentifier()) return false;
+ continue;
+
+ // nested-path -> N namespace path identifier (N consumed above)
+ // namespace -> lower | upper
+ nested_path:
+ // Uppercase namespaces must be saved on the stack so we can print
+ // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed.
+ if (IsUpper(Peek())) {
+ if (!PushByte(static_cast<std::uint8_t>(Take()))) return false;
+ ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier(static_cast<char>(PopByte()))) return false;
+ continue;
+ }
+
+ // Lowercase namespaces, however, are never represented in the output;
+ // they all emit just ::name.
+ if (IsLower(Take())) {
+ ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace);
+ if (!Emit("::")) return false;
+ if (!ParseIdentifier()) return false;
+ continue;
+ }
+
+ // Neither upper or lower
+ return false;
+ }
+ }
+
+ return false; // hit iteration limit or a bug in our stack handling
+ }
+
+ private:
+ // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls.
+ enum ReturnAddress : std::uint8_t {
+ kInstantiatingCrate,
+ kVendorSpecificSuffix,
+ kIdentifierInUppercaseNamespace,
+ kIdentifierInLowercaseNamespace,
+ };
+
+ // Element count for the stack_ array. A larger kStackSize accommodates more
+ // deeply nested names at the cost of a larger footprint on the C++ call
+ // stack.
+ enum { kStackSize = 256 };
+
+ // Returns the next input character without consuming it.
+ char Peek() const { return encoding_[pos_]; }
+
+ // Consumes and returns the next input character.
+ char Take() { return encoding_[pos_++]; }
+
+ // If the next input character is the given character, consumes it and returns
+ // true; otherwise returns false without consuming a character.
+ ABSL_MUST_USE_RESULT bool Eat(char want) {
+ if (encoding_[pos_] != want) return false;
+ ++pos_;
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends c to the output,
+ // writing a fresh NUL terminator afterward, and returns true. Returns false
+ // if the output buffer had less than two bytes free.
+ ABSL_MUST_USE_RESULT bool EmitChar(char c) {
+ if (silence_depth_ > 0) return true;
+ if (out_end_ - out_ < 2) return false;
+ *out_++ = c;
+ *out_ = '\0';
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends the C string token
+ // to the output, followed by a NUL character, and returns true. Returns
+ // false if not everything fit into the output buffer.
+ ABSL_MUST_USE_RESULT bool Emit(const char* token) {
+ if (silence_depth_ > 0) return true;
+ const std::size_t token_length = std::strlen(token);
+ const std::size_t bytes_to_copy = token_length + 1; // token and final NUL
+ if (static_cast<std::size_t>(out_end_ - out_) < bytes_to_copy) return false;
+ std::memcpy(out_, token, bytes_to_copy);
+ out_ += token_length;
+ return true;
+ }
+
+ // Provided there is enough remaining output space, appends the decimal form
+ // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the
+ // output, followed by a NUL character, and returns true. Returns false if
+ // not everything fit into the output buffer.
+ ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) {
+ if (disambiguator < 0) return EmitChar('?'); // parsed but too large
+ if (disambiguator == 0) return EmitChar('0');
+ // Convert disambiguator to decimal text. Three digits per byte is enough
+ // because 999 > 256. The bound will remain correct even if future
+ // maintenance changes the type of the disambiguator variable.
+ char digits[3 * sizeof(disambiguator)] = {};
+ std::size_t leading_digit_index = sizeof(digits) - 1;
+ for (; disambiguator > 0; disambiguator /= 10) {
+ digits[--leading_digit_index] =
+ static_cast<char>('0' + disambiguator % 10);
+ }
+ return Emit(digits + leading_digit_index);
+ }
+
+ // Consumes an optional disambiguator (s123_) from the input.
+ //
+ // On success returns true and fills value with the encoded value if it was
+ // not too big, otherwise with -1. If the optional disambiguator was omitted,
+ // value is 0. On parse failure returns false and sets value to -1.
+ ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) {
+ value = -1;
+
+ // disambiguator = s base-62-number
+ //
+ // Disambiguators are optional. An omitted disambiguator is zero.
+ if (!Eat('s')) {
+ value = 0;
+ return true;
+ }
+ int base_62_value = 0;
+ if (!ParseBase62Number(base_62_value)) return false;
+ value = base_62_value < 0 ? -1 : base_62_value + 1;
+ return true;
+ }
+
+ // Consumes a base-62 number like _ or 123_ from the input.
+ //
+ // On success returns true and fills value with the encoded value if it was
+ // not too big, otherwise with -1. On parse failure returns false and sets
+ // value to -1.
+ ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) {
+ value = -1;
+
+ // base-62-number = (digit | lower | upper)* _
+ //
+ // An empty base-62 digit sequence means 0.
+ if (Eat('_')) {
+ value = 0;
+ return true;
+ }
+
+ // A nonempty digit sequence denotes its base-62 value plus 1.
+ int encoded_number = 0;
+ bool overflowed = false;
+ while (IsAlpha(Peek()) || IsDigit(Peek())) {
+ const char c = Take();
+ if (encoded_number >= std::numeric_limits<int>::max()/62) {
+ // If we are close to overflowing an int, keep parsing but stop updating
+ // encoded_number and remember to return -1 at the end. The point is to
+ // avoid undefined behavior while parsing crate-root disambiguators,
+ // which are large in practice but not shown in demangling, while
+ // successfully computing closure and shim disambiguators, which are
+ // typically small and are printed out.
+ overflowed = true;
+ } else {
+ int digit;
+ if (IsDigit(c)) {
+ digit = c - '0';
+ } else if (IsLower(c)) {
+ digit = c - 'a' + 10;
+ } else {
+ digit = c - 'A' + 36;
+ }
+ encoded_number = 62 * encoded_number + digit;
+ }
+ }
+
+ if (!Eat('_')) return false;
+ if (!overflowed) value = encoded_number + 1;
+ return true;
+ }
+
+ // Consumes an identifier from the input, returning true on success.
+ //
+ // A nonzero uppercase_namespace specifies the character after the N in a
+ // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to
+ // write out the name with the conventional decoration for that namespace.
+ ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') {
+ // identifier -> disambiguator? undisambiguated-identifier
+ int disambiguator = 0;
+ if (!ParseDisambiguator(disambiguator)) return false;
+
+ // undisambiguated-identifier -> u? decimal-number _? bytes
+ const bool is_punycoded = Eat('u');
+ if (!IsDigit(Peek())) return false;
+ int num_bytes = 0;
+ if (!ParseDecimalNumber(num_bytes)) return false;
+ (void)Eat('_'); // optional separator, needed if a digit follows
+
+ // Emit the beginnings of braced forms like {shim:vtable#0}.
+ if (uppercase_namespace == '\0') {
+ if (is_punycoded && !Emit("{Punycode ")) return false;
+ } else {
+ switch (uppercase_namespace) {
+ case 'C':
+ if (!Emit("{closure")) return false;
+ break;
+ case 'S':
+ if (!Emit("{shim")) return false;
+ break;
+ default:
+ if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false;
+ break;
+ }
+ if (num_bytes > 0 && !Emit(":")) return false;
+ }
+
+ // Emit the name itself.
+ for (int i = 0; i < num_bytes; ++i) {
+ const char c = Take();
+ if (!IsIdentifierChar(c) &&
+ // The spec gives toolchains the choice of Punycode or raw UTF-8 for
+ // identifiers containing code points above 0x7f, so accept bytes with
+ // the high bit set if this is not a u... encoding.
+ (is_punycoded || (c & 0x80) == 0)) {
+ return false;
+ }
+ if (!EmitChar(c)) return false;
+ }
+
+ // Emit the endings of braced forms: "#42}" or "}".
+ if (uppercase_namespace != '\0') {
+ if (!EmitChar('#')) return false;
+ if (!EmitDisambiguator(disambiguator)) return false;
+ }
+ if (uppercase_namespace != '\0' || is_punycoded) {
+ if (!EmitChar('}')) return false;
+ }
+
+ return true;
+ }
+
+ // Consumes a decimal number like 0 or 123 from the input. On success returns
+ // true and fills value with the encoded value. If the encoded value is too
+ // large or otherwise unparsable, returns false and sets value to -1.
+ ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) {
+ value = -1;
+ if (!IsDigit(Peek())) return false;
+ int encoded_number = Take() - '0';
+ if (encoded_number == 0) {
+ // Decimal numbers are never encoded with extra leading zeroes.
+ value = 0;
+ return true;
+ }
+ while (IsDigit(Peek()) &&
+ // avoid overflow
+ encoded_number < std::numeric_limits<int>::max()/10) {
+ encoded_number = 10 * encoded_number + (Take() - '0');
+ }
+ if (IsDigit(Peek())) return false; // too big
+ value = encoded_number;
+ return true;
+ }
+
+ // Pushes byte onto the data stack (the right side of stack_) and returns
+ // true if stack_ is not full, else returns false.
+ ABSL_MUST_USE_RESULT bool PushByte(std::uint8_t byte) {
+ if (depth_ == data_stack_pointer_) return false;
+ stack_[--data_stack_pointer_] = byte;
+ return true;
+ }
+
+ // Pops the last pushed data byte from stack_. Requires that the data stack
+ // is not empty (data_stack_pointer_ < kStackSize).
+ std::uint8_t PopByte() { return stack_[data_stack_pointer_++]; }
+
+ // Call and data stacks reside in stack_. The leftmost depth_ elements
+ // contain ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE. The elements
+ // from index data_stack_pointer_ to the right edge of stack_ contain bytes
+ // pushed by PushByte.
+ std::uint8_t stack_[kStackSize] = {};
+ int data_stack_pointer_ = kStackSize;
+ int depth_ = 0;
+
+ // Anything parsed while silence_depth_ > 0 contributes nothing to the
+ // demangled output. For constructs omitted from the demangling, such as
+ // impl-path and the contents of generic-args, we will increment
+ // silence_depth_ on the way in and decrement silence_depth_ on the way out.
+ int silence_depth_ = 0;
+
+ // Input: encoding_ points just after the _R in a Rust mangled symbol, and
+ // encoding_[pos_] is the next input character to be scanned.
+ int pos_ = 0;
+ const char* encoding_ = nullptr;
+
+ // Output: *out_ is where the next output character should be written, and
+ // out_end_ points past the last byte of available space.
+ char* out_ = nullptr;
+ char* out_end_ = nullptr;
+};
+
+} // namespace
+
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+ std::size_t out_size) {
+ return RustSymbolParser(mangled, out, out + out_size).Parse();
+}
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/demangle_rust.h b/absl/debugging/internal/demangle_rust.h
new file mode 100644
index 00000000..8e9060b4
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust.h
@@ -0,0 +1,47 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+#define ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
+
+#include <cstddef>
+
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+
+// Demangle the Rust encoding `mangled`. On success, return true and write the
+// demangled symbol name to `out`. Otherwise, return false, leaving unspecified
+// contents in `out`. For example, calling DemangleRustSymbolEncoding with
+// `mangled = "_RNvC8my_crate7my_func"` will yield `my_crate::my_func` in `out`,
+// provided `out_size` is large enough for that value and its trailing NUL.
+//
+// DemangleRustSymbolEncoding is async-signal-safe and runs in bounded C++
+// call-stack space. It is suitable for symbolizing stack traces in a signal
+// handler.
+//
+// The demangling logic is under development. In this version of Abseil,
+// DemangleRustSymbolEncoding parses a few simple kinds of symbol names, but
+// nothing having backreferences in the input or angle brackets in the
+// demangling, and it emits raw Punycode instead of the UTF-8 represented by it.
+bool DemangleRustSymbolEncoding(const char* mangled, char* out,
+ std::size_t out_size);
+
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_DEBUGGING_INTERNAL_DEMANGLE_RUST_H_
diff --git a/absl/debugging/internal/demangle_rust_test.cc b/absl/debugging/internal/demangle_rust_test.cc
new file mode 100644
index 00000000..2841576e
--- /dev/null
+++ b/absl/debugging/internal/demangle_rust_test.cc
@@ -0,0 +1,216 @@
+// Copyright 2024 The Abseil Authors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/debugging/internal/demangle_rust.h"
+
+#include <cstddef>
+#include <string>
+
+#include "gtest/gtest.h"
+#include "absl/base/config.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace debugging_internal {
+namespace {
+
+// If DemangleRustSymbolEncoding(mangled, <buffer with room for buffer_size
+// chars>, buffer_size) returns true and seems not to have overrun its output
+// buffer, returns the string written by DemangleRustSymbolEncoding; otherwise
+// returns an error message.
+std::string ResultOfDemangling(const char* mangled, std::size_t buffer_size) {
+ // Fill the buffer with something other than NUL so we test whether Demangle
+ // appends trailing NUL as expected.
+ std::string buffer(buffer_size + 1, '~');
+ constexpr char kCanaryCharacter = 0x7f; // arbitrary unlikely value
+ buffer[buffer_size] = kCanaryCharacter;
+ if (!DemangleRustSymbolEncoding(mangled, &buffer[0], buffer_size)) {
+ return "Failed parse";
+ }
+ if (buffer[buffer_size] != kCanaryCharacter) {
+ return "Buffer overrun by output: " + buffer.substr(0, buffer_size + 1)
+ + "...";
+ }
+ return buffer.data(); // Not buffer itself: this trims trailing padding.
+}
+
+// Tests that DemangleRustSymbolEncoding converts mangled into plaintext given
+// enough output buffer space but returns false and avoids overrunning a buffer
+// that is one byte too short.
+//
+// The lambda wrapping allows ASSERT_EQ to branch out the first time an
+// expectation is not satisfied, preventing redundant errors for the same bug.
+//
+// We test first with excess space so that if the algorithm just computes the
+// wrong answer, it will be clear from the error log that the bounds checks are
+// unlikely to be the code at fault.
+#define EXPECT_DEMANGLING(mangled, plaintext) \
+ do { \
+ [] { \
+ constexpr std::size_t plenty_of_space = sizeof(plaintext) + 128; \
+ constexpr std::size_t just_enough_space = sizeof(plaintext); \
+ constexpr std::size_t one_byte_too_few = sizeof(plaintext) - 1; \
+ const char* expected_plaintext = plaintext; \
+ const char* expected_error = "Failed parse"; \
+ ASSERT_EQ(ResultOfDemangling(mangled, plenty_of_space), \
+ expected_plaintext); \
+ ASSERT_EQ(ResultOfDemangling(mangled, just_enough_space), \
+ expected_plaintext); \
+ ASSERT_EQ(ResultOfDemangling(mangled, one_byte_too_few), \
+ expected_error); \
+ }(); \
+ } while (0)
+
+// Tests that DemangleRustSymbolEncoding rejects the given input (typically, a
+// truncation of a real Rust symbol name).
+#define EXPECT_DEMANGLING_FAILS(mangled) \
+ do { \
+ constexpr std::size_t plenty_of_space = 1024; \
+ const char* expected_error = "Failed parse"; \
+ EXPECT_EQ(ResultOfDemangling(mangled, plenty_of_space), expected_error); \
+ } while (0)
+
+// Piping grep -C 1 _R demangle_test.cc into your favorite c++filt
+// implementation allows you to verify that the goldens below are reasonable.
+
+TEST(DemangleRust, EmptyDemangling) {
+ EXPECT_TRUE(DemangleRustSymbolEncoding("_RC0", nullptr, 0));
+}
+
+TEST(DemangleRust, FunctionAtCrateLevel) {
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name", "crate_name::func_name");
+ EXPECT_DEMANGLING(
+ "_RNvCs09azAZ_10crate_name9func_name", "crate_name::func_name");
+}
+
+TEST(DemangleRust, TruncationsOfFunctionAtCrateLevel) {
+ EXPECT_DEMANGLING_FAILS("_R");
+ EXPECT_DEMANGLING_FAILS("_RN");
+ EXPECT_DEMANGLING_FAILS("_RNvC");
+ EXPECT_DEMANGLING_FAILS("_RNvC10");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_nam");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9");
+ EXPECT_DEMANGLING_FAILS("_RNvC10crate_name9func_nam");
+ EXPECT_DEMANGLING_FAILS("_RNvCs");
+ EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ");
+ EXPECT_DEMANGLING_FAILS("_RNvCs09azAZ_");
+}
+
+TEST(DemangleRust, VendorSuffixes) {
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name.!@#", "crate_name::func_name");
+ EXPECT_DEMANGLING("_RNvC10crate_name9func_name$!@#", "crate_name::func_name");
+}
+
+TEST(DemangleRust, UnicodeIdentifiers) {
+ EXPECT_DEMANGLING("_RNvC7ice_cap17Eyjafjallajökull",
+ "ice_cap::Eyjafjallajökull");
+ EXPECT_DEMANGLING("_RNvC7ice_caps_u19Eyjafjallajkull_jtb",
+ "ice_cap::{Punycode Eyjafjallajkull_jtb}");
+}
+
+TEST(DemangleRust, FunctionInModule) {
+ EXPECT_DEMANGLING("_RNvNtCs09azAZ_10crate_name11module_name9func_name",
+ "crate_name::module_name::func_name");
+}
+
+TEST(DemangleRust, FunctionInFunction) {
+ EXPECT_DEMANGLING(
+ "_RNvNvCs09azAZ_10crate_name15outer_func_name15inner_func_name",
+ "crate_name::outer_func_name::inner_func_name");
+}
+
+TEST(DemangleRust, ClosureInFunction) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_name0",
+ "crate_name::func_name::{closure#0}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_name0Cs123_12client_crate",
+ "crate_name::func_name::{closure#0}");
+}
+
+TEST(DemangleRust, ClosureNumbering) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#1}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names0_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#2}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names9_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#11}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesa_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#12}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesz_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#37}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesA_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#38}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesZ_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#63}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names10_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#64}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_namesg6_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#1000}");
+}
+
+TEST(DemangleRust, ClosureNumberOverflowingInt) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs09azAZ_10crate_name9func_names1234567_0Cs123_12client_crate",
+ "crate_name::func_name::{closure#?}");
+}
+
+TEST(DemangleRust, UnexpectedlyNamedClosure) {
+ EXPECT_DEMANGLING(
+ "_RNCNvCs123_10crate_name9func_name12closure_nameCs456_12client_crate",
+ "crate_name::func_name::{closure:closure_name#0}");
+ EXPECT_DEMANGLING(
+ "_RNCNvCs123_10crate_name9func_names2_12closure_nameCs456_12client_crate",
+ "crate_name::func_name::{closure:closure_name#4}");
+}
+
+TEST(DemangleRust, ItemNestedInsideClosure) {
+ EXPECT_DEMANGLING(
+ "_RNvNCNvCs123_10crate_name9func_name015inner_func_nameCs_12client_crate",
+ "crate_name::func_name::{closure#0}::inner_func_name");
+}
+
+TEST(DemangleRust, Shim) {
+ EXPECT_DEMANGLING(
+ "_RNSNvCs123_10crate_name9func_name6vtableCs456_12client_crate",
+ "crate_name::func_name::{shim:vtable#0}");
+}
+
+TEST(DemangleRust, UnknownUppercaseNamespace) {
+ EXPECT_DEMANGLING(
+ "_RNXNvCs123_10crate_name9func_name14mystery_objectCs456_12client_crate",
+ "crate_name::func_name::{X:mystery_object#0}");
+}
+
+TEST(DemangleRust, NestedUppercaseNamespaces) {
+ EXPECT_DEMANGLING(
+ "_RNCNXNYCs123_10crate_names0_1ys1_1xs2_0Cs456_12client_crate",
+ "crate_name::{Y:y#2}::{X:x#3}::{closure#4}");
+}
+
+
+} // namespace
+} // namespace debugging_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/debugging/internal/demangle_test.cc b/absl/debugging/internal/demangle_test.cc
index 2771d4d0..b6318302 100644
--- a/absl/debugging/internal/demangle_test.cc
+++ b/absl/debugging/internal/demangle_test.cc
@@ -300,6 +300,15 @@ TEST(Demangle, AbiTags) {
EXPECT_STREQ("C[abi:bar][abi:foo]()", tmp);
}
+// Test one Rust symbol to exercise Demangle's delegation path. Rust demangling
+// itself is more thoroughly tested in demangle_rust_test.cc.
+TEST(Demangle, DelegatesToDemangleRustSymbolEncoding) {
+ char tmp[80];
+
+ EXPECT_TRUE(Demangle("_RNvC8my_crate7my_func", tmp, sizeof(tmp)));
+ EXPECT_STREQ("my_crate::my_func", tmp);
+}
+
// Tests that verify that Demangle footprint is within some limit.
// They are not to be run under sanitizers as the sanitizers increase
// stack consumption by about 4x.