summaryrefslogtreecommitdiff
path: root/absl/debugging
diff options
context:
space:
mode:
authorGravatar Chris Mihelich <cmihelic@google.com>2024-05-14 16:16:24 -0700
committerGravatar Copybara-Service <copybara-worker@google.com>2024-05-14 16:17:26 -0700
commiteba8db7baf6c326870f28e58977075b7b2fb243d (patch)
tree36ecf9b15163210fd8827c50bc02c1a0b8881dcc /absl/debugging
parentde8ae8715e38766e1564e2d2c738fa9c694200bd (diff)
Demangle Rust backrefs.
PiperOrigin-RevId: 633738511 Change-Id: I3f895d5de1aec5b5b9666523a328f3a3b0344e59
Diffstat (limited to 'absl/debugging')
-rw-r--r--absl/debugging/internal/demangle_rust.cc105
-rw-r--r--absl/debugging/internal/demangle_rust_test.cc66
2 files changed, 165 insertions, 6 deletions
diff --git a/absl/debugging/internal/demangle_rust.cc b/absl/debugging/internal/demangle_rust.cc
index f2a411d3..36f54161 100644
--- a/absl/debugging/internal/demangle_rust.cc
+++ b/absl/debugging/internal/demangle_rust.cc
@@ -154,7 +154,7 @@ class RustSymbolParser {
case 'Y': goto trait_definition;
case 'N': goto nested_path;
case 'I': return false; // generic-args not yet implemented
- case 'B': return false; // backref not yet implemented
+ case 'B': goto path_backref;
default: return false;
}
@@ -239,7 +239,7 @@ class RustSymbolParser {
}
if (Eat('F')) return false; // fn-type not yet implemented
if (Eat('D')) return false; // dyn-trait-type not yet implemented
- if (Eat('B')) return false; // backref not yet implemented
+ if (Eat('B')) goto type_backref;
goto path;
// tuple-type -> T type* E (T already consumed)
@@ -288,6 +288,32 @@ class RustSymbolParser {
}
--silence_depth_;
continue;
+
+ // backref -> B base-62-number (B already consumed)
+ //
+ // The BeginBackref call parses and range-checks the base-62-number. We
+ // always do that much.
+ //
+ // The recursive call parses and prints what the backref points at. We
+ // save CPU and stack by skipping this work if the output would be
+ // suppressed anyway.
+ path_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding);
+ }
+ EndBackref();
+ continue;
+
+ // This represents the same backref production as in path_backref but
+ // parses the target as a type instead of a path.
+ type_backref:
+ if (!BeginBackref()) return false;
+ if (silence_depth_ == 0) {
+ ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding);
+ }
+ EndBackref();
+ continue;
}
}
@@ -308,12 +334,22 @@ class RustSymbolParser {
kAfterSecondTupleElement,
kAfterThirdTupleElement,
kAfterSubsequentTupleElement,
+ kPathBackrefEnding,
+ kTypeBackrefEnding,
};
- // Element count for the stack_ array. A larger kStackSize accommodates more
+ // Element counts for the stack arrays. Larger stack sizes accommodate more
// deeply nested names at the cost of a larger footprint on the C++ call
// stack.
- enum { kStackSize = 256 };
+ enum {
+ // Maximum (recursive calls + N<uppercase> nested-names) open at one time.
+ kStackSize = 256,
+
+ // Maximum number of nested backrefs. We can keep this stack pretty small
+ // because we do not follow backrefs inside generic-args or other contexts
+ // that suppress printing, so deep stacking is unlikely in practice.
+ kPositionStackSize = 16,
+ };
// Returns the next input character without consuming it.
char Peek() const { return encoding_[pos_]; }
@@ -536,6 +572,58 @@ class RustSymbolParser {
// is not empty (data_stack_pointer_ < kStackSize).
std::uint8_t PopByte() { return stack_[data_stack_pointer_++]; }
+ // Pushes position onto the position stack and returns true if the stack is
+ // not full, else returns false.
+ ABSL_MUST_USE_RESULT bool PushPosition(int position) {
+ if (position_depth_ == kPositionStackSize) return false;
+ position_stack_[position_depth_++] = position;
+ return true;
+ }
+
+ // Pops the last pushed input position. Requires that the position stack is
+ // not empty (position_depth_ > 0).
+ int PopPosition() { return position_stack_[--position_depth_]; }
+
+ // Consumes a base-62-number denoting a backref target, pushes the current
+ // input position on the data stack, and sets the input position to the
+ // beginning of the backref target. Returns true on success. Returns false
+ // if parsing failed, the stack is exhausted, or the backref target position
+ // is out of range.
+ ABSL_MUST_USE_RESULT bool BeginBackref() {
+ // backref = B base-62-number (B already consumed)
+ //
+ // Reject backrefs that don't parse, overflow int, or don't point backward.
+ // If the offset looks fine, adjust it to account for the _R prefix.
+ int offset = 0;
+ const int offset_of_this_backref =
+ pos_ - 2 /* _R */ - 1 /* B already consumed */;
+ if (!ParseBase62Number(offset) || offset < 0 ||
+ offset >= offset_of_this_backref) {
+ return false;
+ }
+ offset += 2;
+
+ // Save the old position to restore later.
+ if (!PushPosition(pos_)) return false;
+
+ // Move the input position to the backref target.
+ //
+ // Note that we do not check whether the new position points to the
+ // beginning of a construct matching the context in which the backref
+ // appeared. We just jump to it and see whether nested parsing succeeds.
+ // We therefore accept various wrong manglings, e.g., a type backref
+ // pointing to an 'l' character inside an identifier, which happens to mean
+ // i32 when parsed as a type mangling. This saves the complexity and RAM
+ // footprint of remembering which offsets began which kinds of
+ // substructures. Existing demanglers take similar shortcuts.
+ pos_ = offset;
+ return true;
+ }
+
+ // Cleans up after a backref production by restoring the previous input
+ // position from the data stack.
+ void EndBackref() { pos_ = PopPosition(); }
+
// Call and data stacks reside in stack_. The leftmost depth_ elements
// contain ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE. The elements
// from index data_stack_pointer_ to the right edge of stack_ contain bytes
@@ -544,14 +632,19 @@ class RustSymbolParser {
int data_stack_pointer_ = kStackSize;
int depth_ = 0;
+ // The leftmost position_depth_ elements of position_stack_ contain the input
+ // positions to return to after fully printing the targets of backrefs.
+ int position_stack_[kPositionStackSize] = {};
+ int position_depth_ = 0;
+
// Anything parsed while silence_depth_ > 0 contributes nothing to the
// demangled output. For constructs omitted from the demangling, such as
// impl-path and the contents of generic-args, we will increment
// silence_depth_ on the way in and decrement silence_depth_ on the way out.
int silence_depth_ = 0;
- // Input: encoding_ points just after the _R in a Rust mangled symbol, and
- // encoding_[pos_] is the next input character to be scanned.
+ // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is
+ // the next input character to be scanned.
int pos_ = 0;
const char* encoding_ = nullptr;
diff --git a/absl/debugging/internal/demangle_rust_test.cc b/absl/debugging/internal/demangle_rust_test.cc
index a8ef60d6..c931304d 100644
--- a/absl/debugging/internal/demangle_rust_test.cc
+++ b/absl/debugging/internal/demangle_rust_test.cc
@@ -294,6 +294,72 @@ TEST(DemangleRust, LongerTuplesAbbreviated) {
"<(i32, u32, i128, ...) as c::t>::f");
}
+TEST(DemangleRust, PathBackrefToCrate) {
+ EXPECT_DEMANGLING("_RNvYNtC8my_crate9my_structNtB4_8my_trait1f",
+ "<my_crate::my_struct as my_crate::my_trait>::f");
+}
+
+TEST(DemangleRust, PathBackrefToNestedPath) {
+ EXPECT_DEMANGLING("_RNvYNtNtC1c1m1sNtB4_1t1f", "<c::m::s as c::m::t>::f");
+}
+
+TEST(DemangleRust, PathBackrefAsInstantiatingCrate) {
+ EXPECT_DEMANGLING("_RNCNvC8my_crate7my_func0B3_",
+ "my_crate::my_func::{closure#0}");
+}
+
+TEST(DemangleRust, TypeBackrefsNestedInTuple) {
+ EXPECT_DEMANGLING("_RNvYTTRlB4_ERB3_ENtC1c1t1f",
+ "<((&i32, &i32), &(&i32, &i32)) as c::t>::f");
+}
+
+TEST(DemangleRust, NoInfiniteLoopOnBackrefToTheWhole) {
+ EXPECT_DEMANGLING_FAILS("_RB_");
+ EXPECT_DEMANGLING_FAILS("_RNvB_1sNtC1c1t1f");
+}
+
+TEST(DemangleRust, NoCrashOnForwardBackref) {
+ EXPECT_DEMANGLING_FAILS("_RB0_");
+ EXPECT_DEMANGLING_FAILS("_RB1_");
+ EXPECT_DEMANGLING_FAILS("_RB2_");
+ EXPECT_DEMANGLING_FAILS("_RB3_");
+ EXPECT_DEMANGLING_FAILS("_RB4_");
+}
+
+TEST(DemangleRust, PathBackrefsDoNotRecurseDuringSilence) {
+ // B_ points at the value f (the whole mangling), so the cycle would lead to
+ // parse failure if the parser tried to parse what was pointed to.
+ EXPECT_DEMANGLING("_RNvYTlmnNtB_1sENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+}
+
+TEST(DemangleRust, TypeBackrefsDoNotRecurseDuringSilence) {
+ // B2_ points at the tuple type, likewise making a cycle that the parser
+ // avoids following.
+ EXPECT_DEMANGLING("_RNvYTlmnB2_ENtC1c1t1f",
+ "<(i32, u32, i128, ...) as c::t>::f");
+}
+
+TEST(DemangleRust, ReturnFromBackrefToInputPosition256) {
+ // Show that we can resume at input positions that don't fit into a byte.
+ EXPECT_DEMANGLING("_RNvYNtC1c238very_long_type_"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABC"
+ "NtB4_1t1f",
+ "<c::very_long_type_"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABCDEFGHIJabcdefghij"
+ "ABCDEFGHIJabcdefghijABC"
+ " as c::t>::f");
+}
+
} // namespace
} // namespace debugging_internal
ABSL_NAMESPACE_END