// Copyright 2024 The Abseil Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // https://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include "absl/debugging/internal/demangle_rust.h" #include #include #include #include #include "absl/base/attributes.h" #include "absl/base/config.h" #include "absl/debugging/internal/decode_rust_punycode.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace debugging_internal { namespace { // Same step limit as the C++ demangler in demangle.cc uses. constexpr int kMaxReturns = 1 << 17; bool IsDigit(char c) { return '0' <= c && c <= '9'; } bool IsLower(char c) { return 'a' <= c && c <= 'z'; } bool IsUpper(char c) { return 'A' <= c && c <= 'Z'; } bool IsAlpha(char c) { return IsLower(c) || IsUpper(c); } bool IsIdentifierChar(char c) { return IsAlpha(c) || IsDigit(c) || c == '_'; } bool IsLowerHexDigit(char c) { return IsDigit(c) || ('a' <= c && c <= 'f'); } const char* BasicTypeName(char c) { switch (c) { case 'a': return "i8"; case 'b': return "bool"; case 'c': return "char"; case 'd': return "f64"; case 'e': return "str"; case 'f': return "f32"; case 'h': return "u8"; case 'i': return "isize"; case 'j': return "usize"; case 'l': return "i32"; case 'm': return "u32"; case 'n': return "i128"; case 'o': return "u128"; case 'p': return "_"; case 's': return "i16"; case 't': return "u16"; case 'u': return "()"; case 'v': return "..."; case 'x': return "i64"; case 'y': return "u64"; case 'z': return "!"; } return nullptr; } // Parser for Rust symbol mangling v0, whose grammar is defined here: // // https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary class RustSymbolParser { public: // Prepares to demangle the given encoding, a Rust symbol name starting with // _R, into the output buffer [out, out_end). The caller is expected to // continue by calling the new object's Parse function. RustSymbolParser(const char* encoding, char* out, char* const out_end) : encoding_(encoding), out_(out), out_end_(out_end) { if (out_ != out_end_) *out_ = '\0'; } // Parses the constructor's encoding argument, writing output into the range // [out, out_end). Returns true on success and false for input whose // structure was not recognized or exceeded implementation limits, such as by // nesting structures too deep. In either case *this should not be used // again. ABSL_MUST_USE_RESULT bool Parse() && { // Recursively parses the grammar production named by callee, then resumes // execution at the next statement. // // Recursive-descent parsing is a beautifully readable translation of a // grammar, but it risks stack overflow if implemented by naive recursion on // the C++ call stack. So we simulate recursion by goto and switch instead, // keeping a bounded stack of "return addresses" in the recursion_stack_ // member. // // The callee argument is a statement label. We goto that label after // saving the "return address" on recursion_stack_. The next continue // statement in the for loop below "returns" from this "call". // // The caller argument names the return point. Each value of caller must // appear in only one ABSL_DEMANGLER_RECURSE call and be listed in the // definition of enum ReturnAddress. The switch implements the control // transfer from the end of a "called" subroutine back to the statement // after the "call". // // Note that not all the grammar productions have to be packed into the // switch, but only those which appear in a cycle in the grammar. Anything // acyclic can be written as ordinary functions and function calls, e.g., // ParseIdentifier. #define ABSL_DEMANGLER_RECURSE(callee, caller) \ do { \ if (recursion_depth_ == kStackSize) return false; \ /* The next continue will switch on this saved value ... */ \ recursion_stack_[recursion_depth_++] = caller; \ goto callee; \ /* ... and will land here, resuming the suspended code. */ \ case caller: {} \ } while (0) // Parse the encoding, counting completed recursive calls to guard against // excessively complex input and infinite-loop bugs. int iter = 0; goto whole_encoding; for (; iter < kMaxReturns && recursion_depth_ > 0; ++iter) { // This switch resumes the code path most recently suspended by // ABSL_DEMANGLER_RECURSE. switch (recursion_stack_[--recursion_depth_]) { // // symbol-name -> // _R decimal-number? path instantiating-crate? vendor-specific-suffix? whole_encoding: if (!Eat('_') || !Eat('R')) return false; // decimal-number? is always empty today, so proceed to path, which // can't start with a decimal digit. ABSL_DEMANGLER_RECURSE(path, kInstantiatingCrate); if (IsAlpha(Peek())) { ++silence_depth_; // Print nothing more from here on. ABSL_DEMANGLER_RECURSE(path, kVendorSpecificSuffix); } switch (Take()) { case '.': case '$': case '\0': return true; } return false; // unexpected trailing content // path -> crate-root | inherent-impl | trait-impl | trait-definition | // nested-path | generic-args | backref // // Note that ABSL_DEMANGLER_RECURSE does not work inside a nested switch // (which would hide the generated case label). Thus we jump out of the // inner switch with gotos before performing any fake recursion. path: switch (Take()) { case 'C': goto crate_root; case 'M': goto inherent_impl; case 'X': goto trait_impl; case 'Y': goto trait_definition; case 'N': goto nested_path; case 'I': goto generic_args; case 'B': goto path_backref; default: return false; } // crate-root -> C identifier (C consumed above) crate_root: if (!ParseIdentifier()) return false; continue; // inherent-impl -> M impl-path type (M already consumed) inherent_impl: if (!Emit("<")) return false; ABSL_DEMANGLER_RECURSE(impl_path, kInherentImplType); ABSL_DEMANGLER_RECURSE(type, kInherentImplEnding); if (!Emit(">")) return false; continue; // trait-impl -> X impl-path type path (X already consumed) trait_impl: if (!Emit("<")) return false; ABSL_DEMANGLER_RECURSE(impl_path, kTraitImplType); ABSL_DEMANGLER_RECURSE(type, kTraitImplInfix); if (!Emit(" as ")) return false; ABSL_DEMANGLER_RECURSE(path, kTraitImplEnding); if (!Emit(">")) return false; continue; // impl-path -> disambiguator? path (but never print it!) impl_path: ++silence_depth_; { int ignored_disambiguator; if (!ParseDisambiguator(ignored_disambiguator)) return false; } ABSL_DEMANGLER_RECURSE(path, kImplPathEnding); --silence_depth_; continue; // trait-definition -> Y type path (Y already consumed) trait_definition: if (!Emit("<")) return false; ABSL_DEMANGLER_RECURSE(type, kTraitDefinitionInfix); if (!Emit(" as ")) return false; ABSL_DEMANGLER_RECURSE(path, kTraitDefinitionEnding); if (!Emit(">")) return false; continue; // nested-path -> N namespace path identifier (N already consumed) // namespace -> lower | upper nested_path: // Uppercase namespaces must be saved on a stack so we can print // ::{closure#0} or ::{shim:vtable#0} or ::{X:name#0} as needed. if (IsUpper(Peek())) { if (!PushNamespace(Take())) return false; ABSL_DEMANGLER_RECURSE(path, kIdentifierInUppercaseNamespace); if (!Emit("::")) return false; if (!ParseIdentifier(PopNamespace())) return false; continue; } // Lowercase namespaces, however, are never represented in the output; // they all emit just ::name. if (IsLower(Take())) { ABSL_DEMANGLER_RECURSE(path, kIdentifierInLowercaseNamespace); if (!Emit("::")) return false; if (!ParseIdentifier()) return false; continue; } // Neither upper or lower return false; // type -> basic-type | array-type | slice-type | tuple-type | // ref-type | mut-ref-type | const-ptr-type | mut-ptr-type | // fn-type | dyn-trait-type | path | backref // // We use ifs instead of switch (Take()) because the default case jumps // to path, which will need to see the first character not yet Taken // from the input. Because we do not use a nested switch here, // ABSL_DEMANGLER_RECURSE works fine in the 'S' case. type: if (IsLower(Peek())) { const char* type_name = BasicTypeName(Take()); if (type_name == nullptr || !Emit(type_name)) return false; continue; } if (Eat('A')) { // array-type = A type const if (!Emit("[")) return false; ABSL_DEMANGLER_RECURSE(type, kArraySize); if (!Emit("; ")) return false; ABSL_DEMANGLER_RECURSE(constant, kFinishArray); if (!Emit("]")) return false; continue; } if (Eat('S')) { if (!Emit("[")) return false; ABSL_DEMANGLER_RECURSE(type, kSliceEnding); if (!Emit("]")) return false; continue; } if (Eat('T')) goto tuple_type; if (Eat('R')) { if (!Emit("&")) return false; if (!ParseOptionalLifetime()) return false; goto type; } if (Eat('Q')) { if (!Emit("&mut ")) return false; if (!ParseOptionalLifetime()) return false; goto type; } if (Eat('P')) { if (!Emit("*const ")) return false; goto type; } if (Eat('O')) { if (!Emit("*mut ")) return false; goto type; } if (Eat('F')) goto fn_type; if (Eat('D')) goto dyn_trait_type; if (Eat('B')) goto type_backref; goto path; // tuple-type -> T type* E (T already consumed) tuple_type: if (!Emit("(")) return false; // The toolchain should call the unit type u instead of TE, but the // grammar and other demanglers also recognize TE, so we do too. if (Eat('E')) { if (!Emit(")")) return false; continue; } // A tuple with one element is rendered (type,) instead of (type). ABSL_DEMANGLER_RECURSE(type, kAfterFirstTupleElement); if (Eat('E')) { if (!Emit(",)")) return false; continue; } // A tuple with two elements is of course (x, y). if (!Emit(", ")) return false; ABSL_DEMANGLER_RECURSE(type, kAfterSecondTupleElement); if (Eat('E')) { if (!Emit(")")) return false; continue; } // And (x, y, z) for three elements. if (!Emit(", ")) return false; ABSL_DEMANGLER_RECURSE(type, kAfterThirdTupleElement); if (Eat('E')) { if (!Emit(")")) return false; continue; } // For longer tuples we write (x, y, z, ...), printing none of the // content of the fourth and later types. Thus we avoid exhausting // output buffers and human readers' patience when some library has a // long tuple as an implementation detail, without having to // completely obfuscate all tuples. if (!Emit(", ...)")) return false; ++silence_depth_; while (!Eat('E')) { ABSL_DEMANGLER_RECURSE(type, kAfterSubsequentTupleElement); } --silence_depth_; continue; // fn-type -> F fn-sig (F already consumed) // fn-sig -> binder? U? (K abi)? type* E type // abi -> C | undisambiguated-identifier // // We follow the C++ demangler in suppressing details of function // signatures. Every function type is rendered "fn...". fn_type: if (!Emit("fn...")) return false; ++silence_depth_; if (!ParseOptionalBinder()) return false; (void)Eat('U'); if (Eat('K')) { if (!Eat('C') && !ParseUndisambiguatedIdentifier()) return false; } while (!Eat('E')) { ABSL_DEMANGLER_RECURSE(type, kContinueParameterList); } ABSL_DEMANGLER_RECURSE(type, kFinishFn); --silence_depth_; continue; // dyn-trait-type -> D dyn-bounds lifetime (D already consumed) // dyn-bounds -> binder? dyn-trait* E // // The grammar strangely allows an empty trait list, even though the // compiler should never output one. We follow existing demanglers in // rendering DEL_ as "dyn ". // // Because auto traits lengthen a type name considerably without // providing much value to a search for related source code, it would be // desirable to abbreviate // dyn main::Trait + std::marker::Copy + std::marker::Send // to // dyn main::Trait + ..., // eliding the auto traits. But it is difficult to do so correctly, in // part because there is no guarantee that the mangling will list the // main trait first. So we just print all the traits in their order of // appearance in the mangled name. dyn_trait_type: if (!Emit("dyn ")) return false; if (!ParseOptionalBinder()) return false; if (!Eat('E')) { ABSL_DEMANGLER_RECURSE(dyn_trait, kBeginAutoTraits); while (!Eat('E')) { if (!Emit(" + ")) return false; ABSL_DEMANGLER_RECURSE(dyn_trait, kContinueAutoTraits); } } if (!ParseRequiredLifetime()) return false; continue; // dyn-trait -> path dyn-trait-assoc-binding* // dyn-trait-assoc-binding -> p undisambiguated-identifier type // // We render nonempty binding lists as <>, omitting their contents as // for generic-args. dyn_trait: ABSL_DEMANGLER_RECURSE(path, kContinueDynTrait); if (Peek() == 'p') { if (!Emit("<>")) return false; ++silence_depth_; while (Eat('p')) { if (!ParseUndisambiguatedIdentifier()) return false; ABSL_DEMANGLER_RECURSE(type, kContinueAssocBinding); } --silence_depth_; } continue; // const -> type const-data | p | backref // // const is a C++ keyword, so we use the label `constant` instead. constant: if (Eat('B')) goto const_backref; if (Eat('p')) { if (!Emit("_")) return false; continue; } // Scan the type without printing it. // // The Rust language restricts the type of a const generic argument // much more than the mangling grammar does. We do not enforce this. // // We also do not bother printing false, true, 'A', and '\u{abcd}' for // the types bool and char. Because we do not print generic-args // contents, we expect to print constants only in array sizes, and // those should not be bool or char. ++silence_depth_; ABSL_DEMANGLER_RECURSE(type, kConstData); --silence_depth_; // const-data -> n? hex-digit* _ // // Although the grammar doesn't say this, existing demanglers expect // that zero is 0, not an empty digit sequence, and no nonzero value // may have leading zero digits. Also n0_ is accepted and printed as // -0, though a toolchain will probably never write that encoding. if (Eat('n') && !EmitChar('-')) return false; if (!Emit("0x")) return false; if (Eat('0')) { if (!EmitChar('0')) return false; if (!Eat('_')) return false; continue; } while (IsLowerHexDigit(Peek())) { if (!EmitChar(Take())) return false; } if (!Eat('_')) return false; continue; // generic-args -> I path generic-arg* E (I already consumed) // // We follow the C++ demangler in omitting all the arguments from the // output, printing only the list opening and closing tokens. generic_args: ABSL_DEMANGLER_RECURSE(path, kBeginGenericArgList); if (!Emit("::<>")) return false; ++silence_depth_; while (!Eat('E')) { ABSL_DEMANGLER_RECURSE(generic_arg, kContinueGenericArgList); } --silence_depth_; continue; // generic-arg -> lifetime | type | K const generic_arg: if (Peek() == 'L') { if (!ParseOptionalLifetime()) return false; continue; } if (Eat('K')) goto constant; goto type; // backref -> B base-62-number (B already consumed) // // The BeginBackref call parses and range-checks the base-62-number. We // always do that much. // // The recursive call parses and prints what the backref points at. We // save CPU and stack by skipping this work if the output would be // suppressed anyway. path_backref: if (!BeginBackref()) return false; if (silence_depth_ == 0) { ABSL_DEMANGLER_RECURSE(path, kPathBackrefEnding); } EndBackref(); continue; // This represents the same backref production as in path_backref but // parses the target as a type instead of a path. type_backref: if (!BeginBackref()) return false; if (silence_depth_ == 0) { ABSL_DEMANGLER_RECURSE(type, kTypeBackrefEnding); } EndBackref(); continue; const_backref: if (!BeginBackref()) return false; if (silence_depth_ == 0) { ABSL_DEMANGLER_RECURSE(constant, kConstantBackrefEnding); } EndBackref(); continue; } } return false; // hit iteration limit or a bug in our stack handling } private: // Enumerates resumption points for ABSL_DEMANGLER_RECURSE calls. enum ReturnAddress : uint8_t { kInstantiatingCrate, kVendorSpecificSuffix, kIdentifierInUppercaseNamespace, kIdentifierInLowercaseNamespace, kInherentImplType, kInherentImplEnding, kTraitImplType, kTraitImplInfix, kTraitImplEnding, kImplPathEnding, kTraitDefinitionInfix, kTraitDefinitionEnding, kArraySize, kFinishArray, kSliceEnding, kAfterFirstTupleElement, kAfterSecondTupleElement, kAfterThirdTupleElement, kAfterSubsequentTupleElement, kContinueParameterList, kFinishFn, kBeginAutoTraits, kContinueAutoTraits, kContinueDynTrait, kContinueAssocBinding, kConstData, kBeginGenericArgList, kContinueGenericArgList, kPathBackrefEnding, kTypeBackrefEnding, kConstantBackrefEnding, }; // Element counts for the stack arrays. Larger stack sizes accommodate more // deeply nested names at the cost of a larger footprint on the C++ call // stack. enum { // Maximum recursive calls outstanding at one time. kStackSize = 256, // Maximum N nested-paths open at once. We do not expect // closures inside closures inside closures as much as functions inside // modules inside other modules, so we can use a smaller array here. kNamespaceStackSize = 64, // Maximum number of nested backrefs. We can keep this stack pretty small // because we do not follow backrefs inside generic-args or other contexts // that suppress printing, so deep stacking is unlikely in practice. kPositionStackSize = 16, }; // Returns the next input character without consuming it. char Peek() const { return encoding_[pos_]; } // Consumes and returns the next input character. char Take() { return encoding_[pos_++]; } // If the next input character is the given character, consumes it and returns // true; otherwise returns false without consuming a character. ABSL_MUST_USE_RESULT bool Eat(char want) { if (encoding_[pos_] != want) return false; ++pos_; return true; } // Provided there is enough remaining output space, appends c to the output, // writing a fresh NUL terminator afterward, and returns true. Returns false // if the output buffer had less than two bytes free. ABSL_MUST_USE_RESULT bool EmitChar(char c) { if (silence_depth_ > 0) return true; if (out_end_ - out_ < 2) return false; *out_++ = c; *out_ = '\0'; return true; } // Provided there is enough remaining output space, appends the C string token // to the output, followed by a NUL character, and returns true. Returns // false if not everything fit into the output buffer. ABSL_MUST_USE_RESULT bool Emit(const char* token) { if (silence_depth_ > 0) return true; const size_t token_length = std::strlen(token); const size_t bytes_to_copy = token_length + 1; // token and final NUL if (static_cast(out_end_ - out_) < bytes_to_copy) return false; std::memcpy(out_, token, bytes_to_copy); out_ += token_length; return true; } // Provided there is enough remaining output space, appends the decimal form // of disambiguator (if it's nonnegative) or "?" (if it's negative) to the // output, followed by a NUL character, and returns true. Returns false if // not everything fit into the output buffer. ABSL_MUST_USE_RESULT bool EmitDisambiguator(int disambiguator) { if (disambiguator < 0) return EmitChar('?'); // parsed but too large if (disambiguator == 0) return EmitChar('0'); // Convert disambiguator to decimal text. Three digits per byte is enough // because 999 > 256. The bound will remain correct even if future // maintenance changes the type of the disambiguator variable. char digits[3 * sizeof(disambiguator)] = {}; size_t leading_digit_index = sizeof(digits) - 1; for (; disambiguator > 0; disambiguator /= 10) { digits[--leading_digit_index] = static_cast('0' + disambiguator % 10); } return Emit(digits + leading_digit_index); } // Consumes an optional disambiguator (s123_) from the input. // // On success returns true and fills value with the encoded value if it was // not too big, otherwise with -1. If the optional disambiguator was omitted, // value is 0. On parse failure returns false and sets value to -1. ABSL_MUST_USE_RESULT bool ParseDisambiguator(int& value) { value = -1; // disambiguator = s base-62-number // // Disambiguators are optional. An omitted disambiguator is zero. if (!Eat('s')) { value = 0; return true; } int base_62_value = 0; if (!ParseBase62Number(base_62_value)) return false; value = base_62_value < 0 ? -1 : base_62_value + 1; return true; } // Consumes a base-62 number like _ or 123_ from the input. // // On success returns true and fills value with the encoded value if it was // not too big, otherwise with -1. On parse failure returns false and sets // value to -1. ABSL_MUST_USE_RESULT bool ParseBase62Number(int& value) { value = -1; // base-62-number = (digit | lower | upper)* _ // // An empty base-62 digit sequence means 0. if (Eat('_')) { value = 0; return true; } // A nonempty digit sequence denotes its base-62 value plus 1. int encoded_number = 0; bool overflowed = false; while (IsAlpha(Peek()) || IsDigit(Peek())) { const char c = Take(); if (encoded_number >= std::numeric_limits::max()/62) { // If we are close to overflowing an int, keep parsing but stop updating // encoded_number and remember to return -1 at the end. The point is to // avoid undefined behavior while parsing crate-root disambiguators, // which are large in practice but not shown in demangling, while // successfully computing closure and shim disambiguators, which are // typically small and are printed out. overflowed = true; } else { int digit; if (IsDigit(c)) { digit = c - '0'; } else if (IsLower(c)) { digit = c - 'a' + 10; } else { digit = c - 'A' + 36; } encoded_number = 62 * encoded_number + digit; } } if (!Eat('_')) return false; if (!overflowed) value = encoded_number + 1; return true; } // Consumes an identifier from the input, returning true on success. // // A nonzero uppercase_namespace specifies the character after the N in a // nested-identifier, e.g., 'C' for a closure, allowing ParseIdentifier to // write out the name with the conventional decoration for that namespace. ABSL_MUST_USE_RESULT bool ParseIdentifier(char uppercase_namespace = '\0') { // identifier -> disambiguator? undisambiguated-identifier int disambiguator = 0; if (!ParseDisambiguator(disambiguator)) return false; return ParseUndisambiguatedIdentifier(uppercase_namespace, disambiguator); } // Consumes from the input an identifier with no preceding disambiguator, // returning true on success. // // When ParseIdentifier calls this, it passes the N character and // disambiguator value so that "{closure#42}" and similar forms can be // rendered correctly. // // At other appearances of undisambiguated-identifier in the grammar, this // treatment is not applicable, and the call site omits both arguments. ABSL_MUST_USE_RESULT bool ParseUndisambiguatedIdentifier( char uppercase_namespace = '\0', int disambiguator = 0) { // undisambiguated-identifier -> u? decimal-number _? bytes const bool is_punycoded = Eat('u'); if (!IsDigit(Peek())) return false; int num_bytes = 0; if (!ParseDecimalNumber(num_bytes)) return false; (void)Eat('_'); // optional separator, needed if a digit follows if (is_punycoded) { DecodeRustPunycodeOptions options; options.punycode_begin = &encoding_[pos_]; options.punycode_end = &encoding_[pos_] + num_bytes; options.out_begin = out_; options.out_end = out_end_; out_ = DecodeRustPunycode(options); if (out_ == nullptr) return false; pos_ += static_cast(num_bytes); } // Emit the beginnings of braced forms like {shim:vtable#0}. if (uppercase_namespace != '\0') { switch (uppercase_namespace) { case 'C': if (!Emit("{closure")) return false; break; case 'S': if (!Emit("{shim")) return false; break; default: if (!EmitChar('{') || !EmitChar(uppercase_namespace)) return false; break; } if (num_bytes > 0 && !Emit(":")) return false; } // Emit the name itself. if (!is_punycoded) { for (int i = 0; i < num_bytes; ++i) { const char c = Take(); if (!IsIdentifierChar(c) && // The spec gives toolchains the choice of Punycode or raw UTF-8 for // identifiers containing code points above 0x7f, so accept bytes // with the high bit set. (c & 0x80) == 0) { return false; } if (!EmitChar(c)) return false; } } // Emit the endings of braced forms, e.g., "#42}". if (uppercase_namespace != '\0') { if (!EmitChar('#')) return false; if (!EmitDisambiguator(disambiguator)) return false; if (!EmitChar('}')) return false; } return true; } // Consumes a decimal number like 0 or 123 from the input. On success returns // true and fills value with the encoded value. If the encoded value is too // large or otherwise unparsable, returns false and sets value to -1. ABSL_MUST_USE_RESULT bool ParseDecimalNumber(int& value) { value = -1; if (!IsDigit(Peek())) return false; int encoded_number = Take() - '0'; if (encoded_number == 0) { // Decimal numbers are never encoded with extra leading zeroes. value = 0; return true; } while (IsDigit(Peek()) && // avoid overflow encoded_number < std::numeric_limits::max()/10) { encoded_number = 10 * encoded_number + (Take() - '0'); } if (IsDigit(Peek())) return false; // too big value = encoded_number; return true; } // Consumes a binder of higher-ranked lifetimes if one is present. On success // returns true and discards the encoded lifetime count. On parse failure // returns false. ABSL_MUST_USE_RESULT bool ParseOptionalBinder() { // binder -> G base-62-number if (!Eat('G')) return true; int ignored_binding_count; return ParseBase62Number(ignored_binding_count); } // Consumes a lifetime if one is present. // // On success returns true and discards the lifetime index. We do not print // or even range-check lifetimes because they are a finer detail than other // things we omit from output, such as the entire contents of generic-args. // // On parse failure returns false. ABSL_MUST_USE_RESULT bool ParseOptionalLifetime() { // lifetime -> L base-62-number if (!Eat('L')) return true; int ignored_de_bruijn_index; return ParseBase62Number(ignored_de_bruijn_index); } // Consumes a lifetime just like ParseOptionalLifetime, but returns false if // there is no lifetime here. ABSL_MUST_USE_RESULT bool ParseRequiredLifetime() { if (Peek() != 'L') return false; return ParseOptionalLifetime(); } // Pushes ns onto the namespace stack and returns true if the stack is not // full, else returns false. ABSL_MUST_USE_RESULT bool PushNamespace(char ns) { if (namespace_depth_ == kNamespaceStackSize) return false; namespace_stack_[namespace_depth_++] = ns; return true; } // Pops the last pushed namespace. Requires that the namespace stack is not // empty (namespace_depth_ > 0). char PopNamespace() { return namespace_stack_[--namespace_depth_]; } // Pushes position onto the position stack and returns true if the stack is // not full, else returns false. ABSL_MUST_USE_RESULT bool PushPosition(int position) { if (position_depth_ == kPositionStackSize) return false; position_stack_[position_depth_++] = position; return true; } // Pops the last pushed input position. Requires that the position stack is // not empty (position_depth_ > 0). int PopPosition() { return position_stack_[--position_depth_]; } // Consumes a base-62-number denoting a backref target, pushes the current // input position on the data stack, and sets the input position to the // beginning of the backref target. Returns true on success. Returns false // if parsing failed, the stack is exhausted, or the backref target position // is out of range. ABSL_MUST_USE_RESULT bool BeginBackref() { // backref = B base-62-number (B already consumed) // // Reject backrefs that don't parse, overflow int, or don't point backward. // If the offset looks fine, adjust it to account for the _R prefix. int offset = 0; const int offset_of_this_backref = pos_ - 2 /* _R */ - 1 /* B already consumed */; if (!ParseBase62Number(offset) || offset < 0 || offset >= offset_of_this_backref) { return false; } offset += 2; // Save the old position to restore later. if (!PushPosition(pos_)) return false; // Move the input position to the backref target. // // Note that we do not check whether the new position points to the // beginning of a construct matching the context in which the backref // appeared. We just jump to it and see whether nested parsing succeeds. // We therefore accept various wrong manglings, e.g., a type backref // pointing to an 'l' character inside an identifier, which happens to mean // i32 when parsed as a type mangling. This saves the complexity and RAM // footprint of remembering which offsets began which kinds of // substructures. Existing demanglers take similar shortcuts. pos_ = offset; return true; } // Cleans up after a backref production by restoring the previous input // position from the data stack. void EndBackref() { pos_ = PopPosition(); } // The leftmost recursion_depth_ elements of recursion_stack_ contain the // ReturnAddresses pushed by ABSL_DEMANGLER_RECURSE calls not yet completed. ReturnAddress recursion_stack_[kStackSize] = {}; int recursion_depth_ = 0; // The leftmost namespace_depth_ elements of namespace_stack_ contain the // uppercase namespace identifiers for open nested-paths, e.g., 'C' for a // closure. char namespace_stack_[kNamespaceStackSize] = {}; int namespace_depth_ = 0; // The leftmost position_depth_ elements of position_stack_ contain the input // positions to return to after fully printing the targets of backrefs. int position_stack_[kPositionStackSize] = {}; int position_depth_ = 0; // Anything parsed while silence_depth_ > 0 contributes nothing to the // demangled output. For constructs omitted from the demangling, such as // impl-path and the contents of generic-args, we will increment // silence_depth_ on the way in and decrement silence_depth_ on the way out. int silence_depth_ = 0; // Input: encoding_ points to a Rust mangled symbol, and encoding_[pos_] is // the next input character to be scanned. int pos_ = 0; const char* encoding_ = nullptr; // Output: *out_ is where the next output character should be written, and // out_end_ points past the last byte of available space. char* out_ = nullptr; char* out_end_ = nullptr; }; } // namespace bool DemangleRustSymbolEncoding(const char* mangled, char* out, size_t out_size) { return RustSymbolParser(mangled, out, out + out_size).Parse(); } } // namespace debugging_internal ABSL_NAMESPACE_END } // namespace absl