diff options
author | Abseil Team <absl-team@google.com> | 2023-03-16 09:24:49 -0700 |
---|---|---|
committer | Copybara-Service <copybara-worker@google.com> | 2023-03-16 09:25:27 -0700 |
commit | 50a9e2b27ee7144c306c0d8b40290e131c7f1b34 (patch) | |
tree | f3297a3fde02585e931e280e794b4cb786753a81 | |
parent | 256cc61a2d3a6f6dbc9f71d1f976fb9ca9306da5 (diff) |
Be more consistent in the documentation and variable declarations for base64 escaping vs unescaping:
* Move the table documenting the escaping conversions next to the internal escaping code that implements it (Base64EscapeInternal in internal/escaping.cc), and reference it from the unescaping code that reverses it (Base64UnescapeInternal in the public escaping.cc).
* Centralize the two arrays that map 6-bit values to their escaped chars (k(WebSafe)Base64Chars) in internal/escaping.cc, and add bidirectional documentation cross-references between them and their reverse (unescaping) maps (kUn(WebSafe)Base64).
* Remove redundant list of escaping/unescaping methods in the public escaping.cc.
PiperOrigin-RevId: 517142667
Change-Id: I627e59a196ef855e5bf61ef7f4b509920e8d7acd
-rw-r--r-- | absl/strings/escaping.cc | 42 | ||||
-rw-r--r-- | absl/strings/internal/escaping.cc | 23 | ||||
-rw-r--r-- | absl/strings/internal/escaping.h | 1 |
3 files changed, 34 insertions, 32 deletions
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc index 93966846..2827fbaa 100644 --- a/absl/strings/escaping.cc +++ b/absl/strings/escaping.cc @@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) { } } +// Reverses the mapping in Base64EscapeInternal; see that method's +// documentation for details of the mapping. bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, size_t szdest, const signed char* unbase64, size_t* len) { @@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, return ok; } -// The arrays below were generated by the following code +// The arrays below map base64-escaped characters back to their original values. +// For the inverse case, see k(WebSafe)Base64Chars in the internal +// escaping.cc. +// These arrays were generated by the following inversion code: // #include <sys/time.h> // #include <stdlib.h> // #include <string.h> @@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest, // } // } // -// where the value of "Base64[]" was replaced by one of the base-64 conversion -// tables from the functions below. +// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars +// in the internal escaping.cc. 
/* clang-format off */ constexpr signed char kUnBase64[] = { -1, -1, -1, -1, -1, -1, -1, -1, @@ -777,9 +782,6 @@ constexpr signed char kUnWebSafeBase64[] = { }; /* clang-format on */ -constexpr char kWebSafeBase64Chars[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - template <typename String> bool Base64UnescapeInternal(const char* src, size_t slen, String* dest, const signed char* unbase64) { @@ -880,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) { return CEscapeInternal(src, true, true); } -// ---------------------------------------------------------------------- -// Base64Unescape() - base64 decoder -// Base64Escape() - base64 encoder -// WebSafeBase64Unescape() - Google's variation of base64 decoder -// WebSafeBase64Escape() - Google's variation of base64 encoder -// -// Check out -// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but -// what we care about is that... -// Take the encoded stuff in groups of 4 characters and turn each -// character into a code 0 to 63 thus: -// A-Z map to 0 to 25 -// a-z map to 26 to 51 -// 0-9 map to 52 to 61 -// +(- for WebSafe) maps to 62 -// /(_ for WebSafe) maps to 63 -// There will be four numbers, all less than 64 which can be represented -// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). -// Arrange the 6 digit binary numbers into three bytes as such: -// aaaaaabb bbbbcccc ccdddddd -// Equals signs (one or two) are used at the end of the encoded block to -// indicate that the text was not an integer multiple of three bytes long. 
-// ---------------------------------------------------------------------- - bool Base64Unescape(absl::string_view src, std::string* dest) { return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64); } @@ -921,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) { void WebSafeBase64Escape(absl::string_view src, std::string* dest) { strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); } std::string Base64Escape(absl::string_view src) { @@ -936,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) { std::string dest; strings_internal::Base64EscapeInternal( reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest, - false, kWebSafeBase64Chars); + false, strings_internal::kWebSafeBase64Chars); return dest; } diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc index 8bd0890d..56a4cbed 100644 --- a/absl/strings/internal/escaping.cc +++ b/absl/strings/internal/escaping.cc @@ -21,9 +21,17 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace strings_internal { +// The two strings below provide maps from normal 6-bit characters to their +// base64-escaped equivalent. +// For the inverse case, see kUn(WebSafe)Base64 in the external +// escaping.cc. ABSL_CONST_INIT const char kBase64Chars[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; +ABSL_CONST_INIT const char kWebSafeBase64Chars[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; + + size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { // Base64 encodes three bytes of input at a time. If the input is not // divisible by three, we pad as appropriate. 
@@ -62,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) { return len; } +// ---------------------------------------------------------------------- +// Take the input in groups of 4 characters and turn each +// character into a code 0 to 63 thus: +// A-Z map to 0 to 25 +// a-z map to 26 to 51 +// 0-9 map to 52 to 61 +// +(- for WebSafe) maps to 62 +// /(_ for WebSafe) maps to 63 +// There will be four numbers, all less than 64 which can be represented +// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively). +// Arrange the 6 digit binary numbers into three bytes as such: +// aaaaaabb bbbbcccc ccdddddd +// Equals signs (one or two) are used at the end of the encoded block to +// indicate that the text was not an integer multiple of three bytes long. +// ---------------------------------------------------------------------- size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest, size_t szdest, const char* base64, bool do_padding) { diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h index b04033ff..2186f778 100644 --- a/absl/strings/internal/escaping.h +++ b/absl/strings/internal/escaping.h @@ -24,6 +24,7 @@ ABSL_NAMESPACE_BEGIN namespace strings_internal { ABSL_CONST_INIT extern const char kBase64Chars[]; +ABSL_CONST_INIT extern const char kWebSafeBase64Chars[]; // Calculates the length of a Base64 encoding (RFC 4648) of a string of length // `input_len`, with or without padding per `do_padding`. Note that 'web-safe' |