Be more consistent in the documentation and variable declarations for base64 escaping vs unescaping:

* Move table documenting escaping conversions to be next to the internal escaping code that implements it. Mention it from the internal _un_escaping code (which reverses it) too.
* Centralize the two arrays which map from normal chars to escaped chars (k(WebSafe)Base64Chars), and make bidirectional documentation links between them and their reverse (unescaping) maps (kUn(WebSafe)Base64).
* Remove redundant list of escaping/unescaping methods in the public escaping.cc.

PiperOrigin-RevId: 517142667
Change-Id: I627e59a196ef855e5bf61ef7f4b509920e8d7acd
author: Abseil Team <absl-team@google.com> 2023-03-16 09:24:49 -0700
committer: Copybara-Service <copybara-worker@google.com> 2023-03-16 09:25:27 -0700
commit: 50a9e2b27ee7144c306c0d8b40290e131c7f1b34 (patch)
tree: f3297a3fde02585e931e280e794b4cb786753a81 /absl/strings
parent: 256cc61a2d3a6f6dbc9f71d1f976fb9ca9306da5 (diff)
3 files changed, 34 insertions, 32 deletions
diff --git a/absl/strings/escaping.cc b/absl/strings/escaping.cc
index 93966846..2827fbaa 100644
--- a/absl/strings/escaping.cc
+++ b/absl/strings/escaping.cc
@@ -443,6 +443,8 @@ void CEscapeAndAppendInternal(absl::string_view src, std::string* dest) {
   }
 }
 
+// Reverses the mapping in Base64EscapeInternal; see that method's
+// documentation for details of the mapping.
 bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
                             size_t szdest, const signed char* unbase64,
                             size_t* len) {
@@ -676,7 +678,10 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
   return ok;
 }
 
-// The arrays below were generated by the following code
+// The arrays below map base64-escaped characters back to their original values.
+// For the inverse case, see k(WebSafe)Base64Chars in the internal
+// escaping.cc.
+// These arrays were generated by the following inversion code:
 // #include <sys/time.h>
 // #include <stdlib.h>
 // #include <string.h>
@@ -703,8 +708,8 @@ bool Base64UnescapeInternal(const char* src_param, size_t szsrc, char* dest,
 //   }
 // }
 //
-// where the value of "Base64[]" was replaced by one of the base-64 conversion
-// tables from the functions below.
+// where the value of "Base64[]" was replaced by one of k(WebSafe)Base64Chars
+// in the internal escaping.cc.
 /* clang-format off */
 constexpr signed char kUnBase64[] = {
     -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
@@ -777,9 +782,6 @@ constexpr signed char kUnWebSafeBase64[] = {
 };
 /* clang-format on */
 
-constexpr char kWebSafeBase64Chars[] =
-    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
-
 template <typename String>
 bool Base64UnescapeInternal(const char* src, size_t slen, String* dest,
                             const signed char* unbase64) {
@@ -880,30 +882,6 @@ std::string Utf8SafeCHexEscape(absl::string_view src) {
   return CEscapeInternal(src, true, true);
 }
 
-// ----------------------------------------------------------------------
-// Base64Unescape() - base64 decoder
-// Base64Escape() - base64 encoder
-// WebSafeBase64Unescape() - Google's variation of base64 decoder
-// WebSafeBase64Escape() - Google's variation of base64 encoder
-//
-// Check out
-// https://datatracker.ietf.org/doc/html/rfc2045 for formal description, but
-// what we care about is that...
-//   Take the encoded stuff in groups of 4 characters and turn each
-//   character into a code 0 to 63 thus:
-//           A-Z map to 0 to 25
-//           a-z map to 26 to 51
-//           0-9 map to 52 to 61
-//           +(- for WebSafe) maps to 62
-//           /(_ for WebSafe) maps to 63
-//   There will be four numbers, all less than 64 which can be represented
-//   by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
-//   Arrange the 6 digit binary numbers into three bytes as such:
-//   aaaaaabb bbbbcccc ccdddddd
-//   Equals signs (one or two) are used at the end of the encoded block to
-//   indicate that the text was not an integer multiple of three bytes long.
-// ----------------------------------------------------------------------
-
 bool Base64Unescape(absl::string_view src, std::string* dest) {
   return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
 }
@@ -921,7 +899,7 @@ void Base64Escape(absl::string_view src, std::string* dest) {
 void WebSafeBase64Escape(absl::string_view src, std::string* dest) {
   strings_internal::Base64EscapeInternal(
       reinterpret_cast<const unsigned char*>(src.data()), src.size(), dest,
-      false, kWebSafeBase64Chars);
+      false, strings_internal::kWebSafeBase64Chars);
 }
 
 std::string Base64Escape(absl::string_view src) {
@@ -936,7 +914,7 @@ std::string WebSafeBase64Escape(absl::string_view src) {
   std::string dest;
   strings_internal::Base64EscapeInternal(
       reinterpret_cast<const unsigned char*>(src.data()), src.size(), &dest,
-      false, kWebSafeBase64Chars);
+      false, strings_internal::kWebSafeBase64Chars);
   return dest;
 }
 
diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc
index 8bd0890d..56a4cbed 100644
--- a/absl/strings/internal/escaping.cc
+++ b/absl/strings/internal/escaping.cc
@@ -21,9 +21,17 @@ namespace absl {
 ABSL_NAMESPACE_BEGIN
 namespace strings_internal {
 
+// The two strings below map each 6-bit value (0-63), used as an index, to
+// its base64-escaped character.
+// For the inverse case, see kUn(WebSafe)Base64 in the public
+// escaping.cc (absl/strings/escaping.cc).
 ABSL_CONST_INIT const char kBase64Chars[] =
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
+ABSL_CONST_INIT const char kWebSafeBase64Chars[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
+
 size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
   // Base64 encodes three bytes of input at a time. If the input is not
   // divisible by three, we pad as appropriate.
@@ -62,6 +70,21 @@ size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
   return len;
 }
 
+// ----------------------------------------------------------------------
+//   Take the input in groups of 3 bytes and regroup their 24 bits
+//   into four 6 digit binary numbers, arranged as such:
+//   aaaaaabb bbbbcccc ccdddddd
+//   There will be four numbers (aaaaaa, bbbbbb, cccccc, dddddd
+//   respectively), all less than 64. Turn each number, a code 0 to 63,
+//   into an escaped character thus:
+//           0 to 25 map to A-Z
+//           26 to 51 map to a-z
+//           52 to 61 map to 0-9
+//           62 maps to +(- for WebSafe)
+//           63 maps to /(_ for WebSafe)
+//   Equals signs (one or two) are used at the end of the encoded block to
+//   indicate that the text was not an integer multiple of three bytes long.
+// ----------------------------------------------------------------------
 size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
                             size_t szdest, const char* base64,
                             bool do_padding) {
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
index b04033ff..2186f778 100644
--- a/absl/strings/internal/escaping.h
+++ b/absl/strings/internal/escaping.h
@@ -24,6 +24,7 @@ ABSL_NAMESPACE_BEGIN
 namespace strings_internal {
 
 ABSL_CONST_INIT extern const char kBase64Chars[];
+ABSL_CONST_INIT extern const char kWebSafeBase64Chars[];
 
 // Calculates the length of a Base64 encoding (RFC 4648) of a string of length
 // `input_len`, with or without padding per `do_padding`. Note that 'web-safe'
author	Abseil Team <absl-team@google.com>	2023-03-16 09:24:49 -0700
committer	Copybara-Service <copybara-worker@google.com>	2023-03-16 09:25:27 -0700
commit	50a9e2b27ee7144c306c0d8b40290e131c7f1b34 (patch)
tree	f3297a3fde02585e931e280e794b4cb786753a81 /absl/strings
parent	256cc61a2d3a6f6dbc9f71d1f976fb9ca9306da5 (diff)