summaryrefslogtreecommitdiff
path: root/absl/strings/internal
diff options
context:
space:
mode:
Diffstat (limited to 'absl/strings/internal')
-rw-r--r--absl/strings/internal/escaping.cc180
-rw-r--r--absl/strings/internal/escaping.h58
2 files changed, 238 insertions, 0 deletions
diff --git a/absl/strings/internal/escaping.cc b/absl/strings/internal/escaping.cc
new file mode 100644
index 00000000..c5271286
--- /dev/null
+++ b/absl/strings/internal/escaping.cc
@@ -0,0 +1,180 @@
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "absl/strings/internal/escaping.h"
+
+#include "absl/base/internal/endian.h"
+#include "absl/base/internal/raw_logging.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+
+const char kBase64Chars[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding) {
+ // Base64 encodes three bytes of input at a time. If the input is not
+ // divisible by three, we pad as appropriate.
+ //
+ // (from https://tools.ietf.org/html/rfc3548)
+ // Special processing is performed if fewer than 24 bits are available
+ // at the end of the data being encoded. A full encoding quantum is
+ // always completed at the end of a quantity. When fewer than 24 input
+ // bits are available in an input group, zero bits are added (on the
+ // right) to form an integral number of 6-bit groups. Padding at the
+ // end of the data is performed using the '=' character. Since all base
+ // 64 input is an integral number of octets, only the following cases
+ // can arise:
+
+ // Base64 encodes each three bytes of input into four bytes of output.
+ size_t len = (input_len / 3) * 4;
+
+ if (input_len % 3 == 0) {
+ // (from https://tools.ietf.org/html/rfc3548)
+ // (1) the final quantum of encoding input is an integral multiple of 24
+ // bits; here, the final unit of encoded output will be an integral
+ // multiple of 4 characters with no "=" padding,
+ } else if (input_len % 3 == 1) {
+ // (from https://tools.ietf.org/html/rfc3548)
+ // (2) the final quantum of encoding input is exactly 8 bits; here, the
+ // final unit of encoded output will be two characters followed by two
+ // "=" padding characters, or
+ len += 2;
+ if (do_padding) {
+ len += 2;
+ }
+ } else { // (input_len % 3 == 2)
+ // (from https://tools.ietf.org/html/rfc3548)
+ // (3) the final quantum of encoding input is exactly 16 bits; here, the
+ // final unit of encoded output will be three characters followed by one
+ // "=" padding character.
+ len += 3;
+ if (do_padding) {
+ len += 1;
+ }
+ }
+
+ assert(len >= input_len); // make sure we didn't overflow
+ return len;
+}
+
+size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
+ size_t szdest, const char* base64,
+ bool do_padding) {
+ static const char kPad64 = '=';
+
+ if (szsrc * 4 > szdest * 3) return 0;
+
+ char* cur_dest = dest;
+ const unsigned char* cur_src = src;
+
+ char* const limit_dest = dest + szdest;
+ const unsigned char* const limit_src = src + szsrc;
+
+ // Three bytes of data encodes to four characters of cyphertext.
+ // So we can pump through three-byte chunks atomically.
+ if (szsrc >= 3) { // "limit_src - 3" is UB if szsrc < 3.
+ while (cur_src < limit_src - 3) { // While we have >= 32 bits.
+ uint32_t in = absl::big_endian::Load32(cur_src) >> 8;
+
+ cur_dest[0] = base64[in >> 18];
+ in &= 0x3FFFF;
+ cur_dest[1] = base64[in >> 12];
+ in &= 0xFFF;
+ cur_dest[2] = base64[in >> 6];
+ in &= 0x3F;
+ cur_dest[3] = base64[in];
+
+ cur_dest += 4;
+ cur_src += 3;
+ }
+ }
+ // To save time, we didn't update szdest or szsrc in the loop. So do it now.
+ szdest = limit_dest - cur_dest;
+ szsrc = limit_src - cur_src;
+
+ /* now deal with the tail (<=3 bytes) */
+ switch (szsrc) {
+ case 0:
+ // Nothing left; nothing more to do.
+ break;
+ case 1: {
+ // One byte left: this encodes to two characters, and (optionally)
+ // two pad characters to round out the four-character cypherblock.
+ if (szdest < 2) return 0;
+ uint32_t in = cur_src[0];
+ cur_dest[0] = base64[in >> 2];
+ in &= 0x3;
+ cur_dest[1] = base64[in << 4];
+ cur_dest += 2;
+ szdest -= 2;
+ if (do_padding) {
+ if (szdest < 2) return 0;
+ cur_dest[0] = kPad64;
+ cur_dest[1] = kPad64;
+ cur_dest += 2;
+ szdest -= 2;
+ }
+ break;
+ }
+ case 2: {
+ // Two bytes left: this encodes to three characters, and (optionally)
+ // one pad character to round out the four-character cypherblock.
+ if (szdest < 3) return 0;
+ uint32_t in = absl::big_endian::Load16(cur_src);
+ cur_dest[0] = base64[in >> 10];
+ in &= 0x3FF;
+ cur_dest[1] = base64[in >> 4];
+ in &= 0x00F;
+ cur_dest[2] = base64[in << 2];
+ cur_dest += 3;
+ szdest -= 3;
+ if (do_padding) {
+ if (szdest < 1) return 0;
+ cur_dest[0] = kPad64;
+ cur_dest += 1;
+ szdest -= 1;
+ }
+ break;
+ }
+ case 3: {
+ // Three bytes left: same as in the big loop above. We can't do this in
+ // the loop because the loop above always reads 4 bytes, and the fourth
+ // byte is past the end of the input.
+ if (szdest < 4) return 0;
+ uint32_t in = (cur_src[0] << 16) + absl::big_endian::Load16(cur_src + 1);
+ cur_dest[0] = base64[in >> 18];
+ in &= 0x3FFFF;
+ cur_dest[1] = base64[in >> 12];
+ in &= 0xFFF;
+ cur_dest[2] = base64[in >> 6];
+ in &= 0x3F;
+ cur_dest[3] = base64[in];
+ cur_dest += 4;
+ szdest -= 4;
+ break;
+ }
+ default:
+ // Should not be reached: blocks of 4 bytes are handled
+ // in the while loop before this switch statement.
+ ABSL_RAW_LOG(FATAL, "Logic problem? szsrc = %zu", szsrc);
+ break;
+ }
+ return (cur_dest - dest);
+}
+
+} // namespace strings_internal
+ABSL_NAMESPACE_END
+} // namespace absl
diff --git a/absl/strings/internal/escaping.h b/absl/strings/internal/escaping.h
new file mode 100644
index 00000000..6a9ce602
--- /dev/null
+++ b/absl/strings/internal/escaping.h
@@ -0,0 +1,58 @@
+// Copyright 2020 The Abseil Authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef ABSL_STRINGS_INTERNAL_ESCAPING_H_
+#define ABSL_STRINGS_INTERNAL_ESCAPING_H_
+
+#include <cassert>
+
+#include "absl/strings/internal/resize_uninitialized.h"
+
+namespace absl {
+ABSL_NAMESPACE_BEGIN
+namespace strings_internal {
+
+ABSL_CONST_INIT extern const char kBase64Chars[];
+
+// Calculates how long a string will be when it is base64 encoded given its
+// length and whether or not the result should be padded.
+size_t CalculateBase64EscapedLenInternal(size_t input_len, bool do_padding);
+
+// Base64-encodes `src` using the alphabet provided in `base64` and writes the
+// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
+// until its length is a multiple of 3. Returns the length of `dest`.
+size_t Base64EscapeInternal(const unsigned char* src, size_t szsrc, char* dest,
+ size_t szdest, const char* base64, bool do_padding);
+
+// Base64-encodes `src` using the alphabet provided in `base64` and writes the
+// result to `dest`. If `do_padding` is true, `dest` is padded with '=' chars
+// until its length is a multiple of 3.
+template <typename String>
+void Base64EscapeInternal(const unsigned char* src, size_t szsrc, String* dest,
+ bool do_padding, const char* base64_chars) {
+ const size_t calc_escaped_size =
+ CalculateBase64EscapedLenInternal(szsrc, do_padding);
+ STLStringResizeUninitialized(dest, calc_escaped_size);
+
+ const size_t escaped_len = Base64EscapeInternal(
+ src, szsrc, &(*dest)[0], dest->size(), base64_chars, do_padding);
+ assert(calc_escaped_size == escaped_len);
+ dest->erase(escaped_len);
+}
+
+} // namespace strings_internal
+ABSL_NAMESPACE_END
+} // namespace absl
+
+#endif // ABSL_STRINGS_INTERNAL_ESCAPING_H_