From 3eb7b1a0f958cc4483802889aa9fdbd171e0f807 Mon Sep 17 00:00:00 2001
From: Benjamin Barenblat
Date: Sun, 11 Oct 2020 14:36:58 -0400
Subject: Rewrite encoding routines using Abseil

Abseil makes encoding and decoding a lot easier. Take an Abseil
dependency, and rewrite the encoding and decoding routines using it.

Closes: https://github.com/bbarenblat/scoville/issues/2
---
Review notes (not part of the commit message):
* Decode(): the VLOG statement followed `return result;` and could never
  run; it now precedes the return, matching Encode().
* Decode(): a "%" followed by two characters that are not both hex digits
  was reported as "clipped escape at end of string"; it now reports
  "could not decode invalid hex", the message the removed ReadHexAsAscii
  used for the same condition.
* This copy of the patch had lost its line breaks, indentation, and all
  angle-bracketed text (author address, #include targets, template
  arguments). Line breaks follow the diff markers and agree with the hunk
  headers and the diffstat. Indentation, #include targets, and template
  arguments are reconstructed from the identifiers the code uses; confirm
  them against the repository before applying. The author address could
  not be recovered.

 build.ninja |   4 +-
 encoding.cc | 133 +++++++++++++++++++++++++++++-------------------------------
 encoding.h  |   5 ---
 3 files changed, 67 insertions(+), 75 deletions(-)

diff --git a/build.ninja b/build.ninja
index a20615b..69012f8 100644
--- a/build.ninja
+++ b/build.ninja
@@ -39,6 +39,6 @@ build posix_extras.o: cxx posix_extras.cc
 build scoville.o: cxx scoville.cc
 
 build encoding_test: link encoding.o encoding_test.o
-  libs = -lgtest -lgtest_main -labsl_str_format_internal -lglog
+  libs = -lgtest -lgtest_main -lglog -labsl_str_format_internal -labsl_strings -labsl_throw_delegate
 build scoville: link encoding.o operations.o posix_extras.o scoville.o
-  libs = -lfuse -lglog -lgflags
+  libs = -lfuse -lglog -lgflags -labsl_strings -labsl_throw_delegate
diff --git a/encoding.cc b/encoding.cc
index 702b2c4..a109d33 100644
--- a/encoding.cc
+++ b/encoding.cc
@@ -14,40 +14,21 @@
 
 #include "encoding.h"
 
-#include <array>
-#include <cstdlib>
-#include <functional>
-#include <ios>
-#include <sstream>
-#include <string>
-
+#include <absl/strings/escaping.h>
+#include <absl/strings/str_cat.h>
+#include <absl/strings/str_join.h>
+#include <absl/strings/str_split.h>
+#include <absl/strings/string_view.h>
 #include <glog/logging.h>
 
+#include <stdexcept>
+#include <string>
+#include <vector>
+
 namespace scoville {
 
 namespace {
 
-void WriteAsciiAsHex(const char c, std::ostringstream* const out) {
-  if (1 < sizeof(c) && 0x100 <= c) {
-    // Not ASCII!
-    throw EncodingFailure("could not encode non-ASCII character '" +
-                          std::string(1, c) + "'");
-  }
-  *out << std::hex << static_cast<int>(c);
-}
-
-char ReadHexAsAscii(std::istringstream* const in) {
-  std::array<char, 3> hex_str;
-  in->get(hex_str.data(), hex_str.size());
-  char* decoded_end;
-  const char result =
-      static_cast<char>(std::strtol(hex_str.data(), &decoded_end, 16));
-  if (decoded_end == hex_str.data()) {
-    throw DecodingFailure("could not decode invalid hex");
-  }
-  return result;
-}
-
 bool IsVfatBadCharacter(const char c) noexcept {
   return (0 <= c && c < 0x20) || c == '*' || c == '?' || c == '<' ||
          c == '>' || c == '|' || c == '"' || c == ':' || c == '\\';
@@ -57,61 +38,77 @@ bool IsVfatBadLastCharacter(const char c) noexcept {
   return IsVfatBadCharacter(c) || c == '.' || c == ' ';
 }
 
-void EncodeStream(std::istringstream* const in, std::ostringstream* const out) {
-  char c;
-  while (!in->get(c).eof()) {
-    in->peek();
-    const bool processing_last_character = in->eof();
-
-    if (IsVfatBadCharacter(c) ||
-        (processing_last_character && IsVfatBadLastCharacter(c))) {
-      *out << '%';
-      WriteAsciiAsHex(c, out);
-    } else if (c == '%') {
-      *out << "%%";
-    } else {
-      *out << c;
-    }
-  }
+bool IsValidHex(const char c) noexcept {
+  return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') ||
+         ('a' <= c && c <= 'f');
 }
 
-void DecodeStream(std::istringstream* const in, std::ostringstream* const out) {
-  char c;
-  while (!in->get(c).eof()) {
-    if (c == '%') {
-      if (in->peek() == '%') {
-        in->ignore();
-        *out << "%";
-      } else {
-        *out << ReadHexAsAscii(in);
-      }
+std::string EncodeComponent(absl::string_view in) {
+  std::string out;
+  for (size_t i = 0; i < in.size(); ++i) {
+    absl::string_view c(in.data() + i, 1);
+    if (in[i] == '%') {
+      absl::StrAppend(&out, "%%");
+    } else if (IsVfatBadCharacter(in[i]) ||
+               (i == in.size() - 1 && IsVfatBadLastCharacter(in[i]))) {
+      absl::StrAppend(&out, "%", absl::BytesToHexString(c));
     } else {
-      *out << c;
+      absl::StrAppend(&out, c);
     }
   }
-}
-
-std::string TransformString(
-    std::function<void(std::istringstream*, std::ostringstream*)> f,
-    const std::string& in) {
-  std::istringstream in_stream(in);
-  std::ostringstream out_stream;
-  f(&in_stream, &out_stream);
-  return out_stream.str();
+  return out;
 }
 
 }  // namespace
 
 std::string Encode(const std::string& in) {
-  const std::string result = TransformString(EncodeStream, in);
+  std::vector<std::string> out;
+  for (auto component : absl::StrSplit(in, '/')) {
+    out.push_back(EncodeComponent(component));
+  }
+  std::string result = absl::StrJoin(out, "/");
   VLOG(1) << "Encode: \"" << in << "\" -> \"" << result << "\"";
   return result;
 }
 
 std::string Decode(const std::string& in) {
-  const std::string result = TransformString(DecodeStream, in);
-  VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\"";
+  std::string result;
+  for (size_t i = 0; i < in.size(); ++i) {
+    if (in[i] != '%') {
+      // This isn't an escaped byte.
+      absl::StrAppend(&result, absl::string_view(in.data() + i, 1));
+      continue;
+    }
+
+    char x, y;
+
+    // Decode single-byte escapes. There's only one of these ("%%" -> "%").
+    try {
+      x = in.at(i + 1);
+    } catch (std::out_of_range&) {
+      throw DecodingFailure("clipped escape at end of string");
+    }
+    if (x == '%') {
+      absl::StrAppend(&result, "%");
+      ++i;
+      continue;
+    }
+
+    // Decode double-byte escapes.
+    try {
+      y = in.at(i + 2);
+    } catch (std::out_of_range&) {
+      throw DecodingFailure("clipped escape at end of string");
+    }
+    if (!(IsValidHex(x) && IsValidHex(y))) {
+      throw DecodingFailure("could not decode invalid hex");
+    }
+    absl::StrAppend(&result, absl::HexStringToBytes(
+                                 absl::string_view(in.c_str() + i + 1, 2)));
+    i += 2;
+  }
+  VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\"";
   return result;
 }
 
-}  // scoville
+}  // namespace scoville
diff --git a/encoding.h b/encoding.h
index d6e72ea..dfea6a2 100644
--- a/encoding.h
+++ b/encoding.h
@@ -20,11 +20,6 @@
 
 namespace scoville {
 
-class EncodingFailure : public std::logic_error {
- public:
-  using std::logic_error::logic_error;
-};
-
 class DecodingFailure : public std::logic_error {
  public:
   using std::logic_error::logic_error;
-- 
cgit v1.2.3