diff options
author | 2020-10-11 14:36:58 -0400 | |
---|---|---|
committer | 2020-10-11 14:36:58 -0400 | |
commit | 3eb7b1a0f958cc4483802889aa9fdbd171e0f807 (patch) | |
tree | f8e16323e027615bdb0a7ff94becc19dc58f16a3 | |
parent | d06e7a04c43dad29d8bd79ec6f98af07add0181f (diff) |
Abseil makes encoding and decoding a lot easier. Take an Abseil
dependency, and rewrite the encoding and decoding routines using it.
Closes: https://github.com/bbarenblat/scoville/issues/2
-rw-r--r-- | build.ninja | 4 | ||||
-rw-r--r-- | encoding.cc | 133 | ||||
-rw-r--r-- | encoding.h | 5 |
3 files changed, 67 insertions, 75 deletions
diff --git a/build.ninja b/build.ninja index a20615b..69012f8 100644 --- a/build.ninja +++ b/build.ninja @@ -39,6 +39,6 @@ build posix_extras.o: cxx posix_extras.cc build scoville.o: cxx scoville.cc build encoding_test: link encoding.o encoding_test.o - libs = -lgtest -lgtest_main -labsl_str_format_internal -lglog + libs = -lgtest -lgtest_main -lglog -labsl_str_format_internal -labsl_strings -labsl_throw_delegate build scoville: link encoding.o operations.o posix_extras.o scoville.o - libs = -lfuse -lglog -lgflags + libs = -lfuse -lglog -lgflags -labsl_strings -labsl_throw_delegate diff --git a/encoding.cc b/encoding.cc index 702b2c4..a109d33 100644 --- a/encoding.cc +++ b/encoding.cc @@ -14,40 +14,21 @@ #include "encoding.h" -#include <array> -#include <cstdlib> -#include <functional> -#include <ios> -#include <sstream> -#include <string> - +#include <absl/strings/escaping.h> +#include <absl/strings/str_cat.h> +#include <absl/strings/str_join.h> +#include <absl/strings/str_split.h> +#include <absl/strings/string_view.h> #include <glog/logging.h> +#include <stdexcept> +#include <string> +#include <vector> + namespace scoville { namespace { -void WriteAsciiAsHex(const char c, std::ostringstream* const out) { - if (1 < sizeof(c) && 0x100 <= c) { - // Not ASCII! - throw EncodingFailure("could not encode non-ASCII character '" + - std::string(1, c) + "'"); - } - *out << std::hex << static_cast<int>(c); -} - -char ReadHexAsAscii(std::istringstream* const in) { - std::array<char, 3> hex_str; - in->get(hex_str.data(), hex_str.size()); - char* decoded_end; - const char result = - static_cast<char>(std::strtol(hex_str.data(), &decoded_end, 16)); - if (decoded_end == hex_str.data()) { - throw DecodingFailure("could not decode invalid hex"); - } - return result; -} - bool IsVfatBadCharacter(const char c) noexcept { return (0 <= c && c < 0x20) || c == '*' || c == '?' || c == '<' || c == '>' || c == '|' || c == '"' || c == ':' || c == '\\'; @@ -57,61 +38,77 @@ bool IsVfatBadLastCharacter(const char c) noexcept { return IsVfatBadCharacter(c) || c == '.' || c == ' '; } -void EncodeStream(std::istringstream* const in, std::ostringstream* const out) { - char c; - while (!in->get(c).eof()) { - in->peek(); - const bool processing_last_character = in->eof(); - - if (IsVfatBadCharacter(c) || - (processing_last_character && IsVfatBadLastCharacter(c))) { - *out << '%'; - WriteAsciiAsHex(c, out); - } else if (c == '%') { - *out << "%%"; - } else { - *out << c; - } - } +bool IsValidHex(const char c) noexcept { + return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || + ('a' <= c && c <= 'f'); } -void DecodeStream(std::istringstream* const in, std::ostringstream* const out) { - char c; - while (!in->get(c).eof()) { - if (c == '%') { - if (in->peek() == '%') { - in->ignore(); - *out << "%"; - } else { - *out << ReadHexAsAscii(in); - } +std::string EncodeComponent(absl::string_view in) { + std::string out; + for (size_t i = 0; i < in.size(); ++i) { + absl::string_view c(in.data() + i, 1); + if (in[i] == '%') { + absl::StrAppend(&out, "%%"); + } else if (IsVfatBadCharacter(in[i]) || + (i == in.size() - 1 && IsVfatBadLastCharacter(in[i]))) { + absl::StrAppend(&out, "%", absl::BytesToHexString(c)); } else { - *out << c; + absl::StrAppend(&out, c); } } -} - -std::string TransformString( - std::function<void(std::istringstream*, std::ostringstream*)> f, - const std::string& in) { - std::istringstream in_stream(in); - std::ostringstream out_stream; - f(&in_stream, &out_stream); - return out_stream.str(); + return out; } } // namespace std::string Encode(const std::string& in) { - const std::string result = TransformString(EncodeStream, in); + std::vector<std::string> out; + for (auto component : absl::StrSplit(in, '/')) { + out.push_back(EncodeComponent(component)); + } + std::string result = absl::StrJoin(out, "/"); VLOG(1) << "Encode: \"" << in << "\" -> \"" << result << "\""; return result; } std::string Decode(const std::string& in) { - const std::string result = TransformString(DecodeStream, in); - VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\""; + std::string result; + for (size_t i = 0; i < in.size(); ++i) { + if (in[i] != '%') { + // This isn't an escaped byte. + absl::StrAppend(&result, absl::string_view(in.data() + i, 1)); + continue; + } + + char x, y; + + // Decode single-byte escapes. There's only one of these ("%%" -> "%"). + try { + x = in.at(i + 1); + } catch (std::out_of_range&) { + throw DecodingFailure("clipped escape at end of string"); + } + if (x == '%') { + absl::StrAppend(&result, "%"); + ++i; + continue; + } + + // Decode double-byte escapes. + try { + y = in.at(i + 2); + } catch (std::out_of_range&) { + throw DecodingFailure("clipped escape at end of string"); + } + if (!(IsValidHex(x) && IsValidHex(y))) { + throw DecodingFailure("clipped escape at end of string"); + } + absl::StrAppend(&result, absl::HexStringToBytes( + absl::string_view(in.c_str() + i + 1, 2))); + i += 2; + } return result; + VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\""; } -} // scoville +} // namespace scoville @@ -20,11 +20,6 @@ namespace scoville { -class EncodingFailure : public std::logic_error { - public: - using std::logic_error::logic_error; -}; - class DecodingFailure : public std::logic_error { public: using std::logic_error::logic_error; |