aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Benjamin Barenblat <bbarenblat@gmail.com> 2020-10-11 14:36:58 -0400
committer: Benjamin Barenblat <bbarenblat@gmail.com> 2020-10-11 14:36:58 -0400
commit: 3eb7b1a0f958cc4483802889aa9fdbd171e0f807 (patch)
tree: f8e16323e027615bdb0a7ff94becc19dc58f16a3
parent: d06e7a04c43dad29d8bd79ec6f98af07add0181f (diff)
Rewrite encoding routines using Abseil (HEAD, master)
Abseil makes encoding and decoding a lot easier. Take an Abseil dependency, and rewrite the encoding and decoding routines using it. Closes: https://github.com/bbarenblat/scoville/issues/2
-rw-r--r-- build.ninja 4
-rw-r--r-- encoding.cc 133
-rw-r--r-- encoding.h 5
3 files changed, 67 insertions, 75 deletions
diff --git a/build.ninja b/build.ninja
index a20615b..69012f8 100644
--- a/build.ninja
+++ b/build.ninja
@@ -39,6 +39,6 @@ build posix_extras.o: cxx posix_extras.cc
build scoville.o: cxx scoville.cc
build encoding_test: link encoding.o encoding_test.o
- libs = -lgtest -lgtest_main -labsl_str_format_internal -lglog
+ libs = -lgtest -lgtest_main -lglog -labsl_str_format_internal -labsl_strings -labsl_throw_delegate
build scoville: link encoding.o operations.o posix_extras.o scoville.o
- libs = -lfuse -lglog -lgflags
+ libs = -lfuse -lglog -lgflags -labsl_strings -labsl_throw_delegate
diff --git a/encoding.cc b/encoding.cc
index 702b2c4..a109d33 100644
--- a/encoding.cc
+++ b/encoding.cc
@@ -14,40 +14,21 @@
#include "encoding.h"
-#include <array>
-#include <cstdlib>
-#include <functional>
-#include <ios>
-#include <sstream>
-#include <string>
-
+#include <absl/strings/escaping.h>
+#include <absl/strings/str_cat.h>
+#include <absl/strings/str_join.h>
+#include <absl/strings/str_split.h>
+#include <absl/strings/string_view.h>
#include <glog/logging.h>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
namespace scoville {
namespace {
-void WriteAsciiAsHex(const char c, std::ostringstream* const out) {
- if (1 < sizeof(c) && 0x100 <= c) {
- // Not ASCII!
- throw EncodingFailure("could not encode non-ASCII character '" +
- std::string(1, c) + "'");
- }
- *out << std::hex << static_cast<int>(c);
-}
-
-char ReadHexAsAscii(std::istringstream* const in) {
- std::array<char, 3> hex_str;
- in->get(hex_str.data(), hex_str.size());
- char* decoded_end;
- const char result =
- static_cast<char>(std::strtol(hex_str.data(), &decoded_end, 16));
- if (decoded_end == hex_str.data()) {
- throw DecodingFailure("could not decode invalid hex");
- }
- return result;
-}
-
bool IsVfatBadCharacter(const char c) noexcept {
return (0 <= c && c < 0x20) || c == '*' || c == '?' || c == '<' || c == '>' ||
c == '|' || c == '"' || c == ':' || c == '\\';
@@ -57,61 +38,77 @@ bool IsVfatBadLastCharacter(const char c) noexcept {
return IsVfatBadCharacter(c) || c == '.' || c == ' ';
}
-void EncodeStream(std::istringstream* const in, std::ostringstream* const out) {
- char c;
- while (!in->get(c).eof()) {
- in->peek();
- const bool processing_last_character = in->eof();
-
- if (IsVfatBadCharacter(c) ||
- (processing_last_character && IsVfatBadLastCharacter(c))) {
- *out << '%';
- WriteAsciiAsHex(c, out);
- } else if (c == '%') {
- *out << "%%";
- } else {
- *out << c;
- }
- }
+bool IsValidHex(const char c) noexcept {
+ return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') ||
+ ('a' <= c && c <= 'f');
}
-void DecodeStream(std::istringstream* const in, std::ostringstream* const out) {
- char c;
- while (!in->get(c).eof()) {
- if (c == '%') {
- if (in->peek() == '%') {
- in->ignore();
- *out << "%";
- } else {
- *out << ReadHexAsAscii(in);
- }
+std::string EncodeComponent(absl::string_view in) {
+ std::string out;
+ for (size_t i = 0; i < in.size(); ++i) {
+ absl::string_view c(in.data() + i, 1);
+ if (in[i] == '%') {
+ absl::StrAppend(&out, "%%");
+ } else if (IsVfatBadCharacter(in[i]) ||
+ (i == in.size() - 1 && IsVfatBadLastCharacter(in[i]))) {
+ absl::StrAppend(&out, "%", absl::BytesToHexString(c));
} else {
- *out << c;
+ absl::StrAppend(&out, c);
}
}
-}
-
-std::string TransformString(
- std::function<void(std::istringstream*, std::ostringstream*)> f,
- const std::string& in) {
- std::istringstream in_stream(in);
- std::ostringstream out_stream;
- f(&in_stream, &out_stream);
- return out_stream.str();
+ return out;
}
} // namespace
std::string Encode(const std::string& in) {
- const std::string result = TransformString(EncodeStream, in);
+ std::vector<std::string> out;
+ for (auto component : absl::StrSplit(in, '/')) {
+ out.push_back(EncodeComponent(component));
+ }
+ std::string result = absl::StrJoin(out, "/");
VLOG(1) << "Encode: \"" << in << "\" -> \"" << result << "\"";
return result;
}
std::string Decode(const std::string& in) {
- const std::string result = TransformString(DecodeStream, in);
- VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\"";
+ std::string result;
+ for (size_t i = 0; i < in.size(); ++i) {
+ if (in[i] != '%') {
+ // Not an escape introducer; copy the byte through unchanged.
+ absl::StrAppend(&result, absl::string_view(in.data() + i, 1));
+ continue;
+ }
+
+ char x, y;
+
+ // Decode single-byte escapes. There's only one of these ("%%" -> "%").
+ try {
+ x = in.at(i + 1);
+ } catch (const std::out_of_range&) {
+ throw DecodingFailure("clipped escape at end of string");
+ }
+ if (x == '%') {
+ absl::StrAppend(&result, "%");
+ ++i;
+ continue;
+ }
+
+ // Decode double-byte escapes: '%' followed by exactly two hex digits.
+ try {
+ y = in.at(i + 2);
+ } catch (const std::out_of_range&) {
+ throw DecodingFailure("clipped escape at end of string");
+ }
+ if (!(IsValidHex(x) && IsValidHex(y))) {
+ throw DecodingFailure("invalid hex digits in escape");
+ }
+ absl::StrAppend(&result, absl::HexStringToBytes(
+ absl::string_view(in.c_str() + i + 1, 2)));
+ i += 2;
+ }
+ VLOG(1) << "Decode: \"" << in << "\" -> \"" << result << "\"";
return result;
}
-} // scoville
+} // namespace scoville
diff --git a/encoding.h b/encoding.h
index d6e72ea..dfea6a2 100644
--- a/encoding.h
+++ b/encoding.h
@@ -20,11 +20,6 @@
namespace scoville {
-class EncodingFailure : public std::logic_error {
- public:
- using std::logic_error::logic_error;
-};
-
class DecodingFailure : public std::logic_error {
public:
using std::logic_error::logic_error;