aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/google/protobuf/stubs/structurally_valid.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/google/protobuf/stubs/structurally_valid.cc')
-rw-r--r--src/google/protobuf/stubs/structurally_valid.cc52
1 files changed, 52 insertions, 0 deletions
diff --git a/src/google/protobuf/stubs/structurally_valid.cc b/src/google/protobuf/stubs/structurally_valid.cc
index 0f6afe6d..d79a6ee4 100644
--- a/src/google/protobuf/stubs/structurally_valid.cc
+++ b/src/google/protobuf/stubs/structurally_valid.cc
@@ -3,6 +3,8 @@
#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stringpiece.h>
+
namespace google {
namespace protobuf {
namespace internal {
@@ -531,6 +533,56 @@ bool IsStructurallyValidUTF8(const char* buf, int len) {
return (bytes_consumed == len);
}
+// Scans str with the utf8acceptnonsurrogates_obj state table and returns the
+// number of bytes the scanner reports as consumed. The coercion routine that
+// follows treats this value as the length of the leading structurally valid
+// run of bytes.
+//
+// If the module's tables have not been initialized, no scan is performed and
+// the full size of str is returned, i.e. the whole input is reported as valid.
+int UTF8SpnStructurallyValid(const StringPiece& str) {
+  if (module_initialized_) {
+    int valid_prefix_len = 0;
+    UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
+                             str.data(), str.size(), &valid_prefix_len);
+    return valid_prefix_len;
+  }
+  return str.size();  // Tables unavailable: skip scanning entirely.
+}
+
+// Produces a structurally valid byte string of the same length as src_str by
+// overwriting each offending byte with replace_char (typically a blank).
+// replace_char is expected to be printable 7-bit ASCII, 0x20..0x7e; this is
+// not checked here.
+//
+// src_str itself is only read. When at least one byte has to be replaced, the
+// corrected bytes are written to idst, which must have room for
+// src_str.length() bytes. No terminating NUL is appended.
+//
+// Returns:
+//   src_str.data() (with const cast away) when the whole input is already
+//     valid -- the fast path, in which nothing is copied and idst is untouched;
+//   idst otherwise.
+//
+// Copies are done with memmove, so the copy steps themselves tolerate idst
+// overlapping the source bytes.
+char* UTF8CoerceToStructurallyValid(const StringPiece& src_str,
+                                    char* idst,
+                                    const char replace_char) {
+  const char* const input = src_str.data();
+  const int total = src_str.length();
+  int good = UTF8SpnStructurallyValid(src_str);
+
+  // Fast path: every byte passed the scan, so hand back the caller's bytes.
+  if (good == total) {
+    return const_cast<char*>(input);
+  }
+
+  // Slow path: rebuild the string in idst, one valid run at a time.
+  const char* in = input;
+  const char* const in_end = input + total;
+  char* out = idst;
+
+  memmove(out, in, good);  // Leading valid run.
+  in += good;
+  out += good;
+
+  // Invariant: at the top of each iteration `in` points at a byte the
+  // scanner refused to consume.
+  while (in < in_end) {
+    *out++ = replace_char;  // Substitute exactly one bad byte...
+    ++in;                   // ...and step past it in the input.
+
+    // Rescan what is left and copy over the next valid run (possibly empty).
+    good = UTF8SpnStructurallyValid(StringPiece(in, in_end - in));
+    memmove(out, in, good);
+    in += good;
+    out += good;
+  }
+  return idst;
+}
+
} // namespace internal
} // namespace protobuf
} // namespace google