diff options
Diffstat (limited to 'third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc')
-rw-r--r-- | third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc | 588 |
1 files changed, 0 insertions, 588 deletions
diff --git a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc deleted file mode 100644 index d79a6ee450..0000000000 --- a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc +++ /dev/null @@ -1,588 +0,0 @@ -// Copyright 2005-2008 Google Inc. All Rights Reserved. -// Author: jrm@google.com (Jim Meehan) - -#include <google/protobuf/stubs/common.h> - -#include <google/protobuf/stubs/stringpiece.h> - -namespace google { -namespace protobuf { -namespace internal { - -// These four-byte entries compactly encode how many bytes 0..255 to delete -// in making a string replacement, how many bytes to add 0..255, and the offset -// 0..64k-1 of the replacement string in remap_string. -struct RemapEntry { - uint8 delete_bytes; - uint8 add_bytes; - uint16 bytes_offset; -}; - -// Exit type codes for state tables. All but the first get stuffed into -// signed one-byte entries. The first is only generated by executable code. -// To distinguish from next-state entries, these must be contiguous and -// all <= kExitNone -typedef enum { - kExitDstSpaceFull = 239, - kExitIllegalStructure, // 240 - kExitOK, // 241 - kExitReject, // ... - kExitReplace1, - kExitReplace2, - kExitReplace3, - kExitReplace21, - kExitReplace31, - kExitReplace32, - kExitReplaceOffset1, - kExitReplaceOffset2, - kExitReplace1S0, - kExitSpecial, - kExitDoAgain, - kExitRejectAlt, - kExitNone // 255 -} ExitReason; - - -// This struct represents one entire state table. The three initialized byte -// areas are state_table, remap_base, and remap_string. state0 and state0_size -// give the byte offset and length within state_table of the initial state -- -// table lookups are expected to start and end in this state, but for -// truncated UTF-8 strings, may end in a different state. These allow a quick -// test for that condition. entry_shift is 8 for tables subscripted by a full -// byte value and 6 for space-optimized tables subscripted by only six -// significant bits in UTF-8 continuation bytes. -typedef struct { - const uint32 state0; - const uint32 state0_size; - const uint32 total_size; - const int max_expand; - const int entry_shift; - const int bytes_per_entry; - const uint32 losub; - const uint32 hiadd; - const uint8* state_table; - const RemapEntry* remap_base; - const uint8* remap_string; - const uint8* fast_state; -} UTF8StateMachineObj; - -typedef UTF8StateMachineObj UTF8ScanObj; - -#define X__ (kExitIllegalStructure) -#define RJ_ (kExitReject) -#define S1_ (kExitReplace1) -#define S2_ (kExitReplace2) -#define S3_ (kExitReplace3) -#define S21 (kExitReplace21) -#define S31 (kExitReplace31) -#define S32 (kExitReplace32) -#define T1_ (kExitReplaceOffset1) -#define T2_ (kExitReplaceOffset2) -#define S11 (kExitReplace1S0) -#define SP_ (kExitSpecial) -#define D__ (kExitDoAgain) -#define RJA (kExitRejectAlt) - -// Entire table has 9 state blocks of 256 entries each -static const unsigned int utf8acceptnonsurrogates_STATE0 = 0; // state[0] -static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256; // =[1] -static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304; -static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0; -static const unsigned int utf8acceptnonsurrogates_SHIFT = 8; -static const unsigned int utf8acceptnonsurrogates_BYTES = 1; -static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020; -static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000; - -static const uint8 utf8acceptnonsurrogates[] = { -// state[0] 0x000000 Byte 1 - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3, - 4, 5, 5, 5, 6, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[1] 0x000080 Byte 2 of 2 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[2] 0x000000 Byte 2 of 3 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[3] 0x001000 Byte 2 of 3 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[4] 0x000000 Byte 2 of 4 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[5] 0x040000 Byte 2 of 4 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[6] 0x100000 Byte 2 of 4 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - - 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[7] 0x00d000 Byte 2 of 3 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -// state[8] 0x00d800 Byte 3 of 3 -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, - -RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, -RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, -RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, -RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, - -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, -}; - -// Remap base[0] = (del, add, string_offset) -static const RemapEntry utf8acceptnonsurrogates_remap_base[] = { -{0, 0, 0} }; - -// Remap string[0] -static const unsigned char utf8acceptnonsurrogates_remap_string[] = { -0 }; - -static const unsigned char utf8acceptnonsurrogates_fast[256] = { -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -}; - -static const UTF8ScanObj utf8acceptnonsurrogates_obj = { - utf8acceptnonsurrogates_STATE0, - utf8acceptnonsurrogates_STATE0_SIZE, - utf8acceptnonsurrogates_TOTAL_SIZE, - utf8acceptnonsurrogates_MAX_EXPAND_X4, - utf8acceptnonsurrogates_SHIFT, - utf8acceptnonsurrogates_BYTES, - utf8acceptnonsurrogates_LOSUB, - utf8acceptnonsurrogates_HIADD, - utf8acceptnonsurrogates, - utf8acceptnonsurrogates_remap_base, - utf8acceptnonsurrogates_remap_string, - utf8acceptnonsurrogates_fast -}; - - -#undef X__ -#undef RJ_ -#undef S1_ -#undef S2_ -#undef S3_ -#undef S21 -#undef S31 -#undef S32 -#undef T1_ -#undef T2_ -#undef S11 -#undef SP_ -#undef D__ -#undef RJA - -// Return true if current Tbl pointer is within state0 range -// Note that unsigned compare checks both ends of range simultaneously -static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) { - const uint8* Tbl0 = &st->state_table[st->state0]; - return (static_cast<uint32>(Tbl - Tbl0) < st->state0_size); -} - -// Scan a UTF-8 string based on state table. -// Always scan complete UTF-8 characters -// Set number of bytes scanned. Return reason for exiting -int UTF8GenericScan(const UTF8ScanObj* st, - const char * str, - int str_length, - int* bytes_consumed) { - *bytes_consumed = 0; - if (str_length == 0) return kExitOK; - - int eshift = st->entry_shift; - const uint8* isrc = reinterpret_cast<const uint8*>(str); - const uint8* src = isrc; - const uint8* srclimit = isrc + str_length; - const uint8* srclimit8 = srclimit - 7; - const uint8* Tbl_0 = &st->state_table[st->state0]; - - DoAgain: - // Do state-table scan - int e = 0; - uint8 c; - const uint8* Tbl2 = &st->fast_state[0]; - const uint32 losub = st->losub; - const uint32 hiadd = st->hiadd; - // Check initial few bytes one at a time until 8-byte aligned - //---------------------------- - while ((((uintptr_t)src & 0x07) != 0) && - (src < srclimit) && - Tbl2[src[0]] == 0) { - src++; - } - if (((uintptr_t)src & 0x07) == 0) { - // Do fast for groups of 8 identity bytes. - // This covers a lot of 7-bit ASCII ~8x faster then the 1-byte loop, - // including slowing slightly on cr/lf/ht - //---------------------------- - while (src < srclimit8) { - uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0]; - uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1]; - src += 8; - // This is a fast range check for all bytes in [lowsub..0x80-hiadd) - uint32 temp = (s0123 - losub) | (s0123 + hiadd) | - (s4567 - losub) | (s4567 + hiadd); - if ((temp & 0x80808080) != 0) { - // We typically end up here on cr/lf/ht; src was incremented - int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) | - (Tbl2[src[-6]] | Tbl2[src[-5]]); - if (e0123 != 0) { - src -= 8; - break; - } // Exit on Non-interchange - e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) | - (Tbl2[src[-2]] | Tbl2[src[-1]]); - if (e0123 != 0) { - src -= 4; - break; - } // Exit on Non-interchange - // Else OK, go around again - } - } - } - //---------------------------- - - // Byte-at-a-time scan - //---------------------------- - const uint8* Tbl = Tbl_0; - while (src < srclimit) { - c = *src; - e = Tbl[c]; - src++; - if (e >= kExitIllegalStructure) {break;} - Tbl = &Tbl_0[e << eshift]; - } - //---------------------------- - - - // Exit posibilities: - // Some exit code, !state0, back up over last char - // Some exit code, state0, back up one byte exactly - // source consumed, !state0, back up over partial char - // source consumed, state0, exit OK - // For illegal byte in state0, avoid backup up over PREVIOUS char - // For truncated last char, back up to beginning of it - - if (e >= kExitIllegalStructure) { - // Back up over exactly one byte of rejected/illegal UTF-8 character - src--; - // Back up more if needed - if (!InStateZero(st, Tbl)) { - do { - src--; - } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); - } - } else if (!InStateZero(st, Tbl)) { - // Back up over truncated UTF-8 character - e = kExitIllegalStructure; - do { - src--; - } while ((src > isrc) && ((src[0] & 0xc0) == 0x80)); - } else { - // Normal termination, source fully consumed - e = kExitOK; - } - - if (e == kExitDoAgain) { - // Loop back up to the fast scan - goto DoAgain; - } - - *bytes_consumed = src - isrc; - return e; -} - -int UTF8GenericScanFastAscii(const UTF8ScanObj* st, - const char * str, - int str_length, - int* bytes_consumed) { - *bytes_consumed = 0; - if (str_length == 0) return kExitOK; - - const uint8* isrc = reinterpret_cast<const uint8*>(str); - const uint8* src = isrc; - const uint8* srclimit = isrc + str_length; - const uint8* srclimit8 = srclimit - 7; - int n; - int rest_consumed; - int exit_reason; - do { - // Check initial few bytes one at a time until 8-byte aligned - while ((((uintptr_t)src & 0x07) != 0) && - (src < srclimit) && (src[0] < 0x80)) { - src++; - } - if (((uintptr_t)src & 0x07) == 0) { - while ((src < srclimit8) && - (((reinterpret_cast<const uint32*>(src)[0] | - reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) { - src += 8; - } - } - while ((src < srclimit) && (src[0] < 0x80)) { - src++; - } - // Run state table on the rest - n = src - isrc; - exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed); - src += rest_consumed; - } while ( exit_reason == kExitDoAgain ); - - *bytes_consumed = src - isrc; - return exit_reason; -} - -// Hack: On some compilers the static tables are initialized at startup. -// We can't use them until they are initialized. However, some Protocol -// Buffer parsing happens at static init time and may try to validate -// UTF-8 strings. Since UTF-8 validation is only used for debugging -// anyway, we simply always return success if initialization hasn't -// occurred yet. -namespace { - -bool module_initialized_ = false; - -struct InitDetector { - InitDetector() { - module_initialized_ = true; - } -}; -InitDetector init_detector; - -} // namespace - -bool IsStructurallyValidUTF8(const char* buf, int len) { - if (!module_initialized_) return true; - - int bytes_consumed = 0; - UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, - buf, len, &bytes_consumed); - return (bytes_consumed == len); -} - -int UTF8SpnStructurallyValid(const StringPiece& str) { - if (!module_initialized_) return str.size(); - - int bytes_consumed = 0; - UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, - str.data(), str.size(), &bytes_consumed); - return bytes_consumed; -} - -// Coerce UTF-8 byte string in src_str to be -// a structurally-valid equal-length string by selectively -// overwriting illegal bytes with replace_char (typically blank). -// replace_char must be legal printable 7-bit Ascii 0x20..0x7e. -// src_str is read-only. If any overwriting is needed, a modified byte string -// is created in idst, length isrclen. -// -// Returns pointer to output buffer, isrc if no changes were made, -// or idst if some bytes were changed. -// -// Fast case: all is structurally valid and no byte copying is done. -// -char* UTF8CoerceToStructurallyValid(const StringPiece& src_str, - char* idst, - const char replace_char) { - const char* isrc = src_str.data(); - const int len = src_str.length(); - int n = UTF8SpnStructurallyValid(src_str); - if (n == len) { // Normal case -- all is cool, return - return const_cast<char*>(isrc); - } else { // Unusual case -- copy w/o bad bytes - const char* src = isrc; - const char* srclimit = isrc + len; - char* dst = idst; - memmove(dst, src, n); // Copy initial good chunk - src += n; - dst += n; - while (src < srclimit) { // src points to bogus byte or is off the end - dst[0] = replace_char; // replace one bad byte - src++; - dst++; - StringPiece str2(src, srclimit - src); - n = UTF8SpnStructurallyValid(str2); // scan the remainder - memmove(dst, src, n); // copy next good chunk - src += n; - dst += n; - } - } - return idst; -} - -} // namespace internal -} // namespace protobuf -} // namespace google |