aboutsummaryrefslogtreecommitdiffhomepage
path: root/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc')
-rw-r--r--third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc588
1 files changed, 0 insertions, 588 deletions
diff --git a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc b/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
deleted file mode 100644
index d79a6ee450..0000000000
--- a/third_party/protobuf/src/google/protobuf/stubs/structurally_valid.cc
+++ /dev/null
@@ -1,588 +0,0 @@
-// Copyright 2005-2008 Google Inc. All Rights Reserved.
-// Author: jrm@google.com (Jim Meehan)
-
-#include <google/protobuf/stubs/common.h>
-
-#include <google/protobuf/stubs/stringpiece.h>
-
-namespace google {
-namespace protobuf {
-namespace internal {
-
-// One replacement rule packed into four bytes: how many source bytes to
-// delete (0..255) in making a string replacement, how many bytes to add
-// (0..255), and the offset (0..64k-1) of the replacement string in remap_string.
-struct RemapEntry {
- uint8 delete_bytes; // source bytes removed by this replacement
- uint8 add_bytes; // replacement bytes inserted
- uint16 bytes_offset; // start of the replacement text within remap_string
-};
-
-// Exit type codes for state tables. All but the first are stored directly in
-// one-byte table entries (uint8 in this file); the first is only generated by
-// executable code. To distinguish them from next-state entries, the codes are
-// contiguous (239..255) and the scanner tests e >= kExitIllegalStructure.
-typedef enum {
- kExitDstSpaceFull = 239,
- kExitIllegalStructure, // 240: malformed or truncated UTF-8
- kExitOK, // 241: input fully consumed in the initial state
- kExitReject, // 242: used by the table below for the 0xD800 (surrogate) block
- kExitReplace1, // 243
- kExitReplace2, // 244
- kExitReplace3, // 245
- kExitReplace21, // 246
- kExitReplace31, // 247
- kExitReplace32, // 248
- kExitReplaceOffset1, // 249
- kExitReplaceOffset2, // 250
- kExitReplace1S0, // 251
- kExitSpecial, // 252
- kExitDoAgain, // 253: UTF8GenericScan restarts its fast scan on this code
- kExitRejectAlt, // 254
- kExitNone // 255
-} ExitReason;
-
-
-// This struct represents one entire state table. The three initialized byte
-// areas are state_table, remap_base, and remap_string. state0 and state0_size
-// give the byte offset and length within state_table of the initial state --
-// table lookups are expected to start and end in this state, but for
-// truncated UTF-8 strings, may end in a different state. These two fields allow
-// a quick test for that condition (see InStateZero). entry_shift is 8 for tables
-// subscripted by a full byte value and 6 for space-optimized tables subscripted
-// by only the six significant bits in UTF-8 continuation bytes.
-typedef struct {
- const uint32 state0; // byte offset of the initial state within state_table
- const uint32 state0_size; // byte length of the initial state block
- const uint32 total_size; // NOTE(review): presumably total bytes in state_table -- confirm
- const int max_expand; // NOTE(review): not read by the scanners in this file -- confirm meaning
- const int entry_shift; // next-state number is shifted left by this to index state_table
- const int bytes_per_entry; // NOTE(review): not read by the scanners in this file -- confirm meaning
- const uint32 losub; // subtracted from each 4-byte word in the fast range check
- const uint32 hiadd; // added to each 4-byte word in the fast range check
- const uint8* state_table; // next-state / exit-code entries
- const RemapEntry* remap_base; // replacement rules (a single dummy entry for scan-only tables here)
- const uint8* remap_string; // replacement text referenced by remap_base
- const uint8* fast_state; // per-byte flags; 0 means the byte can be skipped by the fast scan
-} UTF8StateMachineObj;
-
-typedef UTF8StateMachineObj UTF8ScanObj;
-
-#define X__ (kExitIllegalStructure) // short aliases keep the table rows 16 entries wide
-#define RJ_ (kExitReject)
-#define S1_ (kExitReplace1) // S1_ through RJA do not appear in the table in this file
-#define S2_ (kExitReplace2)
-#define S3_ (kExitReplace3)
-#define S21 (kExitReplace21)
-#define S31 (kExitReplace31)
-#define S32 (kExitReplace32)
-#define T1_ (kExitReplaceOffset1)
-#define T2_ (kExitReplaceOffset2)
-#define S11 (kExitReplace1S0)
-#define SP_ (kExitSpecial)
-#define D__ (kExitDoAgain)
-#define RJA (kExitRejectAlt)
-
-// Entire table has 9 state blocks of 256 entries each (9 * 256 = 2304 bytes)
-static const unsigned int utf8acceptnonsurrogates_STATE0 = 0; // state[0] is the initial state
-static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256; // one block; ends where state[1] begins
-static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304;
-static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0; // fills max_expand
-static const unsigned int utf8acceptnonsurrogates_SHIFT = 8; // state s starts at byte offset s << 8
-static const unsigned int utf8acceptnonsurrogates_BYTES = 1; // fills bytes_per_entry
-static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020; // fast path lower bound: bytes below 0x20 leave it
-static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000; // fast path upper bound stays at 0x80
-
-static const uint8 utf8acceptnonsurrogates[] = {
-// state[0] 0x000000 Byte 1: 00-7F stay in state 0; C2-DF -> 1; E0 -> 2; E1-EC, EE, EF -> 3; ED -> 7; F0 -> 4; F1-F3 -> 5; F4 -> 6; all else illegal
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 3,
- 4, 5, 5, 5, 6, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[1] 0x000080 Byte 2 of 2 (also the final continuation byte of longer sequences): 80-BF -> 0
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[2] 0x000000 Byte 2 of 3 (after lead byte E0): only A0-BF -> 1; 80-9F is illegal
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[3] 0x001000 Byte 2 of 3 (after lead bytes E1-EC, EE, EF): 80-BF -> 1
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[4] 0x000000 Byte 2 of 4 (after lead byte F0): only 90-BF -> 3; 80-8F is illegal
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[5] 0x040000 Byte 2 of 4 (after lead bytes F1-F3): 80-BF -> 3
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[6] 0x100000 Byte 2 of 4 (after lead byte F4): only 80-8F -> 3; 90-BF is illegal
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
- 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[7] 0x00d000 Byte 2 of 3 (after lead byte ED): 80-9F -> 1; A0-BF -> 8
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
- 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-// state[8] 0x00d800 Byte 3 of 3 (after ED A0-BF, the surrogate range): 80-BF -> kExitReject
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-
-RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
-RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
-RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
-RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
-
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__, X__,
-};
-
-// Remap base[0] = (del, add, string_offset); one all-zero entry, since this table never emits a replace code
-static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
-{0, 0, 0} };
-
-// Remap string[0]; a single zero byte standing in for the (unused) replacement text
-static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
-0 };
-
-static const unsigned char utf8acceptnonsurrogates_fast[256] = { // indexed by byte value: 0 for 00-7F (fast scan may skip it), 1 for 80-FF (needs the state table)
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-};
-
-static const UTF8ScanObj utf8acceptnonsurrogates_obj = { // positional: order must match the UTF8StateMachineObj fields
- utf8acceptnonsurrogates_STATE0, // state0
- utf8acceptnonsurrogates_STATE0_SIZE, // state0_size
- utf8acceptnonsurrogates_TOTAL_SIZE, // total_size
- utf8acceptnonsurrogates_MAX_EXPAND_X4, // max_expand
- utf8acceptnonsurrogates_SHIFT, // entry_shift
- utf8acceptnonsurrogates_BYTES, // bytes_per_entry
- utf8acceptnonsurrogates_LOSUB, // losub
- utf8acceptnonsurrogates_HIADD, // hiadd
- utf8acceptnonsurrogates, // state_table
- utf8acceptnonsurrogates_remap_base, // remap_base
- utf8acceptnonsurrogates_remap_string, // remap_string
- utf8acceptnonsurrogates_fast // fast_state
-};
-
-
-#undef X__
-#undef RJ_
-#undef S1_
-#undef S2_
-#undef S3_
-#undef S21
-#undef S31
-#undef S32
-#undef T1_
-#undef T2_
-#undef S11
-#undef SP_
-#undef D__
-#undef RJA
-
-// Reports whether Tbl points into the initial-state block of st's state table.
-// The offset is converted to unsigned before the comparison, so a pointer that
-// lies below the block wraps to a large value and a single test covers both
-// ends of the range.
-static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
-  const uint8* const state0_begin = st->state_table + st->state0;
-  const uint32 offset = static_cast<uint32>(Tbl - state0_begin);
-  return offset < st->state0_size;
-}
-
-// Scan the UTF-8 string str[0..str_length) using the state table in st.
-// Only complete UTF-8 characters are consumed: on return *bytes_consumed is
-// the length of the prefix that was accepted, with the scan position backed
-// up to the start of any rejected, malformed, or truncated character.
-// Returns kExitOK when the whole input was accepted, kExitIllegalStructure
-// for malformed or truncated input, or another exit code read from the table
-// (kExitReject for the surrogate block of utf8acceptnonsurrogates).
-// An empty or negative-length input returns kExitOK with nothing consumed.
-int UTF8GenericScan(const UTF8ScanObj* st,
-                    const char * str,
-                    int str_length,
-                    int* bytes_consumed) {
-  *bytes_consumed = 0;
-  // Reject non-positive lengths up front so no out-of-range pointer is formed.
-  if (str_length <= 0) return kExitOK;
-
-  int eshift = st->entry_shift;
-  const uint8* isrc = reinterpret_cast<const uint8*>(str);
-  const uint8* src = isrc;
-  const uint8* srclimit = isrc + str_length;
-  // Upper bound for starting an 8-byte group read. For inputs shorter than 7
-  // bytes, srclimit - 7 would point before the start of the buffer, which is
-  // undefined pointer arithmetic; clamping to isrc disables the group loop
-  // exactly as the out-of-range pointer did.
-  const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7;
-  const uint8* Tbl_0 = &st->state_table[st->state0];
-
-  DoAgain:
-  // Do state-table scan
-  int e = 0;
-  uint8 c;
-  const uint8* Tbl2 = &st->fast_state[0];
-  const uint32 losub = st->losub;
-  const uint32 hiadd = st->hiadd;
-  // Check initial few bytes one at a time until 8-byte aligned
-  //----------------------------
-  while ((((uintptr_t)src & 0x07) != 0) &&
-         (src < srclimit) &&
-         Tbl2[src[0]] == 0) {
-    src++;
-  }
-  if (((uintptr_t)src & 0x07) == 0) {
-    // Do fast for groups of 8 identity bytes.
-    // This covers a lot of 7-bit ASCII ~8x faster than the 1-byte loop,
-    // including slowing slightly on cr/lf/ht
-    //----------------------------
-    while (src < srclimit8) {
-      uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];
-      uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
-      src += 8;
-      // This is a fast range check for all bytes in [losub..0x80-hiadd)
-      uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
-                    (s4567 - losub) | (s4567 + hiadd);
-      if ((temp & 0x80808080) != 0) {
-        // We typically end up here on cr/lf/ht; src was incremented
-        int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
-                    (Tbl2[src[-6]] | Tbl2[src[-5]]);
-        if (e0123 != 0) {
-          src -= 8;
-          break;
-        }    // Exit on Non-interchange
-        e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
-                (Tbl2[src[-2]] | Tbl2[src[-1]]);
-        if (e0123 != 0) {
-          src -= 4;
-          break;
-        }    // Exit on Non-interchange
-        // Else OK, go around again
-      }
-    }
-  }
-  //----------------------------
-
-  // Byte-at-a-time scan
-  //----------------------------
-  const uint8* Tbl = Tbl_0;
-  while (src < srclimit) {
-    c = *src;
-    e = Tbl[c];
-    src++;
-    // Entries at or above kExitIllegalStructure are exit codes, not states.
-    if (e >= kExitIllegalStructure) {break;}
-    Tbl = &Tbl_0[e << eshift];
-  }
-  //----------------------------
-
-
-  // Exit possibilities:
-  //  Some exit code, !state0, back up over last char
-  //  Some exit code, state0, back up one byte exactly
-  //  source consumed, !state0, back up over partial char
-  //  source consumed, state0, exit OK
-  // For illegal byte in state0, avoid backing up over PREVIOUS char
-  // For truncated last char, back up to beginning of it
-
-  if (e >= kExitIllegalStructure) {
-    // Back up over exactly one byte of rejected/illegal UTF-8 character
-    src--;
-    // Back up more if needed
-    if (!InStateZero(st, Tbl)) {
-      do {
-        src--;
-      } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
-    }
-  } else if (!InStateZero(st, Tbl)) {
-    // Back up over truncated UTF-8 character
-    e = kExitIllegalStructure;
-    do {
-      src--;
-    } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
-  } else {
-    // Normal termination, source fully consumed
-    e = kExitOK;
-  }
-
-  if (e == kExitDoAgain) {
-    // Loop back up to the fast scan
-    goto DoAgain;
-  }
-
-  *bytes_consumed = static_cast<int>(src - isrc);
-  return e;
-}
-
-// Variant of UTF8GenericScan that first skips a leading run of 7-bit ASCII
-// (bytes below 0x80) without consulting the state table, then hands the
-// remainder to UTF8GenericScan. *bytes_consumed receives the total number of
-// bytes accepted from the start of str; the return value is the exit code of
-// the state-table scan. An empty or negative-length input returns kExitOK
-// with nothing consumed.
-int UTF8GenericScanFastAscii(const UTF8ScanObj* st,
-                             const char * str,
-                             int str_length,
-                             int* bytes_consumed) {
-  *bytes_consumed = 0;
-  // Reject non-positive lengths up front so no out-of-range pointer is formed.
-  if (str_length <= 0) return kExitOK;
-
-  const uint8* isrc = reinterpret_cast<const uint8*>(str);
-  const uint8* src = isrc;
-  const uint8* srclimit = isrc + str_length;
-  // Upper bound for starting an 8-byte group read. For inputs shorter than 7
-  // bytes, srclimit - 7 would point before the start of the buffer, which is
-  // undefined pointer arithmetic; clamping to isrc disables the group loop
-  // exactly as the out-of-range pointer did.
-  const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7;
-  int exit_reason;
-  do {
-    // Check initial few bytes one at a time until 8-byte aligned
-    while ((((uintptr_t)src & 0x07) != 0) &&
-           (src < srclimit) && (src[0] < 0x80)) {
-      src++;
-    }
-    if (((uintptr_t)src & 0x07) == 0) {
-      // Skip 8 bytes at a time while none of them has its high bit set
-      while ((src < srclimit8) &&
-             (((reinterpret_cast<const uint32*>(src)[0] |
-                reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
-        src += 8;
-      }
-    }
-    // Finish the ASCII run one byte at a time
-    while ((src < srclimit) && (src[0] < 0x80)) {
-      src++;
-    }
-    // Run state table on the rest
-    const int n = static_cast<int>(src - isrc);
-    int rest_consumed = 0;
-    exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
-    src += rest_consumed;
-  } while ( exit_reason == kExitDoAgain );
-
-  *bytes_consumed = static_cast<int>(src - isrc);
-  return exit_reason;
-}
-
-// Hack: On some compilers the static tables are initialized at startup.
-// We can't use them until they are initialized. However, some Protocol
-// Buffer parsing happens at static init time and may try to validate
-// UTF-8 strings. Since UTF-8 validation is only used for debugging
-// anyway, the public entry points below simply report success while
-// module_initialized_ is still false.
-namespace {
-
-bool module_initialized_ = false; // set to true by init_detector's constructor
-
-struct InitDetector {
- InitDetector() {
- module_initialized_ = true; // runs when this file's static objects are constructed
- }
-};
-InitDetector init_detector; // defined after the tables in this file
-
-} // namespace
-
-// Returns true when all len bytes at buf are accepted by the
-// utf8acceptnonsurrogates table. Always returns true while this module's
-// static initialization has not yet run.
-bool IsStructurallyValidUTF8(const char* buf, int len) {
-  if (module_initialized_) {
-    int valid_prefix = 0;
-    UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
-                             buf, len, &valid_prefix);
-    // Valid only if the scanner accepted every byte.
-    return valid_prefix == len;
-  }
-  return true;
-}
-
-// Returns the length in bytes of the leading part of str that is accepted by
-// the utf8acceptnonsurrogates table. Returns the full size of str while this
-// module's static initialization has not yet run.
-int UTF8SpnStructurallyValid(const StringPiece& str) {
-  if (module_initialized_) {
-    int valid_prefix = 0;
-    UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj,
-                             str.data(), str.size(), &valid_prefix);
-    return valid_prefix;
-  }
-  return str.size();
-}
-
-// Produce a structurally valid UTF-8 string of the same length as src_str by
-// overwriting each illegal byte with replace_char (typically a blank).
-// replace_char must be legal printable 7-bit ASCII, 0x20..0x7e.
-//
-// src_str itself is never modified. When a byte has to be replaced, the
-// corrected copy is written to idst, which must have room for
-// src_str.length() bytes.
-//
-// Returns the source data pointer when src_str was already valid (fast case:
-// nothing is copied), otherwise idst.
-//
-char* UTF8CoerceToStructurallyValid(const StringPiece& src_str,
-                                    char* idst,
-                                    const char replace_char) {
-  const char* const input = src_str.data();
-  const int input_len = src_str.length();
-  int good = UTF8SpnStructurallyValid(src_str);
-  if (good == input_len) {
-    // Everything is valid: hand back the caller's own bytes.
-    return const_cast<char*>(input);
-  }
-
-  const char* const input_end = input + input_len;
-  memmove(idst, input, good);        // leading valid run
-  const char* in = input + good;
-  char* out = idst + good;
-  // Invariant: `in` points at an invalid byte or at input_end.
-  while (in < input_end) {
-    *out = replace_char;             // overwrite exactly one bad byte
-    in++;
-    out++;
-    const StringPiece remainder(in, input_end - in);
-    good = UTF8SpnStructurallyValid(remainder);
-    memmove(out, in, good);          // next valid run, possibly empty
-    in += good;
-    out += good;
-  }
-  return idst;
-}
-
-} // namespace internal
-} // namespace protobuf
-} // namespace google