Squashed 'third_party/protobuf/' content from commit e35e248

Change-Id: I6cbe123d09fe50fdcad0e51466665daeee7433c7
git-subtree-dir: third_party/protobuf
git-subtree-split: e35e24800fb8d694bdeea5fd63dc7d1b14d68723
diff --git a/src/google/protobuf/stubs/structurally_valid.cc b/src/google/protobuf/stubs/structurally_valid.cc
new file mode 100644
index 0000000..d79a6ee
--- /dev/null
+++ b/src/google/protobuf/stubs/structurally_valid.cc
@@ -0,0 +1,588 @@
+// Copyright 2005-2008 Google Inc. All Rights Reserved.
+// Author: jrm@google.com (Jim Meehan)
+
+#include <google/protobuf/stubs/common.h>
+
+#include <google/protobuf/stubs/stringpiece.h>
+
+namespace google {
+namespace protobuf {
+namespace internal {
+
+// Each four-byte entry compactly encodes one string replacement: how many
+// bytes (0..255) to delete, how many bytes (0..255) to add, and the offset
+// (0..64k-1) of the replacement string in remap_string.
+struct RemapEntry {
+  uint8 delete_bytes;   // number of bytes to delete
+  uint8 add_bytes;      // number of bytes to add
+  uint16 bytes_offset;  // offset of the replacement string in remap_string
+};
+
+// Exit codes for state tables. All but the first are stored in one-byte
+// (uint8) table entries; the first is only generated by executable code.
+// They must be contiguous and all <= kExitNone so that a table entry
+// >= kExitIllegalStructure is an exit code and anything smaller a next state.
+typedef enum {
+  kExitDstSpaceFull = 239,
+  kExitIllegalStructure,  // 240
+  kExitOK,                // 241
+  kExitReject,            // 242
+  kExitReplace1,
+  kExitReplace2,
+  kExitReplace3,
+  kExitReplace21,
+  kExitReplace31,
+  kExitReplace32,
+  kExitReplaceOffset1,
+  kExitReplaceOffset2,
+  kExitReplace1S0,
+  kExitSpecial,
+  kExitDoAgain,
+  kExitRejectAlt,
+  kExitNone               // 255
+} ExitReason;
+
+
+// This struct represents one entire state table. The three initialized byte
+// areas are state_table, remap_base, and remap_string. state0 and state0_size
+// give the byte offset and length within state_table of the initial state --
+// table lookups are expected to start and end in this state, but for
+// truncated UTF-8 strings, may end in a different state. These allow a quick
+// test for that condition. entry_shift is 8 for tables subscripted by a full
+// byte value and 6 for space-optimized tables subscripted by only six
+// significant bits in UTF-8 continuation bytes.
+typedef struct {
+  const uint32 state0;           // offset of the initial state in state_table
+  const uint32 state0_size;      // length of the initial state
+  const uint32 total_size;       // not read by the scanners in this file
+  const int max_expand;          // not read by the scanners in this file
+  const int entry_shift;         // state e starts at index (e << entry_shift)
+  const int bytes_per_entry;     // not read by the scanners in this file
+  const uint32 losub;            // fast-path range check: subtracted per word
+  const uint32 hiadd;            // fast-path range check: added per word
+  const uint8* state_table;      // next-state / exit-code entries
+  const RemapEntry* remap_base;  // not read by the scanners in this file
+  const uint8* remap_string;     // not read by the scanners in this file
+  const uint8* fast_state;       // per-byte flags; 0 = skippable by fast scan
+} UTF8StateMachineObj;
+
+typedef UTF8StateMachineObj UTF8ScanObj;
+
+#define X__ (kExitIllegalStructure)  // three-character aliases for table rows
+#define RJ_ (kExitReject)
+#define S1_ (kExitReplace1)
+#define S2_ (kExitReplace2)
+#define S3_ (kExitReplace3)
+#define S21 (kExitReplace21)
+#define S31 (kExitReplace31)
+#define S32 (kExitReplace32)
+#define T1_ (kExitReplaceOffset1)
+#define T2_ (kExitReplaceOffset2)
+#define S11 (kExitReplace1S0)
+#define SP_ (kExitSpecial)
+#define D__ (kExitDoAgain)
+#define RJA (kExitRejectAlt)
+
+// Parameters of the state table: 9 state blocks of 256 one-byte entries each.
+static const unsigned int utf8acceptnonsurrogates_STATE0 = 0;     // state[0]
+static const unsigned int utf8acceptnonsurrogates_STATE0_SIZE = 256;  // 1 block
+static const unsigned int utf8acceptnonsurrogates_TOTAL_SIZE = 2304;  // 9 * 256
+static const unsigned int utf8acceptnonsurrogates_MAX_EXPAND_X4 = 0;
+static const unsigned int utf8acceptnonsurrogates_SHIFT = 8;  // 256 per state
+static const unsigned int utf8acceptnonsurrogates_BYTES = 1;
+static const unsigned int utf8acceptnonsurrogates_LOSUB = 0x20202020;
+static const unsigned int utf8acceptnonsurrogates_HIADD = 0x00000000;
+
+static const uint8 utf8acceptnonsurrogates[] = {
+// state[0] 0x000000 Byte 1
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  2,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   7,   3,   3,
+  4,   5,   5,   5,   6, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[1] 0x000080 Byte 2 of 2
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+  0,   0,   0,   0,   0,   0,   0,   0,    0,   0,   0,   0,   0,   0,   0,   0,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[2] 0x000000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[3] 0x001000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[4] 0x000000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[5] 0x040000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[6] 0x100000 Byte 2 of 4
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  3,   3,   3,   3,   3,   3,   3,   3,    3,   3,   3,   3,   3,   3,   3,   3,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[7] 0x00d000 Byte 2 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  1,   1,   1,   1,   1,   1,   1,   1,    1,   1,   1,   1,   1,   1,   1,   1,
+  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
+  8,   8,   8,   8,   8,   8,   8,   8,    8,   8,   8,   8,   8,   8,   8,   8,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+// state[8] 0x00d800 Byte 3 of 3
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,  RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_, RJ_,
+
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+X__, X__, X__, X__, X__, X__, X__, X__,  X__, X__, X__, X__, X__, X__, X__, X__,
+};
+
+// Remap base[0] = (del, add, string_offset); a single all-zero entry
+static const RemapEntry utf8acceptnonsurrogates_remap_base[] = {
+{0, 0, 0} };
+
+// Remap string[0]; a single zero byte
+static const unsigned char utf8acceptnonsurrogates_remap_string[] = {
+0 };
+
+static const unsigned char utf8acceptnonsurrogates_fast[256] = {  // 1: >= 0x80
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+0, 0, 0, 0, 0, 0, 0, 0,  0, 0, 0, 0, 0, 0, 0, 0,
+
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+1, 1, 1, 1, 1, 1, 1, 1,  1, 1, 1, 1, 1, 1, 1, 1,
+};
+
+static const UTF8ScanObj utf8acceptnonsurrogates_obj = {
+  utf8acceptnonsurrogates_STATE0,         // state0
+  utf8acceptnonsurrogates_STATE0_SIZE,    // state0_size
+  utf8acceptnonsurrogates_TOTAL_SIZE,     // total_size
+  utf8acceptnonsurrogates_MAX_EXPAND_X4,  // max_expand
+  utf8acceptnonsurrogates_SHIFT,          // entry_shift
+  utf8acceptnonsurrogates_BYTES,          // bytes_per_entry
+  utf8acceptnonsurrogates_LOSUB,          // losub
+  utf8acceptnonsurrogates_HIADD,          // hiadd
+  utf8acceptnonsurrogates,                // state_table
+  utf8acceptnonsurrogates_remap_base,     // remap_base
+  utf8acceptnonsurrogates_remap_string,   // remap_string
+  utf8acceptnonsurrogates_fast            // fast_state
+};
+
+
+#undef X__
+#undef RJ_
+#undef S1_
+#undef S2_
+#undef S3_
+#undef S21
+#undef S31
+#undef S32
+#undef T1_
+#undef T2_
+#undef S11
+#undef SP_
+#undef D__
+#undef RJA
+
+// Reports whether Tbl points inside the initial-state block of st's table.
+// One unsigned compare rejects both "before the block" and "past its end".
+static inline bool InStateZero(const UTF8ScanObj* st, const uint8* Tbl) {
+  const uint32 off = static_cast<uint32>(Tbl - &st->state_table[st->state0]);
+  return off < st->state0_size;
+}
+
+// Scan a UTF-8 string with the state table in st, stopping only on a
+// character boundary. Stores the count of bytes accepted in *bytes_consumed;
+// returns kExitOK if everything was accepted, else the stopping exit code.
+int UTF8GenericScan(const UTF8ScanObj* st, const char* str,
+                    int str_length, int* bytes_consumed) {
+  *bytes_consumed = 0;
+  if (str_length == 0) return kExitOK;
+
+  int eshift = st->entry_shift;
+  const uint8* isrc = reinterpret_cast<const uint8*>(str);
+  const uint8* src = isrc;
+  const uint8* srclimit = isrc + str_length;
+  // Inputs shorter than 7 bytes must not form srclimit - 7: that pointer would
+  // precede the buffer (undefined behavior). isrc disables the 8-byte loop.
+  const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7;
+  const uint8* Tbl_0 = &st->state_table[st->state0];
+
+ DoAgain:
+  // Do state-table scan
+  int e = 0;
+  uint8 c;
+  const uint8* Tbl2 = &st->fast_state[0];
+  const uint32 losub = st->losub;
+  const uint32 hiadd = st->hiadd;
+  // Check initial few bytes one at a time until 8-byte aligned
+  //----------------------------
+  while ((((uintptr_t)src & 0x07) != 0) &&
+         (src < srclimit) &&
+         Tbl2[src[0]] == 0) {
+    src++;
+  }
+  if (((uintptr_t)src & 0x07) == 0) {
+    // Do fast for groups of 8 identity bytes.
+    // This covers a lot of 7-bit ASCII ~8x faster than the 1-byte loop,
+    // including slowing slightly on cr/lf/ht
+    //----------------------------
+    while (src < srclimit8) {
+      uint32 s0123 = (reinterpret_cast<const uint32 *>(src))[0];
+      uint32 s4567 = (reinterpret_cast<const uint32 *>(src))[1];
+      src += 8;
+      // This is a fast range check for all bytes in [losub..0x80-hiadd)
+      uint32 temp = (s0123 - losub) | (s0123 + hiadd) |
+                    (s4567 - losub) | (s4567 + hiadd);
+      if ((temp & 0x80808080) != 0) {
+        // We typically end up here on cr/lf/ht; src was incremented
+        int e0123 = (Tbl2[src[-8]] | Tbl2[src[-7]]) |
+                    (Tbl2[src[-6]] | Tbl2[src[-5]]);
+        if (e0123 != 0) {
+          src -= 8;
+          break;
+        }    // Exit on Non-interchange
+        e0123 = (Tbl2[src[-4]] | Tbl2[src[-3]]) |
+                (Tbl2[src[-2]] | Tbl2[src[-1]]);
+        if (e0123 != 0) {
+          src -= 4;
+          break;
+        }    // Exit on Non-interchange
+        // Else OK, go around again
+      }
+    }
+  }
+  //----------------------------
+
+  // Byte-at-a-time scan
+  //----------------------------
+  const uint8* Tbl = Tbl_0;
+  while (src < srclimit) {
+    c = *src;
+    e = Tbl[c];
+    src++;
+    if (e >= kExitIllegalStructure) {break;}
+    Tbl = &Tbl_0[e << eshift];
+  }
+  //----------------------------
+
+
+  // Exit possibilities:
+  //  Some exit code, !state0, back up over last char
+  //  Some exit code, state0, back up one byte exactly
+  //  source consumed, !state0, back up over partial char
+  //  source consumed, state0, exit OK
+  // For illegal byte in state0, avoid backing up over PREVIOUS char
+  // For truncated last char, back up to beginning of it
+
+  if (e >= kExitIllegalStructure) {
+    // Back up over exactly one byte of rejected/illegal UTF-8 character
+    src--;
+    // Back up more if needed
+    if (!InStateZero(st, Tbl)) {
+      do {
+        src--;
+      } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+    }
+  } else if (!InStateZero(st, Tbl)) {
+    // Back up over truncated UTF-8 character
+    e = kExitIllegalStructure;
+    do {
+      src--;
+    } while ((src > isrc) && ((src[0] & 0xc0) == 0x80));
+  } else {
+    // Normal termination, source fully consumed
+    e = kExitOK;
+  }
+
+  if (e == kExitDoAgain) {
+    // Loop back up to the fast scan
+    goto DoAgain;
+  }
+
+  *bytes_consumed = src - isrc;
+  return e;
+}
+
+// Like UTF8GenericScan, but first skips leading 7-bit ASCII (eight bytes at a
+// time once src is 8-byte aligned) before running the state table on the rest.
+int UTF8GenericScanFastAscii(const UTF8ScanObj* st, const char* str,
+                             int str_length, int* bytes_consumed) {
+  *bytes_consumed = 0;
+  if (str_length == 0) return kExitOK;
+
+  const uint8* isrc =  reinterpret_cast<const uint8*>(str);
+  const uint8* src = isrc;
+  const uint8* srclimit = isrc + str_length;
+  // Never form srclimit - 7 for short inputs: it would precede the buffer.
+  const uint8* srclimit8 = str_length < 7 ? isrc : srclimit - 7;
+  int rest_consumed;
+  int exit_reason;
+  do {
+    // Check initial few bytes one at a time until 8-byte aligned
+    while ((((uintptr_t)src & 0x07) != 0) &&
+           (src < srclimit) && (src[0] < 0x80)) {
+      src++;
+    }
+    if (((uintptr_t)src & 0x07) == 0) {
+      while ((src < srclimit8) &&
+             (((reinterpret_cast<const uint32*>(src)[0] |
+                reinterpret_cast<const uint32*>(src)[1]) & 0x80808080) == 0)) {
+        src += 8;
+      }
+    }
+    while ((src < srclimit) && (src[0] < 0x80)) {
+      src++;
+    }
+    // Run state table on the rest
+    const int n = src - isrc;
+    exit_reason = UTF8GenericScan(st, str + n, str_length - n, &rest_consumed);
+    src += rest_consumed;
+  } while ( exit_reason == kExitDoAgain );
+
+  *bytes_consumed = src - isrc;
+  return exit_reason;
+}
+
+// Hack:  On some compilers the static tables are initialized at startup.
+//   We can't use them until they are initialized.  However, some Protocol
+//   Buffer parsing happens at static init time and may try to validate
+//   UTF-8 strings.  Since UTF-8 validation is only used for debugging
+//   anyway, we simply always return success if initialization hasn't
+//   occurred yet.
+namespace {
+
+bool module_initialized_ = false;  // set to true by InitDetector's constructor
+
+struct InitDetector {
+  InitDetector() {
+    module_initialized_ = true;
+  }
+};
+InitDetector init_detector;  // constructing this global flips the flag
+
+}  // namespace
+
+bool IsStructurallyValidUTF8(const char* buf, int len) {
+  // Until this file's static init has run, every input is reported valid.
+  if (!module_initialized_) { return true; }
+  int valid_prefix = 0;
+  UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, buf, len,
+                           &valid_prefix);
+  return valid_prefix == len;
+}
+
+int UTF8SpnStructurallyValid(const StringPiece& str) {
+  // Until this file's static init has run, the whole input counts as valid.
+  if (!module_initialized_) { return str.size(); }
+  int valid_prefix = 0;
+  UTF8GenericScanFastAscii(&utf8acceptnonsurrogates_obj, str.data(),
+                           str.size(), &valid_prefix);
+  return valid_prefix;
+}
+
+// Produce a structurally valid UTF-8 string of the same length as src_str by
+// overwriting each offending byte with replace_char (typically a blank).
+// replace_char must be legal printable 7-bit ASCII, 0x20..0x7e.
+//
+// src_str itself is never written. When it is already valid nothing is
+// copied and the returned pointer is src_str.data() (const cast away).
+// Otherwise the repaired bytes are written to idst, which receives
+// src_str.length() bytes, and idst is returned.
+//
+// Callers can tell whether a repair happened by comparing the result
+// against src_str.data().
+//
+char* UTF8CoerceToStructurallyValid(const StringPiece& src_str,
+                                    char* idst,
+                                    const char replace_char) {
+  const char* const input = src_str.data();
+  const int total = src_str.length();
+  int good = UTF8SpnStructurallyValid(src_str);
+  if (good == total) {
+    // Fast path: already valid, hand back the caller's own bytes.
+    return const_cast<char*>(input);
+  }
+
+  // Slow path: alternate between copying a valid run and patching the single
+  // bad byte that ended it.
+  const char* const end = input + total;
+  const char* in = input + good;
+  char* out = idst + good;
+  memmove(idst, input, good);            // leading valid run
+  while (in < end) {
+    *out = replace_char;                 // in points at a bad byte: patch it
+    ++out;
+    ++in;
+    good = UTF8SpnStructurallyValid(StringPiece(in, end - in));
+    memmove(out, in, good);              // next valid run (possibly empty)
+    in += good;
+    out += good;
+  }
+  return idst;
+}
+
+}  // namespace internal
+}  // namespace protobuf
+}  // namespace google