Brian Silverman | 9c614bc | 2016-02-15 20:20:02 -0500 | [diff] [blame^] | 1 | #region Copyright notice and license |
| 2 | // Protocol Buffers - Google's data interchange format |
| 3 | // Copyright 2008 Google Inc. All rights reserved. |
| 4 | // https://developers.google.com/protocol-buffers/ |
| 5 | // |
| 6 | // Redistribution and use in source and binary forms, with or without |
| 7 | // modification, are permitted provided that the following conditions are |
| 8 | // met: |
| 9 | // |
| 10 | // * Redistributions of source code must retain the above copyright |
| 11 | // notice, this list of conditions and the following disclaimer. |
| 12 | // * Redistributions in binary form must reproduce the above |
| 13 | // copyright notice, this list of conditions and the following disclaimer |
| 14 | // in the documentation and/or other materials provided with the |
| 15 | // distribution. |
| 16 | // * Neither the name of Google Inc. nor the names of its |
| 17 | // contributors may be used to endorse or promote products derived from |
| 18 | // this software without specific prior written permission. |
| 19 | // |
| 20 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 21 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 22 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 23 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
| 24 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
| 25 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 26 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| 27 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| 28 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| 29 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 30 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 31 | #endregion |
| 32 | using System; |
| 33 | using System.Collections.Generic; |
| 34 | using System.Globalization; |
| 35 | using System.IO; |
| 36 | using System.Text; |
| 37 | |
| 38 | namespace Google.Protobuf |
| 39 | { |
| 40 | /// <summary> |
| 41 | /// Simple but strict JSON tokenizer, rigidly following RFC 7159. |
| 42 | /// </summary> |
| 43 | /// <remarks> |
| 44 | /// <para> |
| 45 | /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc. |
| 46 | /// It does not create tokens for the separator between names and values, or for the comma |
| 47 | /// between values. It validates the token stream as it goes - so callers can assume that the |
| 48 | /// tokens it produces are appropriate. For example, it would never produce "start object, end array." |
| 49 | /// </para> |
| 50 | /// <para>Implementation details: the base class handles single token push-back and object depth tracking; derived classes supply the tokens via <c>NextImpl</c>.</para> |
| 51 | /// <para>Not thread-safe.</para> |
| 52 | /// </remarks> |
| 53 | internal abstract class JsonTokenizer |
| 54 | { |
| 55 | private JsonToken bufferedToken; |
| 56 | |
/// <summary>
/// Builds a tokenizer which produces its tokens by parsing the JSON text supplied by
/// <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The text reader to pull JSON text from.</param>
/// <returns>A new tokenizer wrapping <paramref name="reader"/>.</returns>
internal static JsonTokenizer FromTextReader(TextReader reader)
{
    JsonTokenizer textTokenizer = new JsonTextTokenizer(reader);
    return textTokenizer;
}
| 64 | |
/// <summary>
/// Builds a tokenizer which hands out the tokens in <paramref name="tokens"/> first, and once
/// those are used up, asks <paramref name="continuation"/> for every further token.
/// </summary>
/// <remarks>
/// Pushing a token back on the returned tokenizer does not push it back on the continuation
/// tokenizer, and vice versa. Use with care: this exists for the sake of Any parsing.
/// </remarks>
/// <param name="tokens">The tokens to replay, in order.</param>
/// <param name="continuation">The tokenizer consulted after the replayed tokens are exhausted.</param>
/// <returns>A new replaying tokenizer.</returns>
internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
{
    JsonTokenizer replayTokenizer = new JsonReplayTokenizer(tokens, continuation);
    return replayTokenizer;
}
| 75 | |
/// <summary>
/// The current nesting depth counted in objects only; arrays do not contribute.
/// </summary>
/// <remarks>
/// Incremented when a start-object token is returned and decremented when an end-object token
/// is returned; pushing such a token back reverses the adjustment. Informally, it is the number
/// of '{' characters seen so far which have not yet been closed.
/// </remarks>
internal int ObjectDepth { get; private set; }
| 81 | |
// TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
// token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
/// <summary>
/// Buffers <paramref name="token"/> so that it is the next token returned by <see cref="Next"/>.
/// Only a single token can be buffered at a time.
/// </summary>
/// <param name="token">The token to hand out again.</param>
/// <exception cref="InvalidOperationException">A token is already buffered.</exception>
internal void PushBack(JsonToken token)
{
    if (bufferedToken != null)
    {
        throw new InvalidOperationException("Can't push back twice");
    }
    bufferedToken = token;

    // Mirror image of the bookkeeping in Next(): a pushed-back start-object lowers the depth
    // again, a pushed-back end-object raises it again.
    var pushedType = token.Type;
    int depthCorrection = 0;
    if (pushedType == JsonToken.TokenType.StartObject)
    {
        depthCorrection = -1;
    }
    else if (pushedType == JsonToken.TokenType.EndObject)
    {
        depthCorrection = 1;
    }
    ObjectDepth += depthCorrection;
}
| 100 | |
/// <summary>
/// Returns the next JSON token in the stream. An EndDocument token marks the end of the stream;
/// <c>Next()</c> should not be called again after it has been returned.
/// </summary>
/// <remarks>
/// A token buffered by <c>PushBack</c> is returned (and cleared) first; otherwise the token comes
/// from <see cref="NextImpl"/>. Either way, <c>ObjectDepth</c> is updated for object start/end tokens.
/// </remarks>
/// <returns>The next token in the stream. This is never null.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
internal JsonToken Next()
{
    JsonToken result = bufferedToken;
    if (result == null)
    {
        result = NextImpl();
    }
    else
    {
        // The buffered token is consumed exactly once.
        bufferedToken = null;
    }

    var resultType = result.Type;
    if (resultType == JsonToken.TokenType.StartObject)
    {
        ObjectDepth++;
    }
    else if (resultType == JsonToken.TokenType.EndObject)
    {
        ObjectDepth--;
    }
    return result;
}
| 131 | |
/// <summary>
/// Supplies the next JSON token when the base class has no buffered token to return.
/// (<see cref="Next"/> calls this only when nothing has been pushed back.)
/// </summary>
/// <returns>The next token from the underlying source.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
protected abstract JsonToken NextImpl();
| 139 | |
/// <summary>
/// Tokenizer which hands out a fixed list of tokens first and, once that list is exhausted,
/// forwards every request to another tokenizer.
/// </summary>
private class JsonReplayTokenizer : JsonTokenizer
{
    private readonly IList<JsonToken> replayedTokens;
    private readonly JsonTokenizer continuation;

    // Index into replayedTokens of the next token to hand out.
    private int position;

    /// <summary>
    /// Creates a tokenizer replaying <paramref name="tokens"/> and then continuing with
    /// <paramref name="nextTokenizer"/>.
    /// </summary>
    internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
    {
        replayedTokens = tokens;
        continuation = nextTokenizer;
    }

    // FIXME: Object depth not maintained...
    /// <summary>
    /// Returns the next replayed token if any remain; otherwise asks the continuation tokenizer.
    /// </summary>
    protected override JsonToken NextImpl()
    {
        if (position < replayedTokens.Count)
        {
            JsonToken replayed = replayedTokens[position];
            position++;
            return replayed;
        }
        return continuation.Next();
    }
}
| 165 | |
| 166 | /// <summary> |
| 167 | /// Tokenizer which does all the *real* work of parsing JSON. |
| 168 | /// </summary> |
| 169 | private sealed class JsonTextTokenizer : JsonTokenizer |
| 170 | { |
| 171 | // The set of states in which a value is a valid next token. |
| 172 | private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument; |
| 173 | |
| 174 | private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>(); |
| 175 | private readonly PushBackReader reader; |
| 176 | private State state; |
| 177 | |
/// <summary>
/// Creates a tokenizer at the start of a document whose text is read from <paramref name="reader"/>.
/// </summary>
/// <param name="reader">The text reader supplying the JSON text.</param>
internal JsonTextTokenizer(TextReader reader)
{
    // The document itself is the outermost container, so the stack is never empty while parsing.
    containerStack.Push(ContainerType.Document);
    state = State.StartOfDocument;
    this.reader = new PushBackReader(reader);
}
| 184 | |
/// <summary>
/// Reads characters from the underlying reader until a complete token has been recognised,
/// and returns that token.
/// </summary>
/// <remarks>
/// This method essentially just loops through characters skipping whitespace, validating and
/// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
/// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
/// it returns the token. Although the method is large, it would be relatively hard to break down further... most
/// of it is the large switch statement, which sometimes returns and sometimes doesn't.
/// </remarks>
/// <returns>The next token; <c>JsonToken.EndDocument</c> once the reader is exhausted in a valid state.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text is not valid at the current position</exception>
protected override JsonToken NextImpl()
{
    if (state == State.ReaderExhausted)
    {
        throw new InvalidOperationException("Next() called after end of document");
    }
    while (true)
    {
        var next = reader.Read();
        if (next == null)
        {
            // End of input is only acceptable once the single top-level value is complete.
            ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
            state = State.ReaderExhausted;
            return JsonToken.EndDocument;
        }
        switch (next.Value)
        {
            // Skip whitespace between tokens
            case ' ':
            case '\t':
            case '\r':
            case '\n':
                break;
            case ':':
                // Separator only: changes state but produces no token.
                ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                state = State.ObjectAfterColon;
                break;
            case ',':
                // Separator only: changes state but produces no token.
                ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                state = state == State.ObjectAfterProperty ? State.ObjectAfterComma : State.ArrayAfterComma;
                break;
            case '"':
                string stringValue = ReadString();
                // A string where a property is expected is a name; anywhere else it must be a value.
                if ((state & (State.ObjectStart | State.ObjectAfterComma)) != 0)
                {
                    state = State.ObjectBeforeColon;
                    return JsonToken.Name(stringValue);
                }
                else
                {
                    ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                    return JsonToken.Value(stringValue);
                }
            case '{':
                ValidateState(ValueStates, "Invalid state to read an open brace: ");
                state = State.ObjectStart;
                containerStack.Push(ContainerType.Object);
                return JsonToken.StartObject;
            case '}':
                ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                PopContainer();
                return JsonToken.EndObject;
            case '[':
                ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                state = State.ArrayStart;
                containerStack.Push(ContainerType.Array);
                return JsonToken.StartArray;
            case ']':
                ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                PopContainer();
                return JsonToken.EndArray;
            case 'n': // Start of null
                ConsumeLiteral("null");
                ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                return JsonToken.Null;
            case 't': // Start of true
                ConsumeLiteral("true");
                ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                return JsonToken.True;
            case 'f': // Start of false
                ConsumeLiteral("false");
                ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                return JsonToken.False;
            case '-': // Start of a number
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                double number = ReadNumber(next.Value);
                ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                return JsonToken.Value(number);
            default:
                // Created via the reader, like every other JSON error raised by this tokenizer.
                throw reader.CreateException("Invalid first character of token: " + next.Value);
        }
    }
}
| 284 | |
/// <summary>
/// Checks that the current state is one of <paramref name="validStates"/>.
/// </summary>
/// <param name="validStates">The set (flag combination) of acceptable states.</param>
/// <param name="errorPrefix">Text placed before the current state name in the error message.</param>
/// <exception cref="InvalidJsonException">The current state is not in <paramref name="validStates"/>.</exception>
private void ValidateState(State validStates, string errorPrefix)
{
    bool stateIsAllowed = (validStates & state) != 0;
    if (stateIsAllowed)
    {
        return;
    }
    throw reader.CreateException(errorPrefix + state);
}
| 292 | |
/// <summary>
/// Reads the remainder of a string token, up to and including the closing double quote.
/// The opening double quote must already have been consumed.
/// </summary>
/// <returns>The string's content with escape sequences resolved.</returns>
/// <exception cref="InvalidJsonException">
/// The text ends inside the string, contains a character below U+0020, contains an invalid
/// escape sequence, or uses surrogate code units that do not form pairs.
/// </exception>
private string ReadString()
{
    var text = new StringBuilder();
    // True when the previously appended code unit was a high surrogate still awaiting its partner.
    bool pendingHighSurrogate = false;
    for (;;)
    {
        char current = reader.ReadOrFail("Unexpected end of text while reading string");
        if (current < ' ')
        {
            throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) current));
        }
        // This check precedes escape handling, so an escaped quote never terminates the string.
        if (current == '"')
        {
            if (!pendingHighSurrogate)
            {
                return text.ToString();
            }
            throw reader.CreateException("Invalid use of surrogate pair code units");
        }
        if (current == '\\')
        {
            current = ReadEscapedCharacter();
        }
        // TODO: Consider only allowing surrogate pairs that are either both escaped,
        // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
        // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
        bool isLowSurrogate = char.IsLowSurrogate(current);
        if (isLowSurrogate != pendingHighSurrogate)
        {
            // Either a low surrogate with no preceding high one, or a high one not followed by a low one.
            throw reader.CreateException("Invalid use of surrogate pair code units");
        }
        pendingHighSurrogate = char.IsHighSurrogate(current);
        text.Append(current);
    }
}
| 330 | |
/// <summary>
/// Reads the rest of an escape sequence and returns the character it stands for.
/// The leading backslash must already have been consumed.
/// </summary>
/// <returns>The unescaped character.</returns>
/// <exception cref="InvalidJsonException">
/// The text ends inside the escape sequence, or the character after the backslash is not a
/// recognised escape code.
/// </exception>
private char ReadEscapedCharacter()
{
    // Parallel tables: escapeCodes[i] is the character after the backslash, escapedValues[i] what it denotes.
    const string escapeCodes = "nrtbf\\\"/";
    const string escapedValues = "\n\r\t\b\f\\\"/";

    char code = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
    if (code == 'u')
    {
        return ReadUnicodeEscape();
    }
    int tableIndex = escapeCodes.IndexOf(code);
    if (tableIndex < 0)
    {
        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) code));
    }
    return escapedValues[tableIndex];
}
| 361 | |
/// <summary>
/// Reads the four hexadecimal digits of a Unicode escape and returns the UTF-16 code unit they encode.
/// The leading \u must already have been consumed.
/// </summary>
/// <returns>The code unit with the value given by the four hex digits.</returns>
/// <exception cref="InvalidJsonException">
/// The text ends before four digits were read, or a character is not a hexadecimal digit.
/// </exception>
private char ReadUnicodeEscape()
{
    int codeUnit = 0;
    int digitsRemaining = 4;
    while (digitsRemaining > 0)
    {
        char digit = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
        int digitValue;
        if (digit >= '0' && digit <= '9')
        {
            digitValue = digit - '0';
        }
        else if (digit >= 'A' && digit <= 'F')
        {
            digitValue = digit - 'A' + 10;
        }
        else if (digit >= 'a' && digit <= 'f')
        {
            digitValue = digit - 'a' + 10;
        }
        else
        {
            throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) digit));
        }
        // Shift the digits read so far up by one nybble and slot the new one in below them.
        codeUnit = (codeUnit << 4) | digitValue;
        digitsRemaining--;
    }
    return (char) codeUnit;
}
| 392 | |
/// <summary>
/// Consumes the remaining characters of a text-only literal, checking each one against
/// <paramref name="text"/>. The first letter of the literal must already have been consumed.
/// </summary>
/// <param name="text">The complete literal expected, including its already-read first letter.</param>
/// <exception cref="InvalidJsonException">
/// The text ends before the literal is complete, or a character differs from the literal.
/// </exception>
private void ConsumeLiteral(string text)
{
    // Index 0 was consumed by the caller, so comparison starts at index 1.
    for (int index = 1; index < text.Length; index++)
    {
        char expected = text[index];
        char? actual = reader.Read();
        if (!actual.HasValue)
        {
            throw reader.CreateException("Unexpected end of text while reading literal token " + text);
        }
        if (actual.Value != expected)
        {
            throw reader.CreateException("Unexpected character while reading literal token " + text);
        }
    }
}
| 412 | |
/// <summary>
/// Reads a numeric literal and converts it to a double.
/// </summary>
/// <param name="initialCharacter">
/// The first character of the number ('-' or a digit), which the caller has already consumed.
/// </param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidJsonException">
/// The literal is malformed, or its value is too large in magnitude to be represented as a
/// finite double.
/// </exception>
private double ReadNumber(char initialCharacter)
{
    StringBuilder builder = new StringBuilder();
    if (initialCharacter == '-')
    {
        builder.Append("-");
    }
    else
    {
        // ReadInt expects to read the first digit itself.
        reader.PushBack(initialCharacter);
    }
    // Each method returns the character it read that doesn't belong in that part,
    // so we know what to do next, including pushing the character back at the end.
    // null is returned for "end of text".
    char? next = ReadInt(builder);
    if (next == '.')
    {
        next = ReadFrac(builder);
    }
    if (next == 'e' || next == 'E')
    {
        next = ReadExp(builder);
    }
    // If we read a character which wasn't part of the number, push it back so we can read it again
    // to parse the next token.
    if (next != null)
    {
        reader.PushBack(next.Value);
    }

    // TODO: What exception should we throw if the value can't be represented as a double?
    double result;
    try
    {
        result = double.Parse(builder.ToString(),
            NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
            CultureInfo.InvariantCulture);
    }
    catch (OverflowException)
    {
        throw reader.CreateException("Numeric value out of range: " + builder);
    }
    // Newer runtimes (.NET Core 3.0 and later) return an infinity instead of throwing
    // OverflowException for out-of-range input. The literal itself can never spell an infinity
    // (only digits, sign, '.' and exponent are accepted above), so an infinite result always
    // means overflow; report it the same way on every runtime.
    if (double.IsInfinity(result))
    {
        throw reader.CreateException("Numeric value out of range: " + builder);
    }
    return result;
}
| 455 | |
/// <summary>
/// Reads the integer part of a number (after any leading '-') and appends it to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Receives the digits read.</param>
/// <returns>The first character read after the integer part, or null at the end of the text.</returns>
/// <exception cref="InvalidJsonException">
/// The text ends immediately, the first character is not a digit, or a leading 0 is followed by more digits.
/// </exception>
private char? ReadInt(StringBuilder builder)
{
    char leading = reader.ReadOrFail("Invalid numeric literal");
    bool leadingIsDigit = leading >= '0' && leading <= '9';
    if (!leadingIsDigit)
    {
        throw reader.CreateException("Invalid numeric literal");
    }
    builder.Append(leading);

    int followingDigits;
    char? terminator = ConsumeDigits(builder, out followingDigits);
    // "0" on its own is fine; "01", "007" etc. are not.
    if (followingDigits != 0 && leading == '0')
    {
        throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
    }
    return terminator;
}
| 472 | |
/// <summary>
/// Reads the fractional part of a number and appends it (with its decimal point) to
/// <paramref name="builder"/>. The '.' itself must already have been consumed.
/// </summary>
/// <param name="builder">Receives the decimal point and the digits read.</param>
/// <returns>The first character read after the fraction digits, or null at the end of the text.</returns>
/// <exception cref="InvalidJsonException">No digit follows the decimal point.</exception>
private char? ReadFrac(StringBuilder builder)
{
    // The caller consumed the '.', so it has to be recorded here.
    builder.Append('.');
    int fractionDigits;
    char? terminator = ConsumeDigits(builder, out fractionDigits);
    if (fractionDigits != 0)
    {
        return terminator;
    }
    throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
}
| 484 | |
/// <summary>
/// Reads the exponent part of a number and appends it to <paramref name="builder"/>.
/// The 'e' or 'E' itself must already have been consumed.
/// </summary>
/// <param name="builder">Receives 'E', the optional sign and the exponent digits.</param>
/// <returns>The first character read after the exponent digits, or null at the end of the text.</returns>
/// <exception cref="InvalidJsonException">
/// The text ends right after the exponent marker, or no digit follows the marker and optional sign.
/// </exception>
private char? ReadExp(StringBuilder builder)
{
    // The caller consumed the marker; it is always recorded as upper-case 'E'.
    builder.Append('E');
    char? signOrDigit = reader.Read();
    if (!signOrDigit.HasValue)
    {
        throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
    }
    switch (signOrDigit.Value)
    {
        case '+':
        case '-':
            builder.Append(signOrDigit.Value);
            break;
        default:
            // Not a sign: leave it for the digit loop below.
            reader.PushBack(signOrDigit.Value);
            break;
    }
    int exponentDigits;
    char? terminator = ConsumeDigits(builder, out exponentDigits);
    if (exponentDigits == 0)
    {
        throw reader.CreateException("Invalid numeric literal: exponent without value");
    }
    return terminator;
}
| 509 | |
/// <summary>
/// Reads consecutive decimal digits, appending each to <paramref name="builder"/>.
/// </summary>
/// <param name="builder">Receives the digits read.</param>
/// <param name="count">Set to the number of digits appended (possibly zero).</param>
/// <returns>
/// The first non-digit character read (which is consumed but not appended), or null at the end of the text.
/// </returns>
private char? ConsumeDigits(StringBuilder builder, out int count)
{
    count = 0;
    char? current = reader.Read();
    while (current != null && current.Value >= '0' && current.Value <= '9')
    {
        builder.Append(current.Value);
        count++;
        current = reader.Read();
    }
    return current;
}
| 524 | |
/// <summary>
/// Checks that a value is acceptable in the current state, then moves to the state which
/// follows a completed value (for example ObjectAfterColon becomes ObjectAfterProperty).
/// </summary>
/// <param name="errorPrefix">Text placed before the current state name if the state is invalid.</param>
/// <exception cref="InvalidJsonException">A value is not valid in the current state.</exception>
/// <exception cref="InvalidOperationException">
/// The state passed validation but has no transition defined here (an internal inconsistency).
/// </exception>
private void ValidateAndModifyStateForValue(string errorPrefix)
{
    ValidateState(ValueStates, errorPrefix);
    if (state == State.StartOfDocument)
    {
        state = State.ExpectedEndOfDocument;
    }
    else if (state == State.ObjectAfterColon)
    {
        state = State.ObjectAfterProperty;
    }
    else if (state == State.ArrayStart || state == State.ArrayAfterComma)
    {
        state = State.ArrayAfterValue;
    }
    else
    {
        throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
    }
}
| 548 | |
/// <summary>
/// Removes the innermost container from the stack and sets the state to the one which follows
/// a completed value in the container that is now innermost.
/// </summary>
/// <exception cref="InvalidOperationException">The enclosing container has an unrecognised type.</exception>
private void PopContainer()
{
    containerStack.Pop();
    ContainerType enclosing = containerStack.Peek();
    if (enclosing == ContainerType.Object)
    {
        state = State.ObjectAfterProperty;
    }
    else if (enclosing == ContainerType.Array)
    {
        state = State.ArrayAfterValue;
    }
    else if (enclosing == ContainerType.Document)
    {
        state = State.ExpectedEndOfDocument;
    }
    else
    {
        throw new InvalidOperationException("Unexpected container type: " + enclosing);
    }
}
| 572 | |
/// <summary>
/// The kinds of container tracked on the tokenizer's container stack.
/// </summary>
private enum ContainerType
{
    /// <summary>The whole document; pushed once by the constructor.</summary>
    Document,
    /// <summary>A JSON object, pushed when '{' is read.</summary>
    Object,
    /// <summary>A JSON array, pushed when '[' is read.</summary>
    Array
}
| 577 | |
/// <summary>
/// Possible states of the tokenizer.
/// </summary>
/// <remarks>
/// <para>This is a flags enum purely so that a set of valid states can be represented and
/// checked simply and efficiently; the tokenizer itself is only ever in one state at a time.</para>
/// <para>
/// Each state is documented with an example in which ^ marks the current position within the
/// text stream. The examples all use string values, but any value could appear, including
/// nested objects/arrays. The complete state of the tokenizer also includes a stack indicating
/// the enclosing contexts (arrays/objects). The notional state "AfterValue" means that a value
/// has been completed, at which point there is an immediate transition to
/// ExpectedEndOfDocument, ObjectAfterProperty or ArrayAfterValue.
/// </para>
/// <para>
/// These states were derived manually by reading RFC 7159 carefully.
/// </para>
/// </remarks>
[Flags]
private enum State
{
    /// <summary>
    /// ^ { "foo": "bar" }
    /// Before the value in a document. Next states: ObjectStart, ArrayStart, "AfterValue"
    /// </summary>
    StartOfDocument = 0x001,
    /// <summary>
    /// { "foo": "bar" } ^
    /// After the value in a document. Next states: ReaderExhausted
    /// </summary>
    ExpectedEndOfDocument = 0x002,
    /// <summary>
    /// { "foo": "bar" } ^ (and already read to the end of the reader)
    /// Terminal state.
    /// </summary>
    ReaderExhausted = 0x004,
    /// <summary>
    /// { ^ "foo": "bar" }
    /// Before the *first* property in an object.
    /// Next states:
    /// "AfterValue" (empty object)
    /// ObjectBeforeColon (read a name)
    /// </summary>
    ObjectStart = 0x008,
    /// <summary>
    /// { "foo" ^ : "bar", "x": "y" }
    /// After a property name, so expecting a colon.
    /// Next state: ObjectAfterColon
    /// </summary>
    ObjectBeforeColon = 0x010,
    /// <summary>
    /// { "foo" : ^ "bar", "x": "y" }
    /// After the colon following a property name, so expecting the property's value.
    /// Next states:
    /// "AfterValue" (value is simple)
    /// ObjectStart (value is object)
    /// ArrayStart (value is array)
    /// </summary>
    ObjectAfterColon = 0x020,
    /// <summary>
    /// { "foo" : "bar" ^ , "x" : "y" }
    /// At the end of a property, so expecting either a comma or end-of-object
    /// Next states: ObjectAfterComma or "AfterValue"
    /// </summary>
    ObjectAfterProperty = 0x040,
    /// <summary>
    /// { "foo":"bar", ^ "x":"y" }
    /// Read the comma after the previous property, so expecting another property.
    /// This is like ObjectStart, but closing brace isn't valid here
    /// Next state: ObjectBeforeColon.
    /// </summary>
    ObjectAfterComma = 0x080,
    /// <summary>
    /// [ ^ "foo", "bar" ]
    /// Before the *first* value in an array.
    /// Next states:
    /// "AfterValue" (read a value)
    /// "AfterValue" (end of array; will pop stack)
    /// </summary>
    ArrayStart = 0x100,
    /// <summary>
    /// [ "foo" ^ , "bar" ]
    /// After any value in an array, so expecting either a comma or end-of-array
    /// Next states: ArrayAfterComma or "AfterValue"
    /// </summary>
    ArrayAfterValue = 0x200,
    /// <summary>
    /// [ "foo", ^ "bar" ]
    /// After a comma in an array, so there *must* be another value (simple or complex).
    /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
    /// </summary>
    ArrayAfterComma = 0x400
}
| 671 | |
/// <summary>
/// Thin wrapper around a text reader which lets a single character be pushed back, and which
/// creates the exceptions reported for invalid JSON.
/// </summary>
private class PushBackReader
{
    // TODO: Add locations for errors etc.

    private readonly TextReader source;

    /// <summary>
    /// The character which has been pushed back, if any; null when nothing is buffered.
    /// </summary>
    private char? pushedBack;

    /// <summary>
    /// Wraps <paramref name="reader"/>.
    /// </summary>
    internal PushBackReader(TextReader reader)
    {
        // TODO: Wrap the reader in a BufferedReader?
        source = reader;
    }

    /// <summary>
    /// Returns the next character: the pushed-back one if present, otherwise the next from the
    /// underlying reader.
    /// </summary>
    /// <returns>The next character, or null if the end of the text has been reached.</returns>
    internal char? Read()
    {
        if (pushedBack.HasValue)
        {
            char? buffered = pushedBack;
            pushedBack = null;
            return buffered;
        }
        int raw = source.Read();
        if (raw == -1)
        {
            // TextReader.Read signals the end of the text with -1.
            return null;
        }
        return (char) raw;
    }

    /// <summary>
    /// Returns the next character, failing if the end of the text has been reached.
    /// </summary>
    /// <param name="messageOnFailure">The exception message to use at the end of the text.</param>
    /// <exception cref="InvalidJsonException">There are no more characters to read.</exception>
    internal char ReadOrFail(string messageOnFailure)
    {
        char? candidate = Read();
        if (candidate.HasValue)
        {
            return candidate.Value;
        }
        throw CreateException(messageOnFailure);
    }

    /// <summary>
    /// Stores <paramref name="c"/> so that it is returned by the next call to <see cref="Read"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
    internal void PushBack(char c)
    {
        if (pushedBack.HasValue)
        {
            throw new InvalidOperationException("Cannot push back when already buffering a character");
        }
        pushedBack = c;
    }

    /// <summary>
    /// Creates the exception used to report invalid JSON with the given message.
    /// </summary>
    internal InvalidJsonException CreateException(string message)
    {
        // TODO: Keep track of and use the location.
        var exception = new InvalidJsonException(message);
        return exception;
    }
}
| 736 | } |
| 737 | } |
| 738 | } |