blob: 80d3013da37c0200ab8bab2081ca2d6a8687439c [file] [log] [blame]
Brian Silverman9c614bc2016-02-15 20:20:02 -05001#region Copyright notice and license
2// Protocol Buffers - Google's data interchange format
3// Copyright 2015 Google Inc. All rights reserved.
4// https://developers.google.com/protocol-buffers/
5//
6// Redistribution and use in source and binary forms, with or without
7// modification, are permitted provided that the following conditions are
8// met:
9//
10// * Redistributions of source code must retain the above copyright
11// notice, this list of conditions and the following disclaimer.
12// * Redistributions in binary form must reproduce the above
13// copyright notice, this list of conditions and the following disclaimer
14// in the documentation and/or other materials provided with the
15// distribution.
16// * Neither the name of Google Inc. nor the names of its
17// contributors may be used to endorse or promote products derived from
18// this software without specific prior written permission.
19//
20// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#endregion
32
33using Google.Protobuf.Reflection;
34using Google.Protobuf.WellKnownTypes;
35using System;
36using System.Collections;
37using System.Collections.Generic;
38using System.Globalization;
39using System.IO;
40using System.Text;
41using System.Text.RegularExpressions;
42
43namespace Google.Protobuf
44{
45 /// <summary>
46 /// Reflection-based converter from JSON to messages.
47 /// </summary>
48 /// <remarks>
49 /// <para>
50 /// Instances of this class are thread-safe, with no mutable state.
51 /// </para>
52 /// <para>
53 /// This is a simple start to get JSON parsing working. As it's reflection-based,
54 /// it's not as quick as baking calls into generated messages - but is a simpler implementation.
55 /// (This code is generally not heavily optimized.)
56 /// </para>
57 /// </remarks>
58 public sealed class JsonParser
59 {
// Both patterns spell digits as [0-9] rather than \d so that only ASCII digits can match.
// They are deliberately loose pre-filters that weed out most malformed input; the parsing
// code that runs afterwards is responsible for the remaining validation.
private static readonly Regex TimestampRegex = new Regex(@"^(?<datetime>[0-9]{4}-[01][0-9]-[0-3][0-9]T[012][0-9]:[0-5][0-9]:[0-5][0-9])(?<subseconds>\.[0-9]{1,9})?(?<offset>(Z|[+-][0-1][0-9]:[0-5][0-9]))$", FrameworkPortability.CompiledRegexWhereAvailable);
private static readonly Regex DurationRegex = new Regex(@"^(?<sign>-)?(?<int>[0-9]{1,12})(?<subseconds>\.[0-9]{1,9})?s$", FrameworkPortability.CompiledRegexWhereAvailable);

// Multipliers turning a parsed fraction into nanoseconds. MergeTimestamp indexes this table with
// the length of the "subseconds" capture, which includes the leading '.', so a one-digit
// fraction uses index 2 and a nine-digit fraction uses index 10.
private static readonly int[] SubsecondScalingFactors = { 0, 100000000, 100000000, 10000000, 1000000, 100000, 10000, 1000, 100, 10, 1 };
private static readonly char[] FieldMaskPathSeparators = { ',' };

// Shared parser instance built from the default settings; exposed through the Default property.
private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
68
// Maps the full name of each specially-handled well-known type to the routine that merges its
// JSON representation into a message of that type.
// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
    {
        [Timestamp.Descriptor.FullName] = (parser, msg, tokens) => MergeTimestamp(msg, tokens.Next()),
        [Duration.Descriptor.FullName] = (parser, msg, tokens) => MergeDuration(msg, tokens.Next()),
        [Value.Descriptor.FullName] = (parser, msg, tokens) => parser.MergeStructValue(msg, tokens),
        [ListValue.Descriptor.FullName] = (parser, msg, tokens) =>
            parser.MergeRepeatedField(msg, msg.Descriptor.Fields[ListValue.ValuesFieldNumber], tokens),
        [Struct.Descriptor.FullName] = (parser, msg, tokens) => parser.MergeStruct(msg, tokens),
        [Any.Descriptor.FullName] = (parser, msg, tokens) => parser.MergeAny(msg, tokens),
        [FieldMask.Descriptor.FullName] = (parser, msg, tokens) => MergeFieldMask(msg, tokens.Next()),
        // All wrapper types share one handler.
        [Int32Value.Descriptor.FullName] = MergeWrapperField,
        [Int64Value.Descriptor.FullName] = MergeWrapperField,
        [UInt32Value.Descriptor.FullName] = MergeWrapperField,
        [UInt64Value.Descriptor.FullName] = MergeWrapperField,
        [FloatValue.Descriptor.FullName] = MergeWrapperField,
        [DoubleValue.Descriptor.FullName] = MergeWrapperField,
        [BytesValue.Descriptor.FullName] = MergeWrapperField,
        [StringValue.Descriptor.FullName] = MergeWrapperField
    };
91
// Single handler shared by all wrapper-type entries of WellKnownTypeHandlers, so the same
// lambda does not have to be spelled out once per wrapper type. It merges the JSON value
// straight into the wrapper message's value field.
private static void MergeWrapperField(JsonParser parser, IMessage message, JsonTokenizer tokenizer)
{
    var wrappedValueField = message.Descriptor.Fields[WrappersReflection.WrapperValueFieldNumber];
    parser.MergeField(message, wrappedValueField, tokenizer);
}
98
/// <summary>
/// Returns a parser using the default settings.
/// </summary>
public static JsonParser Default => defaultInstance;
103
// Settings this parser was created with; consulted for the recursion limit and the type registry.
private readonly Settings settings;

/// <summary>
/// Creates a new parser with the given settings.
/// </summary>
/// <param name="settings">The settings. Must not be null.</param>
public JsonParser(Settings settings)
{
    // Fail at construction time rather than with a NullReferenceException on the first parse.
    ProtoPreconditions.CheckNotNull(settings, nameof(settings));
    this.settings = settings;
}
114
/// <summary>
/// Merges the content of the JSON text <paramref name="json"/> into an existing message.
/// </summary>
/// <param name="message">The message that receives the parsed data.</param>
/// <param name="json">The JSON text to parse.</param>
internal void Merge(IMessage message, string json)
{
    // Wrap the text in a reader and defer to the TextReader overload.
    TextReader reader = new StringReader(json);
    Merge(message, reader);
}
124
/// <summary>
/// Reads JSON from <paramref name="jsonReader"/> and merges its content into an existing message.
/// </summary>
/// <param name="message">The message that receives the parsed data.</param>
/// <param name="jsonReader">Reader supplying the JSON text.</param>
internal void Merge(IMessage message, TextReader jsonReader)
{
    var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
    Merge(message, tokenizer);

    // Nothing may follow the top-level value: the very next token has to be end-of-document.
    var trailing = tokenizer.Next();
    if (trailing != JsonToken.EndDocument)
    {
        throw new InvalidProtocolBufferException("Expected end of JSON after object");
    }
}
140
/// <summary>
/// Populates <paramref name="message"/> from the token stream of <paramref name="tokenizer"/>.
/// Usually the next token is a "start object" token, although wrapper types and null values
/// break that expectation. The implementation is an LL(1) recursive descent parser: the
/// tokenizer is trusted to deliver well-formed JSON, while this method rejects streams that
/// are valid JSON but not valid "protobuf JSON".
/// </summary>
private void Merge(IMessage message, JsonTokenizer tokenizer)
{
    if (tokenizer.ObjectDepth > settings.RecursionLimit)
    {
        throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
    }

    var descriptor = message.Descriptor;
    if (descriptor.IsWellKnownType)
    {
        Action<JsonParser, IMessage, JsonTokenizer> wellKnownHandler;
        if (WellKnownTypeHandlers.TryGetValue(descriptor.FullName, out wellKnownHandler))
        {
            wellKnownHandler(this, message, tokenizer);
            return;
        }
        // A well-known type without a registered handler is parsed like any other message.
    }

    if (tokenizer.Next().Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object");
    }

    var fieldsByJsonName = descriptor.Fields.ByJsonName();
    // Oneofs that already received a value; each may be set at most once. Allocated on first
    // use, because oneofs are comparatively rare and most messages never need the set.
    HashSet<OneofDescriptor> assignedOneofs = null;

    for (var current = tokenizer.Next();
         current.Type != JsonToken.TokenType.EndObject;
         current = tokenizer.Next())
    {
        if (current.Type != JsonToken.TokenType.Name)
        {
            throw new InvalidOperationException("Unexpected token type " + current.Type);
        }

        string jsonName = current.StringValue;
        FieldDescriptor field;
        if (!fieldsByJsonName.TryGetValue(jsonName, out field))
        {
            // TODO: Is this what we want to do? If not, we'll need to skip the value,
            // which may be an object or array. (We might want to put code in the tokenizer
            // to do that.)
            throw new InvalidProtocolBufferException("Unknown field: " + jsonName);
        }

        if (field.ContainingOneof != null)
        {
            if (assignedOneofs == null)
            {
                assignedOneofs = new HashSet<OneofDescriptor>();
            }
            if (!assignedOneofs.Add(field.ContainingOneof))
            {
                throw new InvalidProtocolBufferException($"Multiple values specified for oneof {field.ContainingOneof.Name}");
            }
        }
        MergeField(message, field, tokenizer);
    }
}
212
// Reads the value for a single field from the tokenizer and stores it in the message,
// dispatching on whether the field is a map, a repeated field or a singular field.
private void MergeField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var lookahead = tokenizer.Next();

    // A JSON null clears the field - except for a singular google.protobuf.Value field, where
    // null is a real value and is handled by the normal parsing path below.
    // Note: different from Java API, which just ignores it.
    // TODO: Bring it more in line? Discuss...
    if (lookahead.Type == JsonToken.TokenType.Null &&
        (field.IsMap || field.IsRepeated || !IsGoogleProtobufValueField(field)))
    {
        field.Accessor.Clear(message);
        return;
    }

    // The token was only peeked at; hand it back so the specific merge routine sees it.
    tokenizer.PushBack(lookahead);

    if (field.IsMap)
    {
        MergeMapField(message, field, tokenizer);
        return;
    }
    if (field.IsRepeated)
    {
        MergeRepeatedField(message, field, tokenizer);
        return;
    }

    var parsed = ParseSingleValue(field, tokenizer);
    field.Accessor.SetValue(message, parsed);
}
244
// Parses a JSON array and appends each element to the repeated field's existing list.
private void MergeRepeatedField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartArray)
    {
        throw new InvalidProtocolBufferException("Repeated field value was not an array. Token type: " + opening.Type);
    }

    var elements = (IList) field.Accessor.GetValue(message);
    for (var element = tokenizer.Next();
         element.Type != JsonToken.TokenType.EndArray;
         element = tokenizer.Next())
    {
        // The element token was only inspected; ParseSingleValue must read it again.
        tokenizer.PushBack(element);
        if (element.Type == JsonToken.TokenType.Null)
        {
            throw new InvalidProtocolBufferException("Repeated field elements cannot be null");
        }
        elements.Add(ParseSingleValue(field, tokenizer));
    }
}
269
// Parses a JSON object into the map field's dictionary: each property name becomes a key and
// each property value becomes the corresponding map value.
private void MergeMapField(IMessage message, FieldDescriptor field, JsonTokenizer tokenizer)
{
    // A map is always written as a JSON object, even when its values are well-known types;
    // those are dealt with by ParseSingleValue.
    var opening = tokenizer.Next();
    if (opening.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected an object to populate a map");
    }

    // The key and value descriptors are fields 1 and 2 of the map field's message type.
    var entryType = field.MessageType;
    var keyField = entryType.FindFieldByNumber(1);
    var valueField = entryType.FindFieldByNumber(2);
    if (keyField == null || valueField == null)
    {
        throw new InvalidProtocolBufferException("Invalid map field: " + field.FullName);
    }

    var map = (IDictionary) field.Accessor.GetValue(message);
    for (var entry = tokenizer.Next();
         entry.Type != JsonToken.TokenType.EndObject;
         entry = tokenizer.Next())
    {
        object key = ParseMapKey(keyField, entry.StringValue);
        object value = ParseSingleValue(valueField, tokenizer);
        if (value == null)
        {
            throw new InvalidProtocolBufferException("Map values must not be null");
        }
        map[key] = value;
    }
}
304
// True when the field is a message field whose message type is google.protobuf.Value.
private static bool IsGoogleProtobufValueField(FieldDescriptor field)
{
    if (field.FieldType != FieldType.Message)
    {
        return false;
    }
    return field.MessageType.FullName == Value.Descriptor.FullName;
}
310
// Parses one value for the given field from the tokenizer and returns it as an object that
// can be stored in the field: a sub-message, Value.ForNull() or null for a JSON null, or the
// result of converting a bool/string/number token to the field's type.
// Throws InvalidProtocolBufferException when the token kind cannot be used for the field type.
private object ParseSingleValue(FieldDescriptor field, JsonTokenizer tokenizer)
{
    var token = tokenizer.Next();
    if (token.Type == JsonToken.TokenType.Null)
    {
        // TODO: In order to support dynamic messages, we should really build this up
        // dynamically.
        if (IsGoogleProtobufValueField(field))
        {
            return Value.ForNull();
        }
        return null;
    }

    var fieldType = field.FieldType;
    if (fieldType == FieldType.Message)
    {
        if (!field.MessageType.IsWrapperType)
        {
            // Ordinary message: give the token back and let Merge parse the whole sub-message.
            // TODO: Merge the current value in message? (Public API currently doesn't make this relevant as we don't expose merging.)
            tokenizer.PushBack(token);
            IMessage subMessage = NewMessageForField(field);
            Merge(subMessage, tokenizer);
            return subMessage;
        }

        // Wrapper types are parsed as the type of the value they wrap.
        // TODO: What does this mean for null?
        field = field.MessageType.Fields[WrappersReflection.WrapperValueFieldNumber];
        fieldType = field.FieldType;
    }

    // Null tokens have already returned above, so no Null case is needed here.
    switch (token.Type)
    {
        case JsonToken.TokenType.True:
        case JsonToken.TokenType.False:
            if (fieldType == FieldType.Bool)
            {
                return token.Type == JsonToken.TokenType.True;
            }
            // A bool token for a non-bool field is reported through the shared default case,
            // so the error message only has to be maintained in one place.
            goto default;
        case JsonToken.TokenType.StringValue:
            return ParseSingleStringValue(field, token.StringValue);
        // Note: not passing the number value itself here, as we may end up storing the string value in the token too.
        case JsonToken.TokenType.Number:
            return ParseSingleNumberValue(field, token);
        default:
            throw new InvalidProtocolBufferException("Unsupported JSON token type " + token.Type + " for field type " + fieldType);
    }
}
367
/// <summary>
/// Creates a new message of type <typeparamref name="T"/> from the JSON text <paramref name="json"/>.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="json">The JSON text to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    TextReader reader = new StringReader(json);
    return Parse<T>(reader);
}
380
/// <summary>
/// Creates a new message of type <typeparamref name="T"/> from JSON supplied by <paramref name="jsonReader"/>.
/// </summary>
/// <typeparam name="T">The message type to instantiate and populate.</typeparam>
/// <param name="jsonReader">Reader supplying the JSON text.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    var result = new T();
    Merge(result, jsonReader);
    return result;
}
395
/// <summary>
/// Creates a new message of the type described by <paramref name="descriptor"/> from the JSON
/// text <paramref name="json"/>.
/// </summary>
/// <param name="json">The JSON text to parse.</param>
/// <param name="descriptor">Descriptor of the message type to create.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(json, nameof(json));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    TextReader reader = new StringReader(json);
    return Parse(reader, descriptor);
}
409
/// <summary>
/// Creates a new message of the type described by <paramref name="descriptor"/> from JSON
/// supplied by <paramref name="jsonReader"/>.
/// </summary>
/// <param name="jsonReader">Reader supplying the JSON text.</param>
/// <param name="descriptor">Descriptor of the message type to create.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
    ProtoPreconditions.CheckNotNull(jsonReader, nameof(jsonReader));
    ProtoPreconditions.CheckNotNull(descriptor, nameof(descriptor));
    // The descriptor's parser supplies the empty message instance to fill in.
    var result = descriptor.Parser.CreateTemplate();
    Merge(result, jsonReader);
    return result;
}
425
// Handler for google.protobuf.Value: looks at the first token to decide which of the Value
// fields (null, string, number, bool, struct or list) is being set, then sets that field.
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    var fields = message.Descriptor.Fields;
    var kind = first.Type;

    if (kind == JsonToken.TokenType.Null)
    {
        fields[Value.NullValueFieldNumber].Accessor.SetValue(message, 0);
    }
    else if (kind == JsonToken.TokenType.StringValue)
    {
        fields[Value.StringValueFieldNumber].Accessor.SetValue(message, first.StringValue);
    }
    else if (kind == JsonToken.TokenType.Number)
    {
        fields[Value.NumberValueFieldNumber].Accessor.SetValue(message, first.NumberValue);
    }
    else if (kind == JsonToken.TokenType.False || kind == JsonToken.TokenType.True)
    {
        fields[Value.BoolValueFieldNumber].Accessor.SetValue(message, kind == JsonToken.TokenType.True);
    }
    else if (kind == JsonToken.TokenType.StartObject || kind == JsonToken.TokenType.StartArray)
    {
        // Objects go to the struct field and arrays to the list field. Either way the nested
        // message is parsed by Merge, which needs to see the opening token again.
        var nestedField = kind == JsonToken.TokenType.StartObject
            ? fields[Value.StructValueFieldNumber]
            : fields[Value.ListValueFieldNumber];
        var nested = NewMessageForField(nestedField);
        tokenizer.PushBack(first);
        Merge(nested, tokenizer);
        nestedField.Accessor.SetValue(message, nested);
    }
    else
    {
        throw new InvalidOperationException("Unexpected token type: " + first.Type);
    }
}
467
// Handler for google.protobuf.Struct: the JSON object is parsed directly as the map held in
// the Struct's "fields" field.
private void MergeStruct(IMessage message, JsonTokenizer tokenizer)
{
    var first = tokenizer.Next();
    if (first.Type == JsonToken.TokenType.StartObject)
    {
        // MergeMapField expects to read the start-object token itself.
        tokenizer.PushBack(first);
        var fieldsMap = message.Descriptor.Fields[Struct.FieldsFieldNumber];
        MergeMapField(message, fieldsMap, tokenizer);
        return;
    }
    throw new InvalidProtocolBufferException("Expected object value for Struct");
}
480
// Handler for google.protobuf.Any. Tokens are buffered until the @type property of this Any
// is found; the type is then looked up in the type registry and the buffered tokens plus the
// remainder of the object are replayed (without @type) to parse the packed message.
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
    var buffered = new List<JsonToken>();

    var token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StartObject)
    {
        throw new InvalidProtocolBufferException("Expected object value for Any");
    }
    int anyDepth = tokenizer.ObjectDepth;

    while (true)
    {
        // Comparing the depth makes sure that an @type belonging to a nested Any, appearing
        // before the type URL of *this* Any, is not mistaken for ours.
        bool atOwnTypeProperty =
            token.Type == JsonToken.TokenType.Name &&
            token.StringValue == JsonFormatter.AnyTypeUrlField &&
            tokenizer.ObjectDepth == anyDepth;
        if (atOwnTypeProperty)
        {
            break;
        }

        buffered.Add(token);
        token = tokenizer.Next();

        if (tokenizer.ObjectDepth < anyDepth)
        {
            throw new InvalidProtocolBufferException("Any message with no @type");
        }
    }

    // Neither the @type property name nor its value is added to the buffered tokens.
    token = tokenizer.Next();
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Any.@type");
    }
    string typeUrl = token.StringValue;
    string typeName = JsonFormatter.GetTypeName(typeUrl);

    MessageDescriptor packedDescriptor = settings.TypeRegistry.Find(typeName);
    if (packedDescriptor == null)
    {
        throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
    }

    // Replay what was buffered followed by whatever is left of the object and parse that as
    // usual. Afterwards the original tokenizer should be positioned at the end of the object.
    var replay = JsonTokenizer.FromReplayedTokens(buffered, tokenizer);
    var packed = packedDescriptor.Parser.CreateTemplate();
    if (packedDescriptor.IsWellKnownType)
    {
        MergeWellKnownTypeAnyBody(packed, replay);
    }
    else
    {
        Merge(packed, replay);
    }
    var packedBytes = packed.ToByteString();

    // Finally store the type URL and the serialized message in the Any passed in by the caller.
    var anyFields = message.Descriptor.Fields;
    anyFields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
    anyFields[Any.ValueFieldNumber].Accessor.SetValue(message, packedBytes);
}
542
// Inside an Any, a well-known type is carried in a property called "value". Because the @type
// property has already been stripped from the token stream, the only tokens expected here are
// start-object, name("value"), the value itself and end-object.
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
    // Skip the start-object token; MergeAny has already verified it.
    tokenizer.Next();

    var propertyName = tokenizer.Next();
    // TODO: What about an absent Int32Value, for example?
    bool isValueProperty =
        propertyName.Type == JsonToken.TokenType.Name &&
        propertyName.StringValue == JsonFormatter.AnyWellKnownTypeValueField;
    if (!isValueProperty)
    {
        throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
    }

    Merge(body, tokenizer);

    if (tokenizer.Next().Type != JsonToken.TokenType.EndObject)
    {
        throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
    }
}
562
563 #region Utility methods which don't depend on the state (or settings) of the parser.
// Converts the text of a JSON property name into a map key of the type required by the map's
// key field. Throws InvalidProtocolBufferException for malformed keys or unsupported key types.
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return keyText;

        case FieldType.Bool:
            // Only the exact lower-case literals are accepted.
            if (keyText != "true" && keyText != "false")
            {
                throw new InvalidProtocolBufferException("Invalid string for bool map key: " + keyText);
            }
            return keyText == "true";

        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString(keyText, int.Parse);

        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString(keyText, uint.Parse);

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString(keyText, long.Parse);

        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString(keyText, ulong.Parse);

        default:
            throw new InvalidProtocolBufferException("Invalid field type for map: " + field.FieldType);
    }
}
598
// Converts a JSON number token to the CLR type matching the field type. Integer targets
// require an integral value within range; float targets require a value within float range
// (infinities and NaN pass through). Failures surface as InvalidProtocolBufferException.
private static object ParseSingleNumberValue(FieldDescriptor field, JsonToken token)
{
    double number = token.NumberValue;
    try
    {
        switch (field.FieldType)
        {
            case FieldType.Int32:
            case FieldType.SInt32:
            case FieldType.SFixed32:
            // Enums are returned as a plain int, leaving the conversion to the CLR.
            // Note that we deliberately don't check that it's a known value.
            case FieldType.Enum:
                CheckInteger(number);
                return checked((int) number);

            case FieldType.UInt32:
            case FieldType.Fixed32:
                CheckInteger(number);
                return checked((uint) number);

            case FieldType.Int64:
            case FieldType.SInt64:
            case FieldType.SFixed64:
                CheckInteger(number);
                return checked((long) number);

            case FieldType.UInt64:
            case FieldType.Fixed64:
                CheckInteger(number);
                return checked((ulong) number);

            case FieldType.Double:
                return number;

            case FieldType.Float:
                if (double.IsNaN(number))
                {
                    return float.NaN;
                }
                if (double.IsPositiveInfinity(number))
                {
                    return float.PositiveInfinity;
                }
                if (double.IsNegativeInfinity(number))
                {
                    return float.NegativeInfinity;
                }
                // Finite, but too large in magnitude to be represented as a float.
                if (number > float.MaxValue || number < float.MinValue)
                {
                    throw new InvalidProtocolBufferException($"Value out of range: {number}");
                }
                return (float) number;

            default:
                throw new InvalidProtocolBufferException($"Unsupported conversion from JSON number for field type {field.FieldType}");
        }
    }
    catch (OverflowException)
    {
        // Raised by the checked integer conversions above.
        throw new InvalidProtocolBufferException($"Value out of range: {number}");
    }
}
661
// Throws InvalidProtocolBufferException unless the value is finite and has no fractional part.
private static void CheckInteger(double value)
{
    bool isIntegral =
        !double.IsInfinity(value) &&
        !double.IsNaN(value) &&
        value == Math.Floor(value);
    if (!isIntegral)
    {
        throw new InvalidProtocolBufferException($"Value not an integer: {value}");
    }
}
673
// Converts the text of a JSON string token to the CLR type matching the field type: the text
// itself, base64-decoded bytes, a parsed number, or the number of the named enum value.
// Failures surface as InvalidProtocolBufferException.
private static object ParseSingleStringValue(FieldDescriptor field, string text)
{
    switch (field.FieldType)
    {
        case FieldType.String:
            return text;

        case FieldType.Bytes:
            try
            {
                return ByteString.FromBase64(text);
            }
            catch (FormatException invalidBase64)
            {
                throw InvalidProtocolBufferException.InvalidBase64(invalidBase64);
            }

        case FieldType.Int32:
        case FieldType.SInt32:
        case FieldType.SFixed32:
            return ParseNumericString<int>(text, int.Parse);

        case FieldType.UInt32:
        case FieldType.Fixed32:
            return ParseNumericString<uint>(text, uint.Parse);

        case FieldType.Int64:
        case FieldType.SInt64:
        case FieldType.SFixed64:
            return ParseNumericString<long>(text, long.Parse);

        case FieldType.UInt64:
        case FieldType.Fixed64:
            return ParseNumericString<ulong>(text, ulong.Parse);

        case FieldType.Double:
            double doubleValue = ParseNumericString<double>(text, double.Parse);
            ValidateInfinityAndNan(
                text,
                double.IsPositiveInfinity(doubleValue),
                double.IsNegativeInfinity(doubleValue),
                double.IsNaN(doubleValue));
            return doubleValue;

        case FieldType.Float:
            float floatValue = ParseNumericString<float>(text, float.Parse);
            ValidateInfinityAndNan(
                text,
                float.IsPositiveInfinity(floatValue),
                float.IsNegativeInfinity(floatValue),
                float.IsNaN(floatValue));
            return floatValue;

        case FieldType.Enum:
            var namedValue = field.EnumType.FindValueByName(text);
            if (namedValue == null)
            {
                throw new InvalidProtocolBufferException($"Invalid enum value: {text} for enum type: {field.EnumType.FullName}");
            }
            // Returned as a plain int; the CLR converts it to the enum type.
            return namedValue.Number;

        default:
            throw new InvalidProtocolBufferException($"Unsupported conversion from JSON string for field type {field.FieldType}");
    }
}
723
/// <summary>
/// Builds an empty message of the message type declared by the given field, using that
/// type's parser.
/// </summary>
private static IMessage NewMessageForField(FieldDescriptor field)
{
    var messageParser = field.MessageType.Parser;
    return messageParser.CreateTemplate();
}
731
/// <summary>
/// Parses numeric text with the supplied parse function, using the invariant culture and
/// allowing a leading sign, a decimal point and an exponent. A leading '+' and redundant
/// leading zeroes (such as "01" or "-01") are rejected before parsing.
/// </summary>
/// <typeparam name="T">The numeric type produced by <paramref name="parser"/>.</typeparam>
/// <param name="text">The text to parse.</param>
/// <param name="parser">The parse function, e.g. <c>int.Parse</c> or <c>double.Parse</c>.</param>
/// <returns>The parsed value.</returns>
/// <exception cref="InvalidProtocolBufferException">The text is not an acceptable number, or is out of range for the target type.</exception>
private static T ParseNumericString<T>(string text, Func<string, NumberStyles, IFormatProvider, T> parser)
{
    // The prefix checks are plain character comparisons: these are ASCII syntax checks, so a
    // culture-sensitive StartsWith is neither needed nor wanted.
    // Can't prohibit a leading '+' with NumberStyles.
    if (text.StartsWith("+", StringComparison.Ordinal))
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }

    // Index of the first character after an optional '-' sign.
    int digitsStart = text.StartsWith("-", StringComparison.Ordinal) ? 1 : 0;
    // A '0' immediately followed by another digit is a redundant leading zero.
    if (text.Length > digitsStart + 1 &&
        text[digitsStart] == '0' &&
        text[digitsStart + 1] >= '0' && text[digitsStart + 1] <= '9')
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }

    try
    {
        return parser(text, NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent, CultureInfo.InvariantCulture);
    }
    catch (FormatException)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value for type: {text}");
    }
    catch (OverflowException)
    {
        throw new InvalidProtocolBufferException($"Value out of range: {text}");
    }
}
766
/// <summary>
/// Verifies that an infinite or NaN parse result came from exactly the text "Infinity",
/// "-Infinity" or "NaN" respectively. This compensates for double.Parse/float.Parse being
/// lenient about whitespace, and for Mono parsing out-of-range values as infinity.
/// </summary>
private static void ValidateInfinityAndNan(string text, bool isPositiveInfinity, bool isNegativeInfinity, bool isNaN)
{
    // Each special value that was produced must be backed by its exact spelling.
    bool spellingMatches =
        (!isPositiveInfinity || text == "Infinity") &&
        (!isNegativeInfinity || text == "-Infinity") &&
        (!isNaN || text == "NaN");
    if (!spellingMatches)
    {
        throw new InvalidProtocolBufferException($"Invalid numeric value: {text}");
    }
}
781
/// <summary>
/// Handler for google.protobuf.Timestamp. Parses a string token of the form
/// <c>yyyy-MM-ddTHH:mm:ss[.fraction](Z|+HH:mm|-HH:mm)</c> and stores the resulting seconds and
/// nanoseconds in <paramref name="message"/>.
/// </summary>
/// <param name="message">The Timestamp message to populate.</param>
/// <param name="token">The token holding the timestamp text.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, the text is not a valid timestamp, the offset is larger than
/// 18 hours or is "-00:00", or applying the fraction/offset moves the timestamp out of range.
/// </exception>
private static void MergeTimestamp(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Timestamp");
    }
    var match = TimestampRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException($"Invalid Timestamp value: {token.StringValue}");
    }
    var dateTime = match.Groups["datetime"].Value;
    var subseconds = match.Groups["subseconds"].Value;
    var offset = match.Groups["offset"].Value;

    try
    {
        DateTime parsed = DateTime.ParseExact(
            dateTime,
            "yyyy-MM-dd'T'HH:mm:ss",
            CultureInfo.InvariantCulture,
            DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal);
        // TODO: It would be nice not to have to create all these objects... easy to optimize later though.
        Timestamp timestamp = Timestamp.FromDateTime(parsed);
        int nanosToAdd = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The capture still includes the leading '.', which the scaling table accounts for.
            nanosToAdd = parsedFraction * SubsecondScalingFactors[subseconds.Length];
        }
        int secondsToAdd = 0;
        if (offset != "Z")
        {
            // This is the amount we need to *subtract* from the local time to get to UTC - hence - => +1 and vice versa.
            int sign = offset[0] == '-' ? 1 : -1;
            // Both offset components are parsed with the invariant culture, like every other
            // number in this method.
            int hours = int.Parse(offset.Substring(1, 2), CultureInfo.InvariantCulture);
            int minutes = int.Parse(offset.Substring(4, 2), CultureInfo.InvariantCulture);
            int totalMinutes = hours * 60 + minutes;
            if (totalMinutes > 18 * 60)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            if (totalMinutes == 0 && sign == 1)
            {
                // This is an offset of -00:00, which means "unknown local offset". It makes no sense for a timestamp.
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
            // We need to *subtract* the offset from local time to get UTC.
            secondsToAdd = sign * totalMinutes * 60;
        }
        // Ensure we've got the right signs. Currently unnecessary, but easy to do.
        if (secondsToAdd < 0 && nanosToAdd > 0)
        {
            secondsToAdd++;
            nanosToAdd = nanosToAdd - Duration.NanosecondsPerSecond;
        }
        if (secondsToAdd != 0 || nanosToAdd != 0)
        {
            timestamp += new Duration { Nanos = nanosToAdd, Seconds = secondsToAdd };
            // The resulting timestamp after offset change would be out of our expected range. Currently the Timestamp message doesn't validate this
            // anywhere, but we shouldn't parse it.
            if (timestamp.Seconds < Timestamp.UnixSecondsAtBclMinValue || timestamp.Seconds > Timestamp.UnixSecondsAtBclMaxValue)
            {
                throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
            }
        }
        message.Descriptor.Fields[Timestamp.SecondsFieldNumber].Accessor.SetValue(message, timestamp.Seconds);
        message.Descriptor.Fields[Timestamp.NanosFieldNumber].Accessor.SetValue(message, timestamp.Nanos);
    }
    catch (FormatException)
    {
        // Text that passes the regex can still be rejected by the parse calls above.
        throw new InvalidProtocolBufferException("Invalid Timestamp value: " + token.StringValue);
    }
}
857
/// <summary>
/// Parses the JSON string representation of a Duration and stores the resulting
/// seconds and nanos values on <paramref name="message"/> through its descriptor's field accessors.
/// </summary>
/// <param name="message">The message whose Duration seconds and nanos fields are set.</param>
/// <param name="token">The JSON token to parse; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">
/// The token is not a string, does not match <c>DurationRegex</c>, has leading zeroes in its
/// integer part, fails numeric parsing, or is rejected by <c>Duration.IsNormalized</c>.
/// </exception>
private static void MergeDuration(IMessage message, JsonToken token)
{
    if (token.Type != JsonToken.TokenType.StringValue)
    {
        throw new InvalidProtocolBufferException("Expected string value for Duration");
    }
    var match = DurationRegex.Match(token.StringValue);
    if (!match.Success)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var sign = match.Groups["sign"].Value;
    var secondsText = match.Groups["int"].Value;
    // Prohibit leading insignificant zeroes
    if (secondsText[0] == '0' && secondsText.Length > 1)
    {
        throw new InvalidProtocolBufferException("Invalid Duration value: " + token.StringValue);
    }
    var subseconds = match.Groups["subseconds"].Value;
    // The sign applies to both the seconds and the nanos components.
    var multiplier = sign == "-" ? -1 : 1;

    try
    {
        long seconds = long.Parse(secondsText, CultureInfo.InvariantCulture) * multiplier;
        int nanos = 0;
        if (subseconds != "")
        {
            // This should always work, as we've got 1-9 digits.
            // Skip the first character of the captured group, and parse with the invariant culture
            // so the result never depends on the current thread's culture (consistent with the
            // seconds parsing above).
            int parsedFraction = int.Parse(subseconds.Substring(1), CultureInfo.InvariantCulture);
            // The scaling table is indexed by the length of the captured group (including its first character).
            nanos = parsedFraction * SubsecondScalingFactors[subseconds.Length] * multiplier;
        }
        if (!Duration.IsNormalized(seconds, nanos))
        {
            throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
        }
        message.Descriptor.Fields[Duration.SecondsFieldNumber].Accessor.SetValue(message, seconds);
        message.Descriptor.Fields[Duration.NanosFieldNumber].Accessor.SetValue(message, nanos);
    }
    catch (FormatException)
    {
        // Report number-parsing failures as invalid protocol buffer input.
        throw new InvalidProtocolBufferException($"Invalid Duration value: {token.StringValue}");
    }
}
901
/// <summary>
/// Reads a FieldMask from its JSON string form: the string is split on
/// <c>FieldMaskPathSeparators</c>, each non-empty entry is converted with
/// <see cref="ToSnakeCase"/>, and the result is appended to the message's paths field.
/// </summary>
/// <param name="message">The message whose FieldMask paths list receives the converted entries.</param>
/// <param name="token">The JSON token to read; it must be a string value.</param>
/// <exception cref="InvalidProtocolBufferException">The token is not a string value.</exception>
private static void MergeFieldMask(IMessage message, JsonToken token)
{
    bool isStringToken = token.Type == JsonToken.TokenType.StringValue;
    if (!isStringToken)
    {
        throw new InvalidProtocolBufferException("Expected string value for FieldMask");
    }

    // TODO: Do we *want* to remove empty entries? Probably okay to treat "" as "no paths", but "foo,,bar"?
    string[] rawPaths = token.StringValue.Split(FieldMaskPathSeparators, StringSplitOptions.RemoveEmptyEntries);

    // Paths are appended one at a time to the existing list held by the message.
    var pathsField = message.Descriptor.Fields[FieldMask.PathsFieldNumber];
    var target = (IList) pathsField.Accessor.GetValue(message);
    for (int index = 0; index < rawPaths.Length; index++)
    {
        string converted = ToSnakeCase(rawPaths[index]);
        target.Add(converted);
    }
}
916
// Ported from src/google/protobuf/util/internal/utility.cc
//
// Converts text to snake case using ASCII-only rules: every ASCII upper-case letter is
// lower-cased, and an underscore is inserted in front of it when it is not the first
// character and either the previous character was not an ASCII upper-case letter or the
// next character is an ASCII lower-case letter. Throws InvalidProtocolBufferException if
// the input contains an underscore.
private static string ToSnakeCase(string text)
{
    var result = new StringBuilder(text.Length * 2);
    // Note: the "past first character" flag is probably unnecessary now, but is retained to stay
    // as close as possible to the C++, whilst still throwing an exception on underscores.
    bool pastFirstChar = false; // false only while looking at the first character (case 1 below)
    bool previousWasNotUpper = false;

    for (int index = 0; index < text.Length; index++)
    {
        char current = text[index];
        if (current == '_')
        {
            throw new InvalidProtocolBufferException($"Invalid field mask: {text}");
        }

        bool isAsciiUpper = current >= 'A' && current <= 'Z';
        if (!isAsciiUpper)
        {
            // Anything that isn't an ASCII capital is copied through untouched.
            result.Append(current);
            pastFirstChar = true;
            previousWasNotUpper = true;
            continue;
        }

        // The current character B is an ASCII capital. The cases are:
        //   1) At beginning of input:   "B..."      => "b..."        ("Biscuit"   => "biscuit")
        //   2) Following a lowercase:   "...aB..."  => "...a_b..."   ("gBike"     => "g_bike")
        //   3) At the end of input:     "...AB"     => "...ab"       ("GoogleLAB" => "google_lab")
        //   4) Followed by a lowercase: "...ABc..." => "...a_bc..."  ("GBike"     => "g_bike")
        // Only cases 2 and 4 get a separating underscore.
        int nextIndex = index + 1;
        bool nextIsAsciiLower = nextIndex < text.Length
            && text[nextIndex] >= 'a'
            && text[nextIndex] <= 'z';
        if (pastFirstChar && (previousWasNotUpper || nextIsAsciiLower))
        {
            result.Append('_');
        }

        // We already know this is an upper-case ASCII letter, so shifting it is enough to lower-case it.
        char lowered = (char) (current - 'A' + 'a');
        result.Append(lowered);
        pastFirstChar = true;
        previousWasNotUpper = false;
    }

    return result.ToString();
}
966 #endregion
967
/// <summary>
/// Options that control how JSON is parsed.
/// </summary>
public sealed class Settings
{
    /// <summary>
    /// The maximum nesting depth of messages allowed while parsing. Only messages count
    /// towards this limit, not collections - so a message within a collection within a
    /// message is depth 2, not 3.
    /// </summary>
    public int RecursionLimit { get; }

    /// <summary>
    /// The type registry consulted when parsing <see cref="Any"/> messages.
    /// </summary>
    public TypeRegistry TypeRegistry { get; }

    /// <summary>
    /// The default settings, as used by <see cref="JsonParser.Default"/>: the default
    /// recursion limit of <see cref="CodedInputStream"/> and an empty type registry.
    /// </summary>
    public static Settings Default { get; }

    // Default is assigned in an explicit static constructor as a workaround for the Mono
    // compiler complaining about XML comments not being on valid language elements.
    static Settings()
    {
        Default = new Settings(CodedInputStream.DefaultRecursionLimit);
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    /// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages; checked for null</param>
    public Settings(int recursionLimit, TypeRegistry typeRegistry)
    {
        TypeRegistry = ProtoPreconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
        RecursionLimit = recursionLimit;
    }

    /// <summary>
    /// Creates a new <see cref="Settings"/> object with the specified recursion limit
    /// and an empty type registry.
    /// </summary>
    /// <param name="recursionLimit">The maximum depth of messages to parse</param>
    public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
    {
    }
}
1017 }
1018}