blob: b0a5ce63c8406f6fc9003aab17634de7fd2742b3 [file] [log] [blame]
Brian Silverman9c614bc2016-02-15 20:20:02 -05001// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc. All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9// * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11// * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15// * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: jschorr@google.com (Joseph Schorr)
32// Based on original Protocol Buffers design by
33// Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <algorithm>
36#include <float.h>
37#include <math.h>
38#include <stdio.h>
39#include <stack>
40#include <limits>
41#include <vector>
42
43#include <google/protobuf/text_format.h>
44
45#include <google/protobuf/descriptor.h>
46#include <google/protobuf/dynamic_message.h>
47#include <google/protobuf/repeated_field.h>
48#include <google/protobuf/wire_format_lite.h>
49#include <google/protobuf/io/strtod.h>
50#include <google/protobuf/io/coded_stream.h>
51#include <google/protobuf/io/zero_copy_stream.h>
52#include <google/protobuf/io/zero_copy_stream_impl.h>
53#include <google/protobuf/unknown_field_set.h>
54#include <google/protobuf/descriptor.pb.h>
55#include <google/protobuf/io/tokenizer.h>
56#include <google/protobuf/any.h>
57#include <google/protobuf/stubs/stringprintf.h>
58#include <google/protobuf/stubs/strutil.h>
59#include <google/protobuf/stubs/map_util.h>
60#include <google/protobuf/stubs/stl_util.h>
61
62namespace google {
63namespace protobuf {
64
65namespace {
66
// Returns true when |str| starts with a hexadecimal prefix, i.e. "0x" or "0X".
// Only the two-character prefix is inspected; the remaining characters are
// not validated.
inline bool IsHexNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char marker = str[1];
  return marker == 'x' || marker == 'X';
}
71
// Returns true when |str| looks like an octal literal: a leading '0'
// immediately followed by an octal digit ('0'..'7'). Characters after the
// second one are not validated.
inline bool IsOctNumber(const string& str) {
  if (str.length() < 2 || str[0] != '0') {
    return false;
  }
  const char digit = str[1];
  return '0' <= digit && digit <= '7';
}
76
77inline bool GetAnyFieldDescriptors(const Message& message,
78 const FieldDescriptor** type_url_field,
79 const FieldDescriptor** value_field) {
80 const Descriptor* descriptor = message.GetDescriptor();
81 *type_url_field = descriptor->FindFieldByNumber(1);
82 *value_field = descriptor->FindFieldByNumber(2);
83 return (*type_url_field != NULL &&
84 (*type_url_field)->type() == FieldDescriptor::TYPE_STRING &&
85 *value_field != NULL &&
86 (*value_field)->type() == FieldDescriptor::TYPE_BYTES);
87}
88
89} // namespace
90
91string Message::DebugString() const {
92 string debug_string;
93
94 TextFormat::PrintToString(*this, &debug_string);
95
96 return debug_string;
97}
98
99string Message::ShortDebugString() const {
100 string debug_string;
101
102 TextFormat::Printer printer;
103 printer.SetSingleLineMode(true);
104
105 printer.PrintToString(*this, &debug_string);
106 // Single line mode currently might have an extra space at the end.
107 if (debug_string.size() > 0 &&
108 debug_string[debug_string.size() - 1] == ' ') {
109 debug_string.resize(debug_string.size() - 1);
110 }
111
112 return debug_string;
113}
114
115string Message::Utf8DebugString() const {
116 string debug_string;
117
118 TextFormat::Printer printer;
119 printer.SetUseUtf8StringEscaping(true);
120
121 printer.PrintToString(*this, &debug_string);
122
123 return debug_string;
124}
125
126void Message::PrintDebugString() const {
127 printf("%s", DebugString().c_str());
128}
129
130
131// ===========================================================================
132// Implementation of the parse information tree class.
133TextFormat::ParseInfoTree::ParseInfoTree() { }
134
135TextFormat::ParseInfoTree::~ParseInfoTree() {
136 // Remove any nested information trees, as they are owned by this tree.
137 for (NestedMap::iterator it = nested_.begin(); it != nested_.end(); ++it) {
138 STLDeleteElements(&(it->second));
139 }
140}
141
142void TextFormat::ParseInfoTree::RecordLocation(
143 const FieldDescriptor* field,
144 TextFormat::ParseLocation location) {
145 locations_[field].push_back(location);
146}
147
148TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::CreateNested(
149 const FieldDescriptor* field) {
150 // Owned by us in the map.
151 TextFormat::ParseInfoTree* instance = new TextFormat::ParseInfoTree();
152 vector<TextFormat::ParseInfoTree*>* trees = &nested_[field];
153 GOOGLE_CHECK(trees);
154 trees->push_back(instance);
155 return instance;
156}
157
158void CheckFieldIndex(const FieldDescriptor* field, int index) {
159 if (field == NULL) { return; }
160
161 if (field->is_repeated() && index == -1) {
162 GOOGLE_LOG(DFATAL) << "Index must be in range of repeated field values. "
163 << "Field: " << field->name();
164 } else if (!field->is_repeated() && index != -1) {
165 GOOGLE_LOG(DFATAL) << "Index must be -1 for singular fields."
166 << "Field: " << field->name();
167 }
168}
169
170TextFormat::ParseLocation TextFormat::ParseInfoTree::GetLocation(
171 const FieldDescriptor* field, int index) const {
172 CheckFieldIndex(field, index);
173 if (index == -1) { index = 0; }
174
175 const vector<TextFormat::ParseLocation>* locations =
176 FindOrNull(locations_, field);
177 if (locations == NULL || index >= locations->size()) {
178 return TextFormat::ParseLocation();
179 }
180
181 return (*locations)[index];
182}
183
184TextFormat::ParseInfoTree* TextFormat::ParseInfoTree::GetTreeForNested(
185 const FieldDescriptor* field, int index) const {
186 CheckFieldIndex(field, index);
187 if (index == -1) { index = 0; }
188
189 const vector<TextFormat::ParseInfoTree*>* trees = FindOrNull(nested_, field);
190 if (trees == NULL || index >= trees->size()) {
191 return NULL;
192 }
193
194 return (*trees)[index];
195}
196
197
198// ===========================================================================
// Internal class for parsing an ASCII representation of a Protocol Message.
// This class makes use of the Protocol Message compiler's tokenizer found
// in //google/protobuf/io/tokenizer.h. Note that this class's Parse
// method is *not* thread-safe and should only be used in a single thread at
// a time.
204
// Shorthand for "evaluate STATEMENT; if it yields false, return false from
// the enclosing function". The if/else shape (rather than a bare
// "if (!x) return false") lets the macro be followed by ';' and used safely
// next to other if/else statements. Borrowed from parser.cc (Thanks Kenton!).
#define DO(STATEMENT) \
  if (STATEMENT) {    \
  } else              \
    return false
209
210class TextFormat::Parser::ParserImpl {
211 public:
212
// Policy for input that sets a non-repeated field, or members of one oneof,
// more than once -- e.g. "foo: 1 foo: 2" for a required/optional field
// "foo", or "baz: 1 qux: 2" where "baz" and "qux" share a oneof.
enum SingularOverwritePolicy {
  // The last value is retained.
  ALLOW_SINGULAR_OVERWRITES = 0,
  // An error is issued.
  FORBID_SINGULAR_OVERWRITES = 1
};
221
222 ParserImpl(const Descriptor* root_message_type,
223 io::ZeroCopyInputStream* input_stream,
224 io::ErrorCollector* error_collector,
225 TextFormat::Finder* finder,
226 ParseInfoTree* parse_info_tree,
227 SingularOverwritePolicy singular_overwrite_policy,
228 bool allow_case_insensitive_field,
229 bool allow_unknown_field,
230 bool allow_unknown_enum,
231 bool allow_field_number,
232 bool allow_relaxed_whitespace)
233 : error_collector_(error_collector),
234 finder_(finder),
235 parse_info_tree_(parse_info_tree),
236 tokenizer_error_collector_(this),
237 tokenizer_(input_stream, &tokenizer_error_collector_),
238 root_message_type_(root_message_type),
239 singular_overwrite_policy_(singular_overwrite_policy),
240 allow_case_insensitive_field_(allow_case_insensitive_field),
241 allow_unknown_field_(allow_unknown_field),
242 allow_unknown_enum_(allow_unknown_enum),
243 allow_field_number_(allow_field_number),
244 had_errors_(false) {
245 // For backwards-compatibility with proto1, we need to allow the 'f' suffix
246 // for floats.
247 tokenizer_.set_allow_f_after_float(true);
248
249 // '#' starts a comment.
250 tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);
251
252 if (allow_relaxed_whitespace) {
253 tokenizer_.set_require_space_after_number(false);
254 tokenizer_.set_allow_multiline_strings(true);
255 }
256
257 // Consume the starting token.
258 tokenizer_.Next();
259 }
260 ~ParserImpl() { }
261
262 // Parses the ASCII representation specified in input and saves the
263 // information into the output pointer (a Message). Returns
264 // false if an error occurs (an error will also be logged to
265 // GOOGLE_LOG(ERROR)).
266 bool Parse(Message* output) {
267 // Consume fields until we cannot do so anymore.
268 while (true) {
269 if (LookingAtType(io::Tokenizer::TYPE_END)) {
270 return !had_errors_;
271 }
272
273 DO(ConsumeField(output));
274 }
275 }
276
277 bool ParseField(const FieldDescriptor* field, Message* output) {
278 bool suc;
279 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
280 suc = ConsumeFieldMessage(output, output->GetReflection(), field);
281 } else {
282 suc = ConsumeFieldValue(output, output->GetReflection(), field);
283 }
284 return suc && LookingAtType(io::Tokenizer::TYPE_END);
285 }
286
287 void ReportError(int line, int col, const string& message) {
288 had_errors_ = true;
289 if (error_collector_ == NULL) {
290 if (line >= 0) {
291 GOOGLE_LOG(ERROR) << "Error parsing text-format "
292 << root_message_type_->full_name()
293 << ": " << (line + 1) << ":"
294 << (col + 1) << ": " << message;
295 } else {
296 GOOGLE_LOG(ERROR) << "Error parsing text-format "
297 << root_message_type_->full_name()
298 << ": " << message;
299 }
300 } else {
301 error_collector_->AddError(line, col, message);
302 }
303 }
304
305 void ReportWarning(int line, int col, const string& message) {
306 if (error_collector_ == NULL) {
307 if (line >= 0) {
308 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
309 << root_message_type_->full_name()
310 << ": " << (line + 1) << ":"
311 << (col + 1) << ": " << message;
312 } else {
313 GOOGLE_LOG(WARNING) << "Warning parsing text-format "
314 << root_message_type_->full_name()
315 << ": " << message;
316 }
317 } else {
318 error_collector_->AddWarning(line, col, message);
319 }
320 }
321
322 private:
323 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
324
325 // Reports an error with the given message with information indicating
326 // the position (as derived from the current token).
327 void ReportError(const string& message) {
328 ReportError(tokenizer_.current().line, tokenizer_.current().column,
329 message);
330 }
331
332 // Reports a warning with the given message with information indicating
333 // the position (as derived from the current token).
334 void ReportWarning(const string& message) {
335 ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
336 message);
337 }
338
339 // Consumes the specified message with the given starting delimiter.
340 // This method checks to see that the end delimiter at the conclusion of
341 // the consumption matches the starting delimiter passed in here.
342 bool ConsumeMessage(Message* message, const string delimiter) {
343 while (!LookingAt(">") && !LookingAt("}")) {
344 DO(ConsumeField(message));
345 }
346
347 // Confirm that we have a valid ending delimiter.
348 DO(Consume(delimiter));
349 return true;
350 }
351
352 // Consume either "<" or "{".
353 bool ConsumeMessageDelimiter(string* delimiter) {
354 if (TryConsume("<")) {
355 *delimiter = ">";
356 } else {
357 DO(Consume("{"));
358 *delimiter = "}";
359 }
360 return true;
361 }
362
363
364 // Consumes the current field (as returned by the tokenizer) on the
365 // passed in message.
366 bool ConsumeField(Message* message) {
367 const Reflection* reflection = message->GetReflection();
368 const Descriptor* descriptor = message->GetDescriptor();
369
370 string field_name;
371
372 const FieldDescriptor* field = NULL;
373 int start_line = tokenizer_.current().line;
374 int start_column = tokenizer_.current().column;
375
376 const FieldDescriptor* any_type_url_field;
377 const FieldDescriptor* any_value_field;
378 if (internal::GetAnyFieldDescriptors(*message, &any_type_url_field,
379 &any_value_field) &&
380 TryConsume("[")) {
381 string full_type_name, prefix;
382 DO(ConsumeAnyTypeUrl(&full_type_name, &prefix));
383 DO(Consume("]"));
384 TryConsume(":"); // ':' is optional between message labels and values.
385 string serialized_value;
386 DO(ConsumeAnyValue(full_type_name,
387 message->GetDescriptor()->file()->pool(),
388 &serialized_value));
389 reflection->SetString(
390 message, any_type_url_field,
391 string(prefix + full_type_name));
392 reflection->SetString(message, any_value_field, serialized_value);
393 return true;
394 }
395 if (TryConsume("[")) {
396 // Extension.
397 DO(ConsumeFullTypeName(&field_name));
398 DO(Consume("]"));
399
400 field = (finder_ != NULL
401 ? finder_->FindExtension(message, field_name)
402 : reflection->FindKnownExtensionByName(field_name));
403
404 if (field == NULL) {
405 if (!allow_unknown_field_) {
406 ReportError("Extension \"" + field_name + "\" is not defined or "
407 "is not an extension of \"" +
408 descriptor->full_name() + "\".");
409 return false;
410 } else {
411 ReportWarning("Extension \"" + field_name + "\" is not defined or "
412 "is not an extension of \"" +
413 descriptor->full_name() + "\".");
414 }
415 }
416 } else {
417 DO(ConsumeIdentifier(&field_name));
418
419 int32 field_number;
420 if (allow_field_number_ && safe_strto32(field_name, &field_number)) {
421 if (descriptor->IsExtensionNumber(field_number)) {
422 field = reflection->FindKnownExtensionByNumber(field_number);
423 } else {
424 field = descriptor->FindFieldByNumber(field_number);
425 }
426 } else {
427 field = descriptor->FindFieldByName(field_name);
428 // Group names are expected to be capitalized as they appear in the
429 // .proto file, which actually matches their type names, not their
430 // field names.
431 if (field == NULL) {
432 string lower_field_name = field_name;
433 LowerString(&lower_field_name);
434 field = descriptor->FindFieldByName(lower_field_name);
435 // If the case-insensitive match worked but the field is NOT a group,
436 if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
437 field = NULL;
438 }
439 }
440 // Again, special-case group names as described above.
441 if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
442 && field->message_type()->name() != field_name) {
443 field = NULL;
444 }
445
446 if (field == NULL && allow_case_insensitive_field_) {
447 string lower_field_name = field_name;
448 LowerString(&lower_field_name);
449 field = descriptor->FindFieldByLowercaseName(lower_field_name);
450 }
451 }
452
453 if (field == NULL) {
454 if (!allow_unknown_field_) {
455 ReportError("Message type \"" + descriptor->full_name() +
456 "\" has no field named \"" + field_name + "\".");
457 return false;
458 } else {
459 ReportWarning("Message type \"" + descriptor->full_name() +
460 "\" has no field named \"" + field_name + "\".");
461 }
462 }
463 }
464
465 // Skips unknown field.
466 if (field == NULL) {
467 GOOGLE_CHECK(allow_unknown_field_);
468 // Try to guess the type of this field.
469 // If this field is not a message, there should be a ":" between the
470 // field name and the field value and also the field value should not
471 // start with "{" or "<" which indicates the beginning of a message body.
472 // If there is no ":" or there is a "{" or "<" after ":", this field has
473 // to be a message or the input is ill-formed.
474 if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
475 return SkipFieldValue();
476 } else {
477 return SkipFieldMessage();
478 }
479 }
480
481 if (singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) {
482 // Fail if the field is not repeated and it has already been specified.
483 if (!field->is_repeated() && reflection->HasField(*message, field)) {
484 ReportError("Non-repeated field \"" + field_name +
485 "\" is specified multiple times.");
486 return false;
487 }
488 // Fail if the field is a member of a oneof and another member has already
489 // been specified.
490 const OneofDescriptor* oneof = field->containing_oneof();
491 if (oneof != NULL && reflection->HasOneof(*message, oneof)) {
492 const FieldDescriptor* other_field =
493 reflection->GetOneofFieldDescriptor(*message, oneof);
494 ReportError("Field \"" + field_name + "\" is specified along with "
495 "field \"" + other_field->name() + "\", another member "
496 "of oneof \"" + oneof->name() + "\".");
497 return false;
498 }
499 }
500
501 // Perform special handling for embedded message types.
502 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
503 // ':' is optional here.
504 TryConsume(":");
505 } else {
506 // ':' is required here.
507 DO(Consume(":"));
508 }
509
510 if (field->is_repeated() && TryConsume("[")) {
511 // Short repeated format, e.g. "foo: [1, 2, 3]"
512 while (true) {
513 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
514 // Perform special handling for embedded message types.
515 DO(ConsumeFieldMessage(message, reflection, field));
516 } else {
517 DO(ConsumeFieldValue(message, reflection, field));
518 }
519 if (TryConsume("]")) {
520 break;
521 }
522 DO(Consume(","));
523 }
524 } else if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
525 DO(ConsumeFieldMessage(message, reflection, field));
526 } else {
527 DO(ConsumeFieldValue(message, reflection, field));
528 }
529
530 // For historical reasons, fields may optionally be separated by commas or
531 // semicolons.
532 TryConsume(";") || TryConsume(",");
533
534 if (field->options().deprecated()) {
535 ReportWarning("text format contains deprecated field \""
536 + field_name + "\"");
537 }
538
539 // If a parse info tree exists, add the location for the parsed
540 // field.
541 if (parse_info_tree_ != NULL) {
542 RecordLocation(parse_info_tree_, field,
543 ParseLocation(start_line, start_column));
544 }
545
546 return true;
547 }
548
549 // Skips the next field including the field's name and value.
550 bool SkipField() {
551 string field_name;
552 if (TryConsume("[")) {
553 // Extension name.
554 DO(ConsumeFullTypeName(&field_name));
555 DO(Consume("]"));
556 } else {
557 DO(ConsumeIdentifier(&field_name));
558 }
559
560 // Try to guess the type of this field.
561 // If this field is not a message, there should be a ":" between the
562 // field name and the field value and also the field value should not
563 // start with "{" or "<" which indicates the beginning of a message body.
564 // If there is no ":" or there is a "{" or "<" after ":", this field has
565 // to be a message or the input is ill-formed.
566 if (TryConsume(":") && !LookingAt("{") && !LookingAt("<")) {
567 DO(SkipFieldValue());
568 } else {
569 DO(SkipFieldMessage());
570 }
571 // For historical reasons, fields may optionally be separated by commas or
572 // semicolons.
573 TryConsume(";") || TryConsume(",");
574 return true;
575 }
576
577 bool ConsumeFieldMessage(Message* message,
578 const Reflection* reflection,
579 const FieldDescriptor* field) {
580
581 // If the parse information tree is not NULL, create a nested one
582 // for the nested message.
583 ParseInfoTree* parent = parse_info_tree_;
584 if (parent != NULL) {
585 parse_info_tree_ = CreateNested(parent, field);
586 }
587
588 string delimiter;
589 DO(ConsumeMessageDelimiter(&delimiter));
590 if (field->is_repeated()) {
591 DO(ConsumeMessage(reflection->AddMessage(message, field), delimiter));
592 } else {
593 DO(ConsumeMessage(reflection->MutableMessage(message, field),
594 delimiter));
595 }
596
597 // Reset the parse information tree.
598 parse_info_tree_ = parent;
599 return true;
600 }
601
602 // Skips the whole body of a message including the beginning delimiter and
603 // the ending delimiter.
604 bool SkipFieldMessage() {
605 string delimiter;
606 DO(ConsumeMessageDelimiter(&delimiter));
607 while (!LookingAt(">") && !LookingAt("}")) {
608 DO(SkipField());
609 }
610 DO(Consume(delimiter));
611 return true;
612 }
613
614 bool ConsumeFieldValue(Message* message,
615 const Reflection* reflection,
616 const FieldDescriptor* field) {
617
618// Define an easy to use macro for setting fields. This macro checks
619// to see if the field is repeated (in which case we need to use the Add
620// methods or not (in which case we need to use the Set methods).
621#define SET_FIELD(CPPTYPE, VALUE) \
622 if (field->is_repeated()) { \
623 reflection->Add##CPPTYPE(message, field, VALUE); \
624 } else { \
625 reflection->Set##CPPTYPE(message, field, VALUE); \
626 } \
627
628 switch(field->cpp_type()) {
629 case FieldDescriptor::CPPTYPE_INT32: {
630 int64 value;
631 DO(ConsumeSignedInteger(&value, kint32max));
632 SET_FIELD(Int32, static_cast<int32>(value));
633 break;
634 }
635
636 case FieldDescriptor::CPPTYPE_UINT32: {
637 uint64 value;
638 DO(ConsumeUnsignedInteger(&value, kuint32max));
639 SET_FIELD(UInt32, static_cast<uint32>(value));
640 break;
641 }
642
643 case FieldDescriptor::CPPTYPE_INT64: {
644 int64 value;
645 DO(ConsumeSignedInteger(&value, kint64max));
646 SET_FIELD(Int64, value);
647 break;
648 }
649
650 case FieldDescriptor::CPPTYPE_UINT64: {
651 uint64 value;
652 DO(ConsumeUnsignedInteger(&value, kuint64max));
653 SET_FIELD(UInt64, value);
654 break;
655 }
656
657 case FieldDescriptor::CPPTYPE_FLOAT: {
658 double value;
659 DO(ConsumeDouble(&value));
660 SET_FIELD(Float, io::SafeDoubleToFloat(value));
661 break;
662 }
663
664 case FieldDescriptor::CPPTYPE_DOUBLE: {
665 double value;
666 DO(ConsumeDouble(&value));
667 SET_FIELD(Double, value);
668 break;
669 }
670
671 case FieldDescriptor::CPPTYPE_STRING: {
672 string value;
673 DO(ConsumeString(&value));
674 SET_FIELD(String, value);
675 break;
676 }
677
678 case FieldDescriptor::CPPTYPE_BOOL: {
679 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
680 uint64 value;
681 DO(ConsumeUnsignedInteger(&value, 1));
682 SET_FIELD(Bool, value);
683 } else {
684 string value;
685 DO(ConsumeIdentifier(&value));
686 if (value == "true" || value == "True" || value == "t") {
687 SET_FIELD(Bool, true);
688 } else if (value == "false" || value == "False" || value == "f") {
689 SET_FIELD(Bool, false);
690 } else {
691 ReportError("Invalid value for boolean field \"" + field->name()
692 + "\". Value: \"" + value + "\".");
693 return false;
694 }
695 }
696 break;
697 }
698
699 case FieldDescriptor::CPPTYPE_ENUM: {
700 string value;
701 const EnumDescriptor* enum_type = field->enum_type();
702 const EnumValueDescriptor* enum_value = NULL;
703
704 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
705 DO(ConsumeIdentifier(&value));
706 // Find the enumeration value.
707 enum_value = enum_type->FindValueByName(value);
708
709 } else if (LookingAt("-") ||
710 LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
711 int64 int_value;
712 DO(ConsumeSignedInteger(&int_value, kint32max));
713 value = SimpleItoa(int_value); // for error reporting
714 enum_value = enum_type->FindValueByNumber(int_value);
715 } else {
716 ReportError("Expected integer or identifier.");
717 return false;
718 }
719
720 if (enum_value == NULL) {
721 if (!allow_unknown_enum_) {
722 ReportError("Unknown enumeration value of \"" + value + "\" for "
723 "field \"" + field->name() + "\".");
724 return false;
725 } else {
726 ReportWarning("Unknown enumeration value of \"" + value + "\" for "
727 "field \"" + field->name() + "\".");
728 return true;
729 }
730 }
731
732 SET_FIELD(Enum, enum_value);
733 break;
734 }
735
736 case FieldDescriptor::CPPTYPE_MESSAGE: {
737 // We should never get here. Put here instead of a default
738 // so that if new types are added, we get a nice compiler warning.
739 GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
740 break;
741 }
742 }
743#undef SET_FIELD
744 return true;
745 }
746
747 bool SkipFieldValue() {
748 if (LookingAtType(io::Tokenizer::TYPE_STRING)) {
749 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
750 tokenizer_.Next();
751 }
752 return true;
753 }
754 // Possible field values other than string:
755 // 12345 => TYPE_INTEGER
756 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
757 // 1.2345 => TYPE_FLOAT
758 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
759 // inf => TYPE_IDENTIFIER
760 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
761 // TYPE_INTEGER => TYPE_IDENTIFIER
762 // Divides them into two group, one with TYPE_SYMBOL
763 // and the other without:
764 // Group one:
765 // 12345 => TYPE_INTEGER
766 // 1.2345 => TYPE_FLOAT
767 // inf => TYPE_IDENTIFIER
768 // TYPE_INTEGER => TYPE_IDENTIFIER
769 // Group two:
770 // -12345 => TYPE_SYMBOL + TYPE_INTEGER
771 // -1.2345 => TYPE_SYMBOL + TYPE_FLOAT
772 // -inf => TYPE_SYMBOL + TYPE_IDENTIFIER
773 // As we can see, the field value consists of an optional '-' and one of
774 // TYPE_INTEGER, TYPE_FLOAT and TYPE_IDENTIFIER.
775 bool has_minus = TryConsume("-");
776 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER) &&
777 !LookingAtType(io::Tokenizer::TYPE_FLOAT) &&
778 !LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
779 return false;
780 }
781 // Combination of '-' and TYPE_IDENTIFIER may result in an invalid field
782 // value while other combinations all generate valid values.
783 // We check if the value of this combination is valid here.
784 // TYPE_IDENTIFIER after a '-' should be one of the float values listed
785 // below:
786 // inf, inff, infinity, nan
787 if (has_minus && LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
788 string text = tokenizer_.current().text;
789 LowerString(&text);
790 if (text != "inf" &&
791 text != "infinity" &&
792 text != "nan") {
793 ReportError("Invalid float number: " + text);
794 return false;
795 }
796 }
797 tokenizer_.Next();
798 return true;
799 }
800
801 // Returns true if the current token's text is equal to that specified.
802 bool LookingAt(const string& text) {
803 return tokenizer_.current().text == text;
804 }
805
806 // Returns true if the current token's type is equal to that specified.
807 bool LookingAtType(io::Tokenizer::TokenType token_type) {
808 return tokenizer_.current().type == token_type;
809 }
810
811 // Consumes an identifier and saves its value in the identifier parameter.
812 // Returns false if the token is not of type IDENTFIER.
813 bool ConsumeIdentifier(string* identifier) {
814 if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
815 *identifier = tokenizer_.current().text;
816 tokenizer_.Next();
817 return true;
818 }
819
820 // If allow_field_numer_ or allow_unknown_field_ is true, we should able
821 // to parse integer identifiers.
822 if ((allow_field_number_ || allow_unknown_field_)
823 && LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
824 *identifier = tokenizer_.current().text;
825 tokenizer_.Next();
826 return true;
827 }
828
829 ReportError("Expected identifier.");
830 return false;
831 }
832
833 // Consume a string of form "<id1>.<id2>....<idN>".
834 bool ConsumeFullTypeName(string* name) {
835 DO(ConsumeIdentifier(name));
836 while (TryConsume(".")) {
837 string part;
838 DO(ConsumeIdentifier(&part));
839 *name += ".";
840 *name += part;
841 }
842 return true;
843 }
844
845 // Consumes a string and saves its value in the text parameter.
846 // Returns false if the token is not of type STRING.
847 bool ConsumeString(string* text) {
848 if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
849 ReportError("Expected string.");
850 return false;
851 }
852
853 text->clear();
854 while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
855 io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
856
857 tokenizer_.Next();
858 }
859
860 return true;
861 }
862
863 // Consumes a uint64 and saves its value in the value parameter.
864 // Returns false if the token is not of type INTEGER.
865 bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
866 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
867 ReportError("Expected integer.");
868 return false;
869 }
870
871 if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
872 max_value, value)) {
873 ReportError("Integer out of range.");
874 return false;
875 }
876
877 tokenizer_.Next();
878 return true;
879 }
880
881 // Consumes an int64 and saves its value in the value parameter.
882 // Note that since the tokenizer does not support negative numbers,
883 // we actually may consume an additional token (for the minus sign) in this
884 // method. Returns false if the token is not an integer
885 // (signed or otherwise).
886 bool ConsumeSignedInteger(int64* value, uint64 max_value) {
887 bool negative = false;
888
889 if (TryConsume("-")) {
890 negative = true;
891 // Two's complement always allows one more negative integer than
892 // positive.
893 ++max_value;
894 }
895
896 uint64 unsigned_value;
897
898 DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
899
900 *value = static_cast<int64>(unsigned_value);
901
902 if (negative) {
903 *value = -*value;
904 }
905
906 return true;
907 }
908
909 // Consumes a uint64 and saves its value in the value parameter.
910 // Accepts decimal numbers only, rejects hex or oct numbers.
911 bool ConsumeUnsignedDecimalInteger(uint64* value, uint64 max_value) {
912 if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
913 ReportError("Expected integer.");
914 return false;
915 }
916
917 const string& text = tokenizer_.current().text;
918 if (IsHexNumber(text) || IsOctNumber(text)) {
919 ReportError("Expect a decimal number.");
920 return false;
921 }
922
923 if (!io::Tokenizer::ParseInteger(text, max_value, value)) {
924 ReportError("Integer out of range.");
925 return false;
926 }
927
928 tokenizer_.Next();
929 return true;
930 }
931
932 // Consumes a double and saves its value in the value parameter.
933 // Note that since the tokenizer does not support negative numbers,
934 // we actually may consume an additional token (for the minus sign) in this
935 // method. Returns false if the token is not a double
936 // (signed or otherwise).
937 bool ConsumeDouble(double* value) {
938 bool negative = false;
939
940 if (TryConsume("-")) {
941 negative = true;
942 }
943
944 // A double can actually be an integer, according to the tokenizer.
945 // Therefore, we must check both cases here.
946 if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
947 // We have found an integer value for the double.
948 uint64 integer_value;
949 DO(ConsumeUnsignedDecimalInteger(&integer_value, kuint64max));
950
951 *value = static_cast<double>(integer_value);
952 } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
953 // We have found a float value for the double.
954 *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
955
956 // Mark the current token as consumed.
957 tokenizer_.Next();
958 } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
959 string text = tokenizer_.current().text;
960 LowerString(&text);
961 if (text == "inf" ||
962 text == "infinity") {
963 *value = std::numeric_limits<double>::infinity();
964 tokenizer_.Next();
965 } else if (text == "nan") {
966 *value = std::numeric_limits<double>::quiet_NaN();
967 tokenizer_.Next();
968 } else {
969 ReportError("Expected double.");
970 return false;
971 }
972 } else {
973 ReportError("Expected double.");
974 return false;
975 }
976
977 if (negative) {
978 *value = -*value;
979 }
980
981 return true;
982 }
983
984 // Consumes Any::type_url value, of form "type.googleapis.com/full.type.Name"
985 // or "type.googleprod.com/full.type.Name"
986 bool ConsumeAnyTypeUrl(string* full_type_name, string* prefix) {
987 // TODO(saito) Extend Consume() to consume multiple tokens at once, so that
988 // this code can be written as just DO(Consume(kGoogleApisTypePrefix)).
989 string url1, url2, url3;
990 DO(ConsumeIdentifier(&url1)); // type
991 DO(Consume("."));
992 DO(ConsumeIdentifier(&url2)); // googleapis
993 DO(Consume("."));
994 DO(ConsumeIdentifier(&url3)); // com
995 DO(Consume("/"));
996 DO(ConsumeFullTypeName(full_type_name));
997
998 *prefix = url1 + "." + url2 + "." + url3 + "/";
999 if (*prefix != internal::kTypeGoogleApisComPrefix &&
1000 *prefix != internal::kTypeGoogleProdComPrefix) {
1001 ReportError("TextFormat::Parser for Any supports only "
1002 "type.googleapis.com and type.googleprod.com, "
1003 "but found \"" + *prefix + "\"");
1004 return false;
1005 }
1006 return true;
1007 }
1008
1009 // A helper function for reconstructing Any::value. Consumes a text of
1010 // full_type_name, then serializes it into serialized_value. "pool" is used to
1011 // look up and create a temporary object with full_type_name.
1012 bool ConsumeAnyValue(const string& full_type_name, const DescriptorPool* pool,
1013 string* serialized_value) {
1014 const Descriptor* value_descriptor =
1015 pool->FindMessageTypeByName(full_type_name);
1016 if (value_descriptor == NULL) {
1017 ReportError("Could not find type \"" + full_type_name +
1018 "\" stored in google.protobuf.Any.");
1019 return false;
1020 }
1021 DynamicMessageFactory factory;
1022 const Message* value_prototype = factory.GetPrototype(value_descriptor);
1023 if (value_prototype == NULL) {
1024 return false;
1025 }
1026 google::protobuf::scoped_ptr<Message> value(value_prototype->New());
1027 string sub_delimiter;
1028 DO(ConsumeMessageDelimiter(&sub_delimiter));
1029 DO(ConsumeMessage(value.get(), sub_delimiter));
1030
1031 value->AppendToString(serialized_value);
1032 return true;
1033 }
1034
1035 // Consumes a token and confirms that it matches that specified in the
1036 // value parameter. Returns false if the token found does not match that
1037 // which was specified.
1038 bool Consume(const string& value) {
1039 const string& current_value = tokenizer_.current().text;
1040
1041 if (current_value != value) {
1042 ReportError("Expected \"" + value + "\", found \"" + current_value
1043 + "\".");
1044 return false;
1045 }
1046
1047 tokenizer_.Next();
1048
1049 return true;
1050 }
1051
1052 // Attempts to consume the supplied value. Returns false if a the
1053 // token found does not match the value specified.
1054 bool TryConsume(const string& value) {
1055 if (tokenizer_.current().text == value) {
1056 tokenizer_.Next();
1057 return true;
1058 } else {
1059 return false;
1060 }
1061 }
1062
1063 // An internal instance of the Tokenizer's error collector, used to
1064 // collect any base-level parse errors and feed them to the ParserImpl.
1065 class ParserErrorCollector : public io::ErrorCollector {
1066 public:
1067 explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
1068 parser_(parser) { }
1069
1070 virtual ~ParserErrorCollector() { }
1071
1072 virtual void AddError(int line, int column, const string& message) {
1073 parser_->ReportError(line, column, message);
1074 }
1075
1076 virtual void AddWarning(int line, int column, const string& message) {
1077 parser_->ReportWarning(line, column, message);
1078 }
1079
1080 private:
1081 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
1082 TextFormat::Parser::ParserImpl* parser_;
1083 };
1084
1085 io::ErrorCollector* error_collector_;
1086 TextFormat::Finder* finder_;
1087 ParseInfoTree* parse_info_tree_;
1088 ParserErrorCollector tokenizer_error_collector_;
1089 io::Tokenizer tokenizer_;
1090 const Descriptor* root_message_type_;
1091 SingularOverwritePolicy singular_overwrite_policy_;
1092 const bool allow_case_insensitive_field_;
1093 const bool allow_unknown_field_;
1094 const bool allow_unknown_enum_;
1095 const bool allow_field_number_;
1096 bool had_errors_;
1097};
1098
1099#undef DO
1100
1101// ===========================================================================
1102// Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
1103// from the Printer found in //google/protobuf/io/printer.h
1104class TextFormat::Printer::TextGenerator {
1105 public:
1106 explicit TextGenerator(io::ZeroCopyOutputStream* output,
1107 int initial_indent_level)
1108 : output_(output),
1109 buffer_(NULL),
1110 buffer_size_(0),
1111 at_start_of_line_(true),
1112 failed_(false),
1113 indent_(""),
1114 initial_indent_level_(initial_indent_level) {
1115 indent_.resize(initial_indent_level_ * 2, ' ');
1116 }
1117
1118 ~TextGenerator() {
1119 // Only BackUp() if we're sure we've successfully called Next() at least
1120 // once.
1121 if (!failed_ && buffer_size_ > 0) {
1122 output_->BackUp(buffer_size_);
1123 }
1124 }
1125
1126 // Indent text by two spaces. After calling Indent(), two spaces will be
1127 // inserted at the beginning of each line of text. Indent() may be called
1128 // multiple times to produce deeper indents.
1129 void Indent() {
1130 indent_ += " ";
1131 }
1132
1133 // Reduces the current indent level by two spaces, or crashes if the indent
1134 // level is zero.
1135 void Outdent() {
1136 if (indent_.empty() ||
1137 indent_.size() < initial_indent_level_ * 2) {
1138 GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
1139 return;
1140 }
1141
1142 indent_.resize(indent_.size() - 2);
1143 }
1144
1145 // Print text to the output stream.
1146 void Print(const string& str) {
1147 Print(str.data(), str.size());
1148 }
1149
1150 // Print text to the output stream.
1151 void Print(const char* text) {
1152 Print(text, strlen(text));
1153 }
1154
1155 // Print text to the output stream.
1156 void Print(const char* text, int size) {
1157 int pos = 0; // The number of bytes we've written so far.
1158
1159 for (int i = 0; i < size; i++) {
1160 if (text[i] == '\n') {
1161 // Saw newline. If there is more text, we may need to insert an indent
1162 // here. So, write what we have so far, including the '\n'.
1163 Write(text + pos, i - pos + 1);
1164 pos = i + 1;
1165
1166 // Setting this true will cause the next Write() to insert an indent
1167 // first.
1168 at_start_of_line_ = true;
1169 }
1170 }
1171
1172 // Write the rest.
1173 Write(text + pos, size - pos);
1174 }
1175
1176 // True if any write to the underlying stream failed. (We don't just
1177 // crash in this case because this is an I/O failure, not a programming
1178 // error.)
1179 bool failed() const { return failed_; }
1180
1181 private:
1182 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
1183
1184 void Write(const char* data, int size) {
1185 if (failed_) return;
1186 if (size == 0) return;
1187
1188 if (at_start_of_line_) {
1189 // Insert an indent.
1190 at_start_of_line_ = false;
1191 Write(indent_.data(), indent_.size());
1192 if (failed_) return;
1193 }
1194
1195 while (size > buffer_size_) {
1196 // Data exceeds space in the buffer. Copy what we can and request a
1197 // new buffer.
1198 memcpy(buffer_, data, buffer_size_);
1199 data += buffer_size_;
1200 size -= buffer_size_;
1201 void* void_buffer;
1202 failed_ = !output_->Next(&void_buffer, &buffer_size_);
1203 if (failed_) return;
1204 buffer_ = reinterpret_cast<char*>(void_buffer);
1205 }
1206
1207 // Buffer is big enough to receive the data; copy it.
1208 memcpy(buffer_, data, size);
1209 buffer_ += size;
1210 buffer_size_ -= size;
1211 }
1212
1213 io::ZeroCopyOutputStream* const output_;
1214 char* buffer_;
1215 int buffer_size_;
1216 bool at_start_of_line_;
1217 bool failed_;
1218
1219 string indent_;
1220 int initial_indent_level_;
1221};
1222
1223// ===========================================================================
1224
1225TextFormat::Finder::~Finder() {
1226}
1227
1228TextFormat::Parser::Parser()
1229 : error_collector_(NULL),
1230 finder_(NULL),
1231 parse_info_tree_(NULL),
1232 allow_partial_(false),
1233 allow_case_insensitive_field_(false),
1234 allow_unknown_field_(false),
1235 allow_unknown_enum_(false),
1236 allow_field_number_(false),
1237 allow_relaxed_whitespace_(false),
1238 allow_singular_overwrites_(false) {
1239}
1240
1241TextFormat::Parser::~Parser() {}
1242
1243bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
1244 Message* output) {
1245 output->Clear();
1246
1247 ParserImpl::SingularOverwritePolicy overwrites_policy =
1248 allow_singular_overwrites_
1249 ? ParserImpl::ALLOW_SINGULAR_OVERWRITES
1250 : ParserImpl::FORBID_SINGULAR_OVERWRITES;
1251
1252 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1253 finder_, parse_info_tree_,
1254 overwrites_policy,
1255 allow_case_insensitive_field_, allow_unknown_field_,
1256 allow_unknown_enum_, allow_field_number_,
1257 allow_relaxed_whitespace_);
1258 return MergeUsingImpl(input, output, &parser);
1259}
1260
1261bool TextFormat::Parser::ParseFromString(const string& input,
1262 Message* output) {
1263 io::ArrayInputStream input_stream(input.data(), input.size());
1264 return Parse(&input_stream, output);
1265}
1266
1267bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
1268 Message* output) {
1269 ParserImpl parser(output->GetDescriptor(), input, error_collector_,
1270 finder_, parse_info_tree_,
1271 ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1272 allow_case_insensitive_field_, allow_unknown_field_,
1273 allow_unknown_enum_, allow_field_number_,
1274 allow_relaxed_whitespace_);
1275 return MergeUsingImpl(input, output, &parser);
1276}
1277
1278bool TextFormat::Parser::MergeFromString(const string& input,
1279 Message* output) {
1280 io::ArrayInputStream input_stream(input.data(), input.size());
1281 return Merge(&input_stream, output);
1282}
1283
1284bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* /* input */,
1285 Message* output,
1286 ParserImpl* parser_impl) {
1287 if (!parser_impl->Parse(output)) return false;
1288 if (!allow_partial_ && !output->IsInitialized()) {
1289 vector<string> missing_fields;
1290 output->FindInitializationErrors(&missing_fields);
1291 parser_impl->ReportError(-1, 0, "Message missing required fields: " +
1292 Join(missing_fields, ", "));
1293 return false;
1294 }
1295 return true;
1296}
1297
1298bool TextFormat::Parser::ParseFieldValueFromString(
1299 const string& input,
1300 const FieldDescriptor* field,
1301 Message* output) {
1302 io::ArrayInputStream input_stream(input.data(), input.size());
1303 ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
1304 finder_, parse_info_tree_,
1305 ParserImpl::ALLOW_SINGULAR_OVERWRITES,
1306 allow_case_insensitive_field_, allow_unknown_field_,
1307 allow_unknown_enum_, allow_field_number_,
1308 allow_relaxed_whitespace_);
1309 return parser.ParseField(field, output);
1310}
1311
1312/* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
1313 Message* output) {
1314 return Parser().Parse(input, output);
1315}
1316
1317/* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
1318 Message* output) {
1319 return Parser().Merge(input, output);
1320}
1321
1322/* static */ bool TextFormat::ParseFromString(const string& input,
1323 Message* output) {
1324 return Parser().ParseFromString(input, output);
1325}
1326
1327/* static */ bool TextFormat::MergeFromString(const string& input,
1328 Message* output) {
1329 return Parser().MergeFromString(input, output);
1330}
1331
1332// ===========================================================================
1333
1334// The default implementation for FieldValuePrinter. The base class just
1335// does simple formatting. That way, deriving classes could decide to fallback
1336// to that behavior.
1337TextFormat::FieldValuePrinter::FieldValuePrinter() {}
1338TextFormat::FieldValuePrinter::~FieldValuePrinter() {}
1339string TextFormat::FieldValuePrinter::PrintBool(bool val) const {
1340 return val ? "true" : "false";
1341}
1342string TextFormat::FieldValuePrinter::PrintInt32(int32 val) const {
1343 return SimpleItoa(val);
1344}
1345string TextFormat::FieldValuePrinter::PrintUInt32(uint32 val) const {
1346 return SimpleItoa(val);
1347}
1348string TextFormat::FieldValuePrinter::PrintInt64(int64 val) const {
1349 return SimpleItoa(val);
1350}
1351string TextFormat::FieldValuePrinter::PrintUInt64(uint64 val) const {
1352 return SimpleItoa(val);
1353}
1354string TextFormat::FieldValuePrinter::PrintFloat(float val) const {
1355 return SimpleFtoa(val);
1356}
1357string TextFormat::FieldValuePrinter::PrintDouble(double val) const {
1358 return SimpleDtoa(val);
1359}
1360string TextFormat::FieldValuePrinter::PrintString(const string& val) const {
1361 string printed("\"");
1362 CEscapeAndAppend(val, &printed);
1363 printed.push_back('\"');
1364 return printed;
1365}
1366string TextFormat::FieldValuePrinter::PrintBytes(const string& val) const {
1367 return PrintString(val);
1368}
1369string TextFormat::FieldValuePrinter::PrintEnum(int32 val,
1370 const string& name) const {
1371 return name;
1372}
1373string TextFormat::FieldValuePrinter::PrintFieldName(
1374 const Message& message,
1375 const Reflection* reflection,
1376 const FieldDescriptor* field) const {
1377 if (field->is_extension()) {
1378 // We special-case MessageSet elements for compatibility with proto1.
1379 if (field->containing_type()->options().message_set_wire_format()
1380 && field->type() == FieldDescriptor::TYPE_MESSAGE
1381 && field->is_optional()
1382 && field->extension_scope() == field->message_type()) {
1383 return StrCat("[", field->message_type()->full_name(), "]");
1384 } else {
1385 return StrCat("[", field->full_name(), "]");
1386 }
1387 } else if (field->type() == FieldDescriptor::TYPE_GROUP) {
1388 // Groups must be serialized with their original capitalization.
1389 return field->message_type()->name();
1390 } else {
1391 return field->name();
1392 }
1393}
1394string TextFormat::FieldValuePrinter::PrintMessageStart(
1395 const Message& message,
1396 int field_index,
1397 int field_count,
1398 bool single_line_mode) const {
1399 return single_line_mode ? " { " : " {\n";
1400}
1401string TextFormat::FieldValuePrinter::PrintMessageEnd(
1402 const Message& message,
1403 int field_index,
1404 int field_count,
1405 bool single_line_mode) const {
1406 return single_line_mode ? "} " : "}\n";
1407}
1408
1409namespace {
1410// Our own specialization: for UTF8 escaped strings.
1411class FieldValuePrinterUtf8Escaping : public TextFormat::FieldValuePrinter {
1412 public:
1413 virtual string PrintString(const string& val) const {
1414 return StrCat("\"", strings::Utf8SafeCEscape(val), "\"");
1415 }
1416 virtual string PrintBytes(const string& val) const {
1417 return TextFormat::FieldValuePrinter::PrintString(val);
1418 }
1419};
1420
1421} // namespace
1422
1423TextFormat::Printer::Printer()
1424 : initial_indent_level_(0),
1425 single_line_mode_(false),
1426 use_field_number_(false),
1427 use_short_repeated_primitives_(false),
1428 hide_unknown_fields_(false),
1429 print_message_fields_in_index_order_(false),
1430 expand_any_(false),
1431 truncate_string_field_longer_than_(0LL) {
1432 SetUseUtf8StringEscaping(false);
1433}
1434
1435TextFormat::Printer::~Printer() {
1436 STLDeleteValues(&custom_printers_);
1437}
1438
1439void TextFormat::Printer::SetUseUtf8StringEscaping(bool as_utf8) {
1440 SetDefaultFieldValuePrinter(as_utf8
1441 ? new FieldValuePrinterUtf8Escaping()
1442 : new FieldValuePrinter());
1443}
1444
1445void TextFormat::Printer::SetDefaultFieldValuePrinter(
1446 const FieldValuePrinter* printer) {
1447 default_field_value_printer_.reset(printer);
1448}
1449
1450bool TextFormat::Printer::RegisterFieldValuePrinter(
1451 const FieldDescriptor* field,
1452 const FieldValuePrinter* printer) {
1453 return field != NULL && printer != NULL &&
1454 custom_printers_.insert(std::make_pair(field, printer)).second;
1455}
1456
1457bool TextFormat::Printer::PrintToString(const Message& message,
1458 string* output) const {
1459 GOOGLE_DCHECK(output) << "output specified is NULL";
1460
1461 output->clear();
1462 io::StringOutputStream output_stream(output);
1463
1464 return Print(message, &output_stream);
1465}
1466
1467bool TextFormat::Printer::PrintUnknownFieldsToString(
1468 const UnknownFieldSet& unknown_fields,
1469 string* output) const {
1470 GOOGLE_DCHECK(output) << "output specified is NULL";
1471
1472 output->clear();
1473 io::StringOutputStream output_stream(output);
1474 return PrintUnknownFields(unknown_fields, &output_stream);
1475}
1476
1477bool TextFormat::Printer::Print(const Message& message,
1478 io::ZeroCopyOutputStream* output) const {
1479 TextGenerator generator(output, initial_indent_level_);
1480
1481 Print(message, generator);
1482
1483 // Output false if the generator failed internally.
1484 return !generator.failed();
1485}
1486
1487bool TextFormat::Printer::PrintUnknownFields(
1488 const UnknownFieldSet& unknown_fields,
1489 io::ZeroCopyOutputStream* output) const {
1490 TextGenerator generator(output, initial_indent_level_);
1491
1492 PrintUnknownFields(unknown_fields, generator);
1493
1494 // Output false if the generator failed internally.
1495 return !generator.failed();
1496}
1497
1498namespace {
1499// Comparison functor for sorting FieldDescriptors by field index.
1500struct FieldIndexSorter {
1501 bool operator()(const FieldDescriptor* left,
1502 const FieldDescriptor* right) const {
1503 return left->index() < right->index();
1504 }
1505};
1506
1507} // namespace
1508
1509bool TextFormat::Printer::PrintAny(const Message& message,
1510 TextGenerator& generator) const {
1511 const FieldDescriptor* type_url_field;
1512 const FieldDescriptor* value_field;
1513 if (!internal::GetAnyFieldDescriptors(message, &type_url_field,
1514 &value_field)) {
1515 return false;
1516 }
1517
1518 const Reflection* reflection = message.GetReflection();
1519
1520 // Extract the full type name from the type_url field.
1521 const string& type_url = reflection->GetString(message, type_url_field);
1522 string full_type_name;
1523 if (!internal::ParseAnyTypeUrl(type_url, &full_type_name)) {
1524 return false;
1525 }
1526
1527 // Print the "value" in text.
1528 const google::protobuf::Descriptor* value_descriptor =
1529 message.GetDescriptor()->file()->pool()->FindMessageTypeByName(
1530 full_type_name);
1531 if (value_descriptor == NULL) {
1532 GOOGLE_LOG(WARNING) << "Proto type " << type_url << " not found";
1533 return false;
1534 }
1535 DynamicMessageFactory factory;
1536 google::protobuf::scoped_ptr<google::protobuf::Message> value_message(
1537 factory.GetPrototype(value_descriptor)->New());
1538 string serialized_value = reflection->GetString(message, value_field);
1539 if (!value_message->ParseFromString(serialized_value)) {
1540 GOOGLE_LOG(WARNING) << type_url << ": failed to parse contents";
1541 return false;
1542 }
1543 generator.Print(StrCat("[", type_url, "]"));
1544 const FieldValuePrinter* printer = FindWithDefault(
1545 custom_printers_, value_field, default_field_value_printer_.get());
1546 generator.Print(
1547 printer->PrintMessageStart(message, -1, 0, single_line_mode_));
1548 generator.Indent();
1549 Print(*value_message, generator);
1550 generator.Outdent();
1551 generator.Print(printer->PrintMessageEnd(message, -1, 0, single_line_mode_));
1552 return true;
1553}
1554
1555void TextFormat::Printer::Print(const Message& message,
1556 TextGenerator& generator) const {
1557 const Descriptor* descriptor = message.GetDescriptor();
1558 const Reflection* reflection = message.GetReflection();
1559 if (descriptor->full_name() == internal::kAnyFullTypeName && expand_any_ &&
1560 PrintAny(message, generator)) {
1561 return;
1562 }
1563 vector<const FieldDescriptor*> fields;
1564 reflection->ListFields(message, &fields);
1565 if (print_message_fields_in_index_order_) {
1566 std::sort(fields.begin(), fields.end(), FieldIndexSorter());
1567 }
1568 for (int i = 0; i < fields.size(); i++) {
1569 PrintField(message, reflection, fields[i], generator);
1570 }
1571 if (!hide_unknown_fields_) {
1572 PrintUnknownFields(reflection->GetUnknownFields(message), generator);
1573 }
1574}
1575
1576void TextFormat::Printer::PrintFieldValueToString(
1577 const Message& message,
1578 const FieldDescriptor* field,
1579 int index,
1580 string* output) const {
1581
1582 GOOGLE_DCHECK(output) << "output specified is NULL";
1583
1584 output->clear();
1585 io::StringOutputStream output_stream(output);
1586 TextGenerator generator(&output_stream, initial_indent_level_);
1587
1588 PrintFieldValue(message, message.GetReflection(), field, index, generator);
1589}
1590
1591class MapEntryMessageComparator {
1592 public:
1593 explicit MapEntryMessageComparator(const Descriptor* descriptor)
1594 : field_(descriptor->field(0)) {}
1595
1596 bool operator()(const Message* a, const Message* b) {
1597 const Reflection* reflection = a->GetReflection();
1598 switch (field_->cpp_type()) {
1599 case FieldDescriptor::CPPTYPE_BOOL: {
1600 bool first = reflection->GetBool(*a, field_);
1601 bool second = reflection->GetBool(*b, field_);
1602 return first < second;
1603 }
1604 case FieldDescriptor::CPPTYPE_INT32: {
1605 int32 first = reflection->GetInt32(*a, field_);
1606 int32 second = reflection->GetInt32(*b, field_);
1607 return first < second;
1608 }
1609 case FieldDescriptor::CPPTYPE_INT64: {
1610 int64 first = reflection->GetInt64(*a, field_);
1611 int64 second = reflection->GetInt64(*b, field_);
1612 return first < second;
1613 }
1614 case FieldDescriptor::CPPTYPE_UINT32: {
1615 uint32 first = reflection->GetUInt32(*a, field_);
1616 uint32 second = reflection->GetUInt32(*b, field_);
1617 return first < second;
1618 }
1619 case FieldDescriptor::CPPTYPE_UINT64: {
1620 uint64 first = reflection->GetUInt64(*a, field_);
1621 uint64 second = reflection->GetUInt64(*b, field_);
1622 return first < second;
1623 }
1624 case FieldDescriptor::CPPTYPE_STRING: {
1625 string first = reflection->GetString(*a, field_);
1626 string second = reflection->GetString(*b, field_);
1627 return first < second;
1628 }
1629 default:
1630 GOOGLE_LOG(DFATAL) << "Invalid key for map field.";
1631 return true;
1632 }
1633 }
1634
1635 private:
1636 const FieldDescriptor* field_;
1637};
1638
1639void TextFormat::Printer::PrintField(const Message& message,
1640 const Reflection* reflection,
1641 const FieldDescriptor* field,
1642 TextGenerator& generator) const {
1643 if (use_short_repeated_primitives_ &&
1644 field->is_repeated() &&
1645 field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
1646 field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
1647 PrintShortRepeatedField(message, reflection, field, generator);
1648 return;
1649 }
1650
1651 int count = 0;
1652
1653 if (field->is_repeated()) {
1654 count = reflection->FieldSize(message, field);
1655 } else if (reflection->HasField(message, field)) {
1656 count = 1;
1657 }
1658
1659 std::vector<const Message*> sorted_map_field;
1660 if (field->is_map()) {
1661 const RepeatedPtrField<Message>& map_field =
1662 reflection->GetRepeatedPtrField<Message>(message, field);
1663 for (RepeatedPtrField<Message>::const_pointer_iterator it =
1664 map_field.pointer_begin();
1665 it != map_field.pointer_end(); ++it) {
1666 sorted_map_field.push_back(*it);
1667 }
1668
1669 MapEntryMessageComparator comparator(field->message_type());
1670 std::stable_sort(sorted_map_field.begin(), sorted_map_field.end(),
1671 comparator);
1672 }
1673
1674 for (int j = 0; j < count; ++j) {
1675 const int field_index = field->is_repeated() ? j : -1;
1676
1677 PrintFieldName(message, reflection, field, generator);
1678
1679 if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
1680 const FieldValuePrinter* printer = FindWithDefault(
1681 custom_printers_, field, default_field_value_printer_.get());
1682 const Message& sub_message =
1683 field->is_repeated()
1684 ? (field->is_map()
1685 ? *sorted_map_field[j]
1686 : reflection->GetRepeatedMessage(message, field, j))
1687 : reflection->GetMessage(message, field);
1688 generator.Print(
1689 printer->PrintMessageStart(
1690 sub_message, field_index, count, single_line_mode_));
1691 generator.Indent();
1692 Print(sub_message, generator);
1693 generator.Outdent();
1694 generator.Print(
1695 printer->PrintMessageEnd(
1696 sub_message, field_index, count, single_line_mode_));
1697 } else {
1698 generator.Print(": ");
1699 // Write the field value.
1700 PrintFieldValue(message, reflection, field, field_index, generator);
1701 if (single_line_mode_) {
1702 generator.Print(" ");
1703 } else {
1704 generator.Print("\n");
1705 }
1706 }
1707 }
1708}
1709
1710void TextFormat::Printer::PrintShortRepeatedField(
1711 const Message& message,
1712 const Reflection* reflection,
1713 const FieldDescriptor* field,
1714 TextGenerator& generator) const {
1715 // Print primitive repeated field in short form.
1716 PrintFieldName(message, reflection, field, generator);
1717
1718 int size = reflection->FieldSize(message, field);
1719 generator.Print(": [");
1720 for (int i = 0; i < size; i++) {
1721 if (i > 0) generator.Print(", ");
1722 PrintFieldValue(message, reflection, field, i, generator);
1723 }
1724 if (single_line_mode_) {
1725 generator.Print("] ");
1726 } else {
1727 generator.Print("]\n");
1728 }
1729}
1730
1731void TextFormat::Printer::PrintFieldName(const Message& message,
1732 const Reflection* reflection,
1733 const FieldDescriptor* field,
1734 TextGenerator& generator) const {
1735 // if use_field_number_ is true, prints field number instead
1736 // of field name.
1737 if (use_field_number_) {
1738 generator.Print(SimpleItoa(field->number()));
1739 return;
1740 }
1741
1742 const FieldValuePrinter* printer = FindWithDefault(
1743 custom_printers_, field, default_field_value_printer_.get());
1744 generator.Print(printer->PrintFieldName(message, reflection, field));
1745}
1746
1747void TextFormat::Printer::PrintFieldValue(
1748 const Message& message,
1749 const Reflection* reflection,
1750 const FieldDescriptor* field,
1751 int index,
1752 TextGenerator& generator) const {
1753 GOOGLE_DCHECK(field->is_repeated() || (index == -1))
1754 << "Index must be -1 for non-repeated fields";
1755
1756 const FieldValuePrinter* printer
1757 = FindWithDefault(custom_printers_, field,
1758 default_field_value_printer_.get());
1759
1760 switch (field->cpp_type()) {
1761#define OUTPUT_FIELD(CPPTYPE, METHOD) \
1762 case FieldDescriptor::CPPTYPE_##CPPTYPE: \
1763 generator.Print(printer->Print##METHOD(field->is_repeated() \
1764 ? reflection->GetRepeated##METHOD(message, field, index) \
1765 : reflection->Get##METHOD(message, field))); \
1766 break
1767
1768 OUTPUT_FIELD( INT32, Int32);
1769 OUTPUT_FIELD( INT64, Int64);
1770 OUTPUT_FIELD(UINT32, UInt32);
1771 OUTPUT_FIELD(UINT64, UInt64);
1772 OUTPUT_FIELD( FLOAT, Float);
1773 OUTPUT_FIELD(DOUBLE, Double);
1774 OUTPUT_FIELD( BOOL, Bool);
1775#undef OUTPUT_FIELD
1776
1777 case FieldDescriptor::CPPTYPE_STRING: {
1778 string scratch;
1779 const string& value = field->is_repeated()
1780 ? reflection->GetRepeatedStringReference(
1781 message, field, index, &scratch)
1782 : reflection->GetStringReference(message, field, &scratch);
1783 int64 size = value.size();
1784 if (truncate_string_field_longer_than_ > 0) {
1785 size = std::min(truncate_string_field_longer_than_,
1786 static_cast<int64>(value.size()));
1787 }
1788 string truncated_value(value.substr(0, size) + "...<truncated>...");
1789 const string* value_to_print = &value;
1790 if (size < value.size()) {
1791 value_to_print = &truncated_value;
1792 }
1793 if (field->type() == FieldDescriptor::TYPE_STRING) {
1794 generator.Print(printer->PrintString(*value_to_print));
1795 } else {
1796 GOOGLE_DCHECK_EQ(field->type(), FieldDescriptor::TYPE_BYTES);
1797 generator.Print(printer->PrintBytes(*value_to_print));
1798 }
1799 break;
1800 }
1801
1802 case FieldDescriptor::CPPTYPE_ENUM: {
1803 int enum_value = field->is_repeated()
1804 ? reflection->GetRepeatedEnumValue(message, field, index)
1805 : reflection->GetEnumValue(message, field);
1806 const EnumValueDescriptor* enum_desc =
1807 field->enum_type()->FindValueByNumber(enum_value);
1808 if (enum_desc != NULL) {
1809 generator.Print(printer->PrintEnum(enum_value, enum_desc->name()));
1810 } else {
1811 // Ordinarily, enum_desc should not be null, because proto2 has the
1812 // invariant that set enum field values must be in-range, but with the
1813 // new integer-based API for enums (or the RepeatedField<int> loophole),
1814 // it is possible for the user to force an unknown integer value. So we
1815 // simply use the integer value itself as the enum value name in this
1816 // case.
1817 generator.Print(printer->PrintEnum(enum_value,
1818 StringPrintf("%d", enum_value)));
1819 }
1820 break;
1821 }
1822
1823 case FieldDescriptor::CPPTYPE_MESSAGE:
1824 Print(field->is_repeated()
1825 ? reflection->GetRepeatedMessage(message, field, index)
1826 : reflection->GetMessage(message, field),
1827 generator);
1828 break;
1829 }
1830}
1831
1832/* static */ bool TextFormat::Print(const Message& message,
1833 io::ZeroCopyOutputStream* output) {
1834 return Printer().Print(message, output);
1835}
1836
1837/* static */ bool TextFormat::PrintUnknownFields(
1838 const UnknownFieldSet& unknown_fields,
1839 io::ZeroCopyOutputStream* output) {
1840 return Printer().PrintUnknownFields(unknown_fields, output);
1841}
1842
1843/* static */ bool TextFormat::PrintToString(
1844 const Message& message, string* output) {
1845 return Printer().PrintToString(message, output);
1846}
1847
1848/* static */ bool TextFormat::PrintUnknownFieldsToString(
1849 const UnknownFieldSet& unknown_fields, string* output) {
1850 return Printer().PrintUnknownFieldsToString(unknown_fields, output);
1851}
1852
1853/* static */ void TextFormat::PrintFieldValueToString(
1854 const Message& message,
1855 const FieldDescriptor* field,
1856 int index,
1857 string* output) {
1858 return Printer().PrintFieldValueToString(message, field, index, output);
1859}
1860
1861/* static */ bool TextFormat::ParseFieldValueFromString(
1862 const string& input,
1863 const FieldDescriptor* field,
1864 Message* message) {
1865 return Parser().ParseFieldValueFromString(input, field, message);
1866}
1867
1868// Prints an integer as hex with a fixed number of digits dependent on the
1869// integer type.
1870template<typename IntType>
1871static string PaddedHex(IntType value) {
1872 string result;
1873 result.reserve(sizeof(value) * 2);
1874 for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
1875 result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
1876 }
1877 return result;
1878}
1879
1880void TextFormat::Printer::PrintUnknownFields(
1881 const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
1882 for (int i = 0; i < unknown_fields.field_count(); i++) {
1883 const UnknownField& field = unknown_fields.field(i);
1884 string field_number = SimpleItoa(field.number());
1885
1886 switch (field.type()) {
1887 case UnknownField::TYPE_VARINT:
1888 generator.Print(field_number);
1889 generator.Print(": ");
1890 generator.Print(SimpleItoa(field.varint()));
1891 if (single_line_mode_) {
1892 generator.Print(" ");
1893 } else {
1894 generator.Print("\n");
1895 }
1896 break;
1897 case UnknownField::TYPE_FIXED32: {
1898 generator.Print(field_number);
1899 generator.Print(": 0x");
1900 generator.Print(
1901 StrCat(strings::Hex(field.fixed32(), strings::ZERO_PAD_8)));
1902 if (single_line_mode_) {
1903 generator.Print(" ");
1904 } else {
1905 generator.Print("\n");
1906 }
1907 break;
1908 }
1909 case UnknownField::TYPE_FIXED64: {
1910 generator.Print(field_number);
1911 generator.Print(": 0x");
1912 generator.Print(
1913 StrCat(strings::Hex(field.fixed64(), strings::ZERO_PAD_16)));
1914 if (single_line_mode_) {
1915 generator.Print(" ");
1916 } else {
1917 generator.Print("\n");
1918 }
1919 break;
1920 }
1921 case UnknownField::TYPE_LENGTH_DELIMITED: {
1922 generator.Print(field_number);
1923 const string& value = field.length_delimited();
1924 UnknownFieldSet embedded_unknown_fields;
1925 if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
1926 // This field is parseable as a Message.
1927 // So it is probably an embedded message.
1928 if (single_line_mode_) {
1929 generator.Print(" { ");
1930 } else {
1931 generator.Print(" {\n");
1932 generator.Indent();
1933 }
1934 PrintUnknownFields(embedded_unknown_fields, generator);
1935 if (single_line_mode_) {
1936 generator.Print("} ");
1937 } else {
1938 generator.Outdent();
1939 generator.Print("}\n");
1940 }
1941 } else {
1942 // This field is not parseable as a Message.
1943 // So it is probably just a plain string.
1944 string printed(": \"");
1945 CEscapeAndAppend(value, &printed);
1946 printed.append(single_line_mode_ ? "\" " : "\"\n");
1947 generator.Print(printed);
1948 }
1949 break;
1950 }
1951 case UnknownField::TYPE_GROUP:
1952 generator.Print(field_number);
1953 if (single_line_mode_) {
1954 generator.Print(" { ");
1955 } else {
1956 generator.Print(" {\n");
1957 generator.Indent();
1958 }
1959 PrintUnknownFields(field.group(), generator);
1960 if (single_line_mode_) {
1961 generator.Print("} ");
1962 } else {
1963 generator.Outdent();
1964 generator.Print("}\n");
1965 }
1966 break;
1967 }
1968 }
1969}
1970
1971} // namespace protobuf
1972} // namespace google