// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
// Based on original Protocol Buffers design by
// Sanjay Ghemawat, Jeff Dean, and others.
//
// This file contains the CodedInputStream and CodedOutputStream classes,
// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
// and allow you to read or write individual pieces of data in various
// formats. In particular, these implement the varint encoding for
// integers, a simple variable-length encoding in which smaller numbers
// take fewer bytes.
//
// Typically these classes will only be used internally by the protocol
// buffer library in order to encode and decode protocol buffers. Clients
// of the library only need to know about this class if they wish to write
// custom message parsing or serialization procedures.
//
// CodedOutputStream example:
//   // Write some data to "myfile". First we write a 4-byte "magic number"
//   // to identify the file type, then write a length-delimited string. The
//   // string is composed of a varint giving the length followed by the raw
//   // bytes.
//   int fd = open("myfile", O_CREAT | O_WRONLY);
//   ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
//
//   int magic_number = 1234;
//   char text[] = "Hello world!";
//   coded_output->WriteLittleEndian32(magic_number);
//   coded_output->WriteVarint32(strlen(text));
//   coded_output->WriteRaw(text, strlen(text));
//
//   delete coded_output;
//   delete raw_output;
//   close(fd);
//
// CodedInputStream example:
//   // Read a file created by the above code.
//   int fd = open("myfile", O_RDONLY);
//   ZeroCopyInputStream* raw_input = new FileInputStream(fd);
//   CodedInputStream* coded_input = new CodedInputStream(raw_input);
//
//   uint32 magic_number;
//   coded_input->ReadLittleEndian32(&magic_number);
//   if (magic_number != 1234) {
//     cerr << "File not in expected format." << endl;
//     return;
//   }
//
//   uint32 size;
//   coded_input->ReadVarint32(&size);
//
//   char* text = new char[size + 1];
//   coded_input->ReadRaw(text, size);
//   text[size] = '\0';
//
//   delete coded_input;
//   delete raw_input;
//   close(fd);
//
//   cout << "Text is: " << text << endl;
//   delete [] text;
//
// For those who are interested, varint encoding is defined as follows:
//
// The encoding operates on unsigned integers of up to 64 bits in length.
// Each byte of the encoded value has the format:
// * bits 0-6: Seven bits of the number being encoded.
// * bit 7: Zero if this is the last byte in the encoding (in which
//          case all remaining bits of the number are zero) or 1 if
//          more bytes follow.
// The first byte contains the least-significant 7 bits of the number, the
// second byte (if present) contains the next-least-significant 7 bits,
// and so on. So, the binary number 1011000101011 would be encoded in two
// bytes as "10101011 00101100".
//
// In theory, varint could be used to encode integers of any length.
// However, for practicality we set a limit at 64 bits. The maximum encoded
// length of a number is thus 10 bytes.

109#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
110#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
111
112#include <assert.h>
Austin Schuh40c16522018-10-28 20:27:54 -0700113#include <atomic>
114#include <climits>
Brian Silverman9c614bc2016-02-15 20:20:02 -0500115#include <string>
116#include <utility>
117#ifdef _MSC_VER
118 // Assuming windows is always little-endian.
119 #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
120 #define PROTOBUF_LITTLE_ENDIAN 1
121 #endif
122 #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
123 // If MSVC has "/RTCc" set, it will complain about truncating casts at
124 // runtime. This file contains some intentional truncating casts.
125 #pragma runtime_checks("c", off)
126 #endif
127#else
128 #include <sys/param.h> // __BYTE_ORDER
129 #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
130 (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
131 !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
132 #define PROTOBUF_LITTLE_ENDIAN 1
133 #endif
134#endif
135#include <google/protobuf/stubs/common.h>
Austin Schuh40c16522018-10-28 20:27:54 -0700136#include <google/protobuf/stubs/port.h>
137#include <google/protobuf/stubs/port.h>
Brian Silverman9c614bc2016-02-15 20:20:02 -0500138
139namespace google {
140
141namespace protobuf {
142
143class DescriptorPool;
144class MessageFactory;
145
Austin Schuh40c16522018-10-28 20:27:54 -0700146namespace internal { void MapTestForceDeterministic(); }
147
Brian Silverman9c614bc2016-02-15 20:20:02 -0500148namespace io {
149
150// Defined in this file.
151class CodedInputStream;
152class CodedOutputStream;
153
154// Defined in other files.
155class ZeroCopyInputStream; // zero_copy_stream.h
156class ZeroCopyOutputStream; // zero_copy_stream.h
157
158// Class which reads and decodes binary data which is composed of varint-
159// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
160// Most users will not need to deal with CodedInputStream.
161//
162// Most methods of CodedInputStream that return a bool return false if an
163// underlying I/O error occurs or if the data is malformed. Once such a
164// failure occurs, the CodedInputStream is broken and is no longer useful.
165class LIBPROTOBUF_EXPORT CodedInputStream {
166 public:
167 // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
168 explicit CodedInputStream(ZeroCopyInputStream* input);
169
170 // Create a CodedInputStream that reads from the given flat array. This is
171 // faster than using an ArrayInputStream. PushLimit(size) is implied by
172 // this constructor.
173 explicit CodedInputStream(const uint8* buffer, int size);
174
175 // Destroy the CodedInputStream and position the underlying
176 // ZeroCopyInputStream at the first unread byte. If an error occurred while
177 // reading (causing a method to return false), then the exact position of
178 // the input stream may be anywhere between the last value that was read
179 // successfully and the stream's byte limit.
180 ~CodedInputStream();
181
182 // Return true if this CodedInputStream reads from a flat array instead of
183 // a ZeroCopyInputStream.
184 inline bool IsFlat() const;
185
186 // Skips a number of bytes. Returns false if an underlying read error
187 // occurs.
Austin Schuh40c16522018-10-28 20:27:54 -0700188 inline bool Skip(int count);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500189
190 // Sets *data to point directly at the unread part of the CodedInputStream's
191 // underlying buffer, and *size to the size of that buffer, but does not
192 // advance the stream's current position. This will always either produce
193 // a non-empty buffer or return false. If the caller consumes any of
194 // this data, it should then call Skip() to skip over the consumed bytes.
195 // This may be useful for implementing external fast parsing routines for
196 // types of data not covered by the CodedInputStream interface.
197 bool GetDirectBufferPointer(const void** data, int* size);
198
199 // Like GetDirectBufferPointer, but this method is inlined, and does not
200 // attempt to Refresh() if the buffer is currently empty.
Austin Schuh40c16522018-10-28 20:27:54 -0700201 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
202 void GetDirectBufferPointerInline(const void** data, int* size);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500203
204 // Read raw bytes, copying them into the given buffer.
205 bool ReadRaw(void* buffer, int size);
206
207 // Like the above, with inlined optimizations. This should only be used
208 // by the protobuf implementation.
Austin Schuh40c16522018-10-28 20:27:54 -0700209 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
210 bool InternalReadRawInline(void* buffer, int size);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500211
212 // Like ReadRaw, but reads into a string.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500213 bool ReadString(string* buffer, int size);
214 // Like the above, with inlined optimizations. This should only be used
215 // by the protobuf implementation.
Austin Schuh40c16522018-10-28 20:27:54 -0700216 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
217 bool InternalReadStringInline(string* buffer, int size);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500218
219
220 // Read a 32-bit little-endian integer.
221 bool ReadLittleEndian32(uint32* value);
222 // Read a 64-bit little-endian integer.
223 bool ReadLittleEndian64(uint64* value);
224
225 // These methods read from an externally provided buffer. The caller is
226 // responsible for ensuring that the buffer has sufficient space.
227 // Read a 32-bit little-endian integer.
228 static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
229 uint32* value);
230 // Read a 64-bit little-endian integer.
231 static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
232 uint64* value);
233
234 // Read an unsigned integer with Varint encoding, truncating to 32 bits.
235 // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
236 // it to uint32, but may be more efficient.
237 bool ReadVarint32(uint32* value);
238 // Read an unsigned integer with Varint encoding.
239 bool ReadVarint64(uint64* value);
240
Austin Schuh40c16522018-10-28 20:27:54 -0700241 // Reads a varint off the wire into an "int". This should be used for reading
242 // sizes off the wire (sizes of strings, submessages, bytes fields, etc).
243 //
244 // The value from the wire is interpreted as unsigned. If its value exceeds
245 // the representable value of an integer on this platform, instead of
246 // truncating we return false. Truncating (as performed by ReadVarint32()
247 // above) is an acceptable approach for fields representing an integer, but
248 // when we are parsing a size from the wire, truncating the value would result
249 // in us misparsing the payload.
250 bool ReadVarintSizeAsInt(int* value);
251
Brian Silverman9c614bc2016-02-15 20:20:02 -0500252 // Read a tag. This calls ReadVarint32() and returns the result, or returns
Austin Schuh40c16522018-10-28 20:27:54 -0700253 // zero (which is not a valid tag) if ReadVarint32() fails. Also, ReadTag
254 // (but not ReadTagNoLastTag) updates the last tag value, which can be checked
255 // with LastTagWas().
256 //
Brian Silverman9c614bc2016-02-15 20:20:02 -0500257 // Always inline because this is only called in one place per parse loop
258 // but it is called for every iteration of said loop, so it should be fast.
259 // GCC doesn't want to inline this by default.
Austin Schuh40c16522018-10-28 20:27:54 -0700260 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag() {
261 return last_tag_ = ReadTagNoLastTag();
262 }
263
264 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTagNoLastTag();
265
Brian Silverman9c614bc2016-02-15 20:20:02 -0500266
267 // This usually a faster alternative to ReadTag() when cutoff is a manifest
268 // constant. It does particularly well for cutoff >= 127. The first part
269 // of the return value is the tag that was read, though it can also be 0 in
270 // the cases where ReadTag() would return 0. If the second part is true
271 // then the tag is known to be in [0, cutoff]. If not, the tag either is
272 // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
273 // because that can arise in several ways, and for best performance we want
274 // to avoid an extra "is tag == 0?" check here.)
Austin Schuh40c16522018-10-28 20:27:54 -0700275 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
276 std::pair<uint32, bool> ReadTagWithCutoff(uint32 cutoff) {
277 std::pair<uint32, bool> result = ReadTagWithCutoffNoLastTag(cutoff);
278 last_tag_ = result.first;
279 return result;
280 }
281
282 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
283 std::pair<uint32, bool> ReadTagWithCutoffNoLastTag(uint32 cutoff);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500284
285 // Usually returns true if calling ReadVarint32() now would produce the given
286 // value. Will always return false if ReadVarint32() would not return the
287 // given value. If ExpectTag() returns true, it also advances past
288 // the varint. For best performance, use a compile-time constant as the
289 // parameter.
290 // Always inline because this collapses to a small number of instructions
291 // when given a constant parameter, but GCC doesn't want to inline by default.
Austin Schuh40c16522018-10-28 20:27:54 -0700292 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500293
294 // Like above, except this reads from the specified buffer. The caller is
295 // responsible for ensuring that the buffer is large enough to read a varint
296 // of the expected size. For best performance, use a compile-time constant as
297 // the expected tag parameter.
298 //
299 // Returns a pointer beyond the expected tag if it was found, or NULL if it
300 // was not.
Austin Schuh40c16522018-10-28 20:27:54 -0700301 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
302 static const uint8* ExpectTagFromArray(const uint8* buffer, uint32 expected);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500303
304 // Usually returns true if no more bytes can be read. Always returns false
305 // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
306 // call to LastTagWas() will act as if ReadTag() had been called and returned
307 // zero, and ConsumedEntireMessage() will return true.
308 bool ExpectAtEnd();
309
Austin Schuh40c16522018-10-28 20:27:54 -0700310 // If the last call to ReadTag() or ReadTagWithCutoff() returned the given
311 // value, returns true. Otherwise, returns false.
312 // ReadTagNoLastTag/ReadTagWithCutoffNoLastTag do not preserve the last
313 // returned value.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500314 //
315 // This is needed because parsers for some types of embedded messages
316 // (with field type TYPE_GROUP) don't actually know that they've reached the
317 // end of a message until they see an ENDGROUP tag, which was actually part
318 // of the enclosing message. The enclosing message would like to check that
319 // tag to make sure it had the right number, so it calls LastTagWas() on
320 // return from the embedded parser to check.
321 bool LastTagWas(uint32 expected);
Austin Schuh40c16522018-10-28 20:27:54 -0700322 void SetLastTag(uint32 tag) { last_tag_ = tag; }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500323
324 // When parsing message (but NOT a group), this method must be called
325 // immediately after MergeFromCodedStream() returns (if it returns true)
326 // to further verify that the message ended in a legitimate way. For
327 // example, this verifies that parsing did not end on an end-group tag.
328 // It also checks for some cases where, due to optimizations,
329 // MergeFromCodedStream() can incorrectly return true.
330 bool ConsumedEntireMessage();
331
332 // Limits ----------------------------------------------------------
333 // Limits are used when parsing length-delimited embedded messages.
334 // After the message's length is read, PushLimit() is used to prevent
335 // the CodedInputStream from reading beyond that length. Once the
336 // embedded message has been parsed, PopLimit() is called to undo the
337 // limit.
338
339 // Opaque type used with PushLimit() and PopLimit(). Do not modify
340 // values of this type yourself. The only reason that this isn't a
341 // struct with private internals is for efficiency.
342 typedef int Limit;
343
344 // Places a limit on the number of bytes that the stream may read,
345 // starting from the current position. Once the stream hits this limit,
346 // it will act like the end of the input has been reached until PopLimit()
347 // is called.
348 //
349 // As the names imply, the stream conceptually has a stack of limits. The
350 // shortest limit on the stack is always enforced, even if it is not the
351 // top limit.
352 //
353 // The value returned by PushLimit() is opaque to the caller, and must
354 // be passed unchanged to the corresponding call to PopLimit().
355 Limit PushLimit(int byte_limit);
356
357 // Pops the last limit pushed by PushLimit(). The input must be the value
358 // returned by that call to PushLimit().
359 void PopLimit(Limit limit);
360
361 // Returns the number of bytes left until the nearest limit on the
362 // stack is hit, or -1 if no limits are in place.
363 int BytesUntilLimit() const;
364
365 // Returns current position relative to the beginning of the input stream.
366 int CurrentPosition() const;
367
368 // Total Bytes Limit -----------------------------------------------
369 // To prevent malicious users from sending excessively large messages
Austin Schuh40c16522018-10-28 20:27:54 -0700370 // and causing memory exhaustion, CodedInputStream imposes a hard limit on
371 // the total number of bytes it will read.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500372
373 // Sets the maximum number of bytes that this CodedInputStream will read
Austin Schuh40c16522018-10-28 20:27:54 -0700374 // before refusing to continue. To prevent servers from allocating enormous
375 // amounts of memory to hold parsed messages, the maximum message length
376 // should be limited to the shortest length that will not harm usability.
377 // The default limit is INT_MAX (~2GB) and apps should set shorter limits
378 // if possible. An error will always be printed to stderr if the limit is
379 // reached.
380 //
381 // Note: setting a limit less than the current read position is interpreted
382 // as a limit on the current position.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500383 //
384 // This is unrelated to PushLimit()/PopLimit().
Austin Schuh40c16522018-10-28 20:27:54 -0700385 void SetTotalBytesLimit(int total_bytes_limit);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500386
Austin Schuh40c16522018-10-28 20:27:54 -0700387 PROTOBUF_RUNTIME_DEPRECATED(
388 "Please use the single parameter version of SetTotalBytesLimit(). The "
389 "second parameter is ignored.")
390 void SetTotalBytesLimit(int total_bytes_limit, int) {
391 SetTotalBytesLimit(total_bytes_limit);
392 }
393
394 // The Total Bytes Limit minus the Current Position, or -1 if the total bytes
395 // limit is INT_MAX.
Brian Silverman9c614bc2016-02-15 20:20:02 -0500396 int BytesUntilTotalBytesLimit() const;
397
398 // Recursion Limit -------------------------------------------------
399 // To prevent corrupt or malicious messages from causing stack overflows,
400 // we must keep track of the depth of recursion when parsing embedded
401 // messages and groups. CodedInputStream keeps track of this because it
402 // is the only object that is passed down the stack during parsing.
403
404 // Sets the maximum recursion depth. The default is 100.
405 void SetRecursionLimit(int limit);
406
407
408 // Increments the current recursion depth. Returns true if the depth is
409 // under the limit, false if it has gone over.
410 bool IncrementRecursionDepth();
411
412 // Decrements the recursion depth if possible.
413 void DecrementRecursionDepth();
414
415 // Decrements the recursion depth blindly. This is faster than
416 // DecrementRecursionDepth(). It should be used only if all previous
417 // increments to recursion depth were successful.
418 void UnsafeDecrementRecursionDepth();
419
420 // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
421 // Using this can reduce code size and complexity in some cases. The caller
422 // is expected to check that the second part of the result is non-negative (to
423 // bail out if the depth of recursion is too high) and, if all is well, to
424 // later pass the first part of the result to PopLimit() or similar.
425 std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
426 int byte_limit);
427
428 // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
429 Limit ReadLengthAndPushLimit();
430
431 // Helper that is equivalent to: {
432 // bool result = ConsumedEntireMessage();
433 // PopLimit(limit);
434 // UnsafeDecrementRecursionDepth();
435 // return result; }
436 // Using this can reduce code size and complexity in some cases.
437 // Do not use unless the current recursion depth is greater than zero.
438 bool DecrementRecursionDepthAndPopLimit(Limit limit);
439
440 // Helper that is equivalent to: {
441 // bool result = ConsumedEntireMessage();
442 // PopLimit(limit);
443 // return result; }
444 // Using this can reduce code size and complexity in some cases.
445 bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
446
447 // Extension Registry ----------------------------------------------
448 // ADVANCED USAGE: 99.9% of people can ignore this section.
449 //
450 // By default, when parsing extensions, the parser looks for extension
451 // definitions in the pool which owns the outer message's Descriptor.
452 // However, you may call SetExtensionRegistry() to provide an alternative
453 // pool instead. This makes it possible, for example, to parse a message
454 // using a generated class, but represent some extensions using
455 // DynamicMessage.
456
457 // Set the pool used to look up extensions. Most users do not need to call
458 // this as the correct pool will be chosen automatically.
459 //
460 // WARNING: It is very easy to misuse this. Carefully read the requirements
461 // below. Do not use this unless you are sure you need it. Almost no one
462 // does.
463 //
464 // Let's say you are parsing a message into message object m, and you want
465 // to take advantage of SetExtensionRegistry(). You must follow these
466 // requirements:
467 //
468 // The given DescriptorPool must contain m->GetDescriptor(). It is not
469 // sufficient for it to simply contain a descriptor that has the same name
470 // and content -- it must be the *exact object*. In other words:
471 // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
472 // m->GetDescriptor());
473 // There are two ways to satisfy this requirement:
474 // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
475 // because this is the pool that would be used anyway if you didn't call
476 // SetExtensionRegistry() at all.
477 // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
478 // "underlay". Read the documentation for DescriptorPool for more
479 // information about underlays.
480 //
481 // You must also provide a MessageFactory. This factory will be used to
482 // construct Message objects representing extensions. The factory's
483 // GetPrototype() MUST return non-NULL for any Descriptor which can be found
484 // through the provided pool.
485 //
486 // If the provided factory might return instances of protocol-compiler-
487 // generated (i.e. compiled-in) types, or if the outer message object m is
488 // a generated type, then the given factory MUST have this property: If
489 // GetPrototype() is given a Descriptor which resides in
490 // DescriptorPool::generated_pool(), the factory MUST return the same
491 // prototype which MessageFactory::generated_factory() would return. That
492 // is, given a descriptor for a generated type, the factory must return an
493 // instance of the generated class (NOT DynamicMessage). However, when
494 // given a descriptor for a type that is NOT in generated_pool, the factory
495 // is free to return any implementation.
496 //
497 // The reason for this requirement is that generated sub-objects may be
498 // accessed via the standard (non-reflection) extension accessor methods,
499 // and these methods will down-cast the object to the generated class type.
500 // If the object is not actually of that type, the results would be undefined.
501 // On the other hand, if an extension is not compiled in, then there is no
502 // way the code could end up accessing it via the standard accessors -- the
503 // only way to access the extension is via reflection. When using reflection,
504 // DynamicMessage and generated messages are indistinguishable, so it's fine
505 // if these objects are represented using DynamicMessage.
506 //
507 // Using DynamicMessageFactory on which you have called
508 // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
509 // above requirement.
510 //
511 // If either pool or factory is NULL, both must be NULL.
512 //
513 // Note that this feature is ignored when parsing "lite" messages as they do
514 // not have descriptors.
515 void SetExtensionRegistry(const DescriptorPool* pool,
516 MessageFactory* factory);
517
518 // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
519 // has been provided.
520 const DescriptorPool* GetExtensionPool();
521
522 // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
523 // factory has been provided.
524 MessageFactory* GetExtensionFactory();
525
526 private:
527 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
528
529 const uint8* buffer_;
530 const uint8* buffer_end_; // pointer to the end of the buffer.
531 ZeroCopyInputStream* input_;
532 int total_bytes_read_; // total bytes read from input_, including
533 // the current buffer
534
535 // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
536 // so that we can BackUp() on destruction.
537 int overflow_bytes_;
538
539 // LastTagWas() stuff.
540 uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
541
542 // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
543 // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
544 // reach the end of a message and attempt to read another tag.
545 bool legitimate_message_end_;
546
547 // See EnableAliasing().
548 bool aliasing_enabled_;
549
550 // Limits
551 Limit current_limit_; // if position = -1, no limit is applied
552
553 // For simplicity, if the current buffer crosses a limit (either a normal
554 // limit created by PushLimit() or the total bytes limit), buffer_size_
555 // only tracks the number of bytes before that limit. This field
556 // contains the number of bytes after it. Note that this implies that if
557 // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
558 // hit a limit. However, if both are zero, it doesn't necessarily mean
559 // we aren't at a limit -- the buffer may have ended exactly at the limit.
560 int buffer_size_after_limit_;
561
562 // Maximum number of bytes to read, period. This is unrelated to
563 // current_limit_. Set using SetTotalBytesLimit().
564 int total_bytes_limit_;
565
Brian Silverman9c614bc2016-02-15 20:20:02 -0500566 // Current recursion budget, controlled by IncrementRecursionDepth() and
567 // similar. Starts at recursion_limit_ and goes down: if this reaches
568 // -1 we are over budget.
569 int recursion_budget_;
570 // Recursion depth limit, set by SetRecursionLimit().
571 int recursion_limit_;
572
573 // See SetExtensionRegistry().
574 const DescriptorPool* extension_pool_;
575 MessageFactory* extension_factory_;
576
577 // Private member functions.
578
Austin Schuh40c16522018-10-28 20:27:54 -0700579 // Fallback when Skip() goes past the end of the current buffer.
580 bool SkipFallback(int count, int original_buffer_size);
581
Brian Silverman9c614bc2016-02-15 20:20:02 -0500582 // Advance the buffer by a given number of bytes.
583 void Advance(int amount);
584
585 // Back up input_ to the current buffer position.
586 void BackUpInputToCurrentPosition();
587
588 // Recomputes the value of buffer_size_after_limit_. Must be called after
589 // current_limit_ or total_bytes_limit_ changes.
590 void RecomputeBufferLimits();
591
592 // Writes an error message saying that we hit total_bytes_limit_.
593 void PrintTotalBytesLimitError();
594
595 // Called when the buffer runs out to request more data. Implies an
596 // Advance(BufferSize()).
597 bool Refresh();
598
599 // When parsing varints, we optimize for the common case of small values, and
600 // then optimize for the case when the varint fits within the current buffer
601 // piece. The Fallback method is used when we can't use the one-byte
602 // optimization. The Slow method is yet another fallback when the buffer is
603 // not large enough. Making the slow path out-of-line speeds up the common
604 // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
605 // message crosses multiple buffers. Note: ReadVarint32Fallback() and
606 // ReadVarint64Fallback() are called frequently and generally not inlined, so
607 // they have been optimized to avoid "out" parameters. The former returns -1
608 // if it fails and the uint32 it read otherwise. The latter has a bool
609 // indicating success or failure as part of its return type.
610 int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
Austin Schuh40c16522018-10-28 20:27:54 -0700611 int ReadVarintSizeAsIntFallback();
Brian Silverman9c614bc2016-02-15 20:20:02 -0500612 std::pair<uint64, bool> ReadVarint64Fallback();
613 bool ReadVarint32Slow(uint32* value);
614 bool ReadVarint64Slow(uint64* value);
Austin Schuh40c16522018-10-28 20:27:54 -0700615 int ReadVarintSizeAsIntSlow();
Brian Silverman9c614bc2016-02-15 20:20:02 -0500616 bool ReadLittleEndian32Fallback(uint32* value);
617 bool ReadLittleEndian64Fallback(uint64* value);
Austin Schuh40c16522018-10-28 20:27:54 -0700618
Brian Silverman9c614bc2016-02-15 20:20:02 -0500619 // Fallback/slow methods for reading tags. These do not update last_tag_,
620 // but will set legitimate_message_end_ if we are at the end of the input
621 // stream.
622 uint32 ReadTagFallback(uint32 first_byte_or_zero);
623 uint32 ReadTagSlow();
624 bool ReadStringFallback(string* buffer, int size);
625
626 // Return the size of the buffer.
627 int BufferSize() const;
628
Austin Schuh40c16522018-10-28 20:27:54 -0700629 static const int kDefaultTotalBytesLimit = INT_MAX;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500630
631 static int default_recursion_limit_; // 100 by default.
632};
633
634// Class which encodes and writes binary data which is composed of varint-
635// encoded integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
636// Most users will not need to deal with CodedOutputStream.
637//
638// Most methods of CodedOutputStream which return a bool return false if an
639// underlying I/O error occurs. Once such a failure occurs, the
640// CodedOutputStream is broken and is no longer useful. The Write* methods do
641// not return the stream status, but will invalidate the stream if an error
642// occurs. The client can probe HadError() to determine the status.
643//
644// Note that every method of CodedOutputStream which writes some data has
645// a corresponding static "ToArray" version. These versions write directly
646// to the provided buffer, returning a pointer past the last written byte.
647// They require that the buffer has sufficient capacity for the encoded data.
648// This allows an optimization where we check if an output stream has enough
649// space for an entire message before we start writing and, if there is, we
650// call only the ToArray methods to avoid doing bound checks for each
651// individual value.
652// i.e., in the example above:
653//
//   CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
655// int magic_number = 1234;
656// char text[] = "Hello world!";
657//
658// int coded_size = sizeof(magic_number) +
659// CodedOutputStream::VarintSize32(strlen(text)) +
660// strlen(text);
661//
662// uint8* buffer =
663// coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
664// if (buffer != NULL) {
665// // The output stream has enough space in the buffer: write directly to
666// // the array.
667// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
668// buffer);
669// buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
670// buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
671// } else {
672// // Make bound-checked writes, which will ask the underlying stream for
673// // more space as needed.
674// coded_output->WriteLittleEndian32(magic_number);
675// coded_output->WriteVarint32(strlen(text));
676// coded_output->WriteRaw(text, strlen(text));
677// }
678//
679// delete coded_output;
680class LIBPROTOBUF_EXPORT CodedOutputStream {
681 public:
682 // Create an CodedOutputStream that writes to the given ZeroCopyOutputStream.
683 explicit CodedOutputStream(ZeroCopyOutputStream* output);
684 CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
685
686 // Destroy the CodedOutputStream and position the underlying
687 // ZeroCopyOutputStream immediately after the last byte written.
688 ~CodedOutputStream();
689
690 // Trims any unused space in the underlying buffer so that its size matches
691 // the number of bytes written by this stream. The underlying buffer will
692 // automatically be trimmed when this stream is destroyed; this call is only
693 // necessary if the underlying buffer is accessed *before* the stream is
694 // destroyed.
695 void Trim();
696
697 // Skips a number of bytes, leaving the bytes unmodified in the underlying
698 // buffer. Returns false if an underlying write error occurs. This is
699 // mainly useful with GetDirectBufferPointer().
700 bool Skip(int count);
701
702 // Sets *data to point directly at the unwritten part of the
703 // CodedOutputStream's underlying buffer, and *size to the size of that
704 // buffer, but does not advance the stream's current position. This will
705 // always either produce a non-empty buffer or return false. If the caller
706 // writes any data to this buffer, it should then call Skip() to skip over
707 // the consumed bytes. This may be useful for implementing external fast
708 // serialization routines for types of data not covered by the
709 // CodedOutputStream interface.
710 bool GetDirectBufferPointer(void** data, int* size);
711
712 // If there are at least "size" bytes available in the current buffer,
713 // returns a pointer directly into the buffer and advances over these bytes.
714 // The caller may then write directly into this buffer (e.g. using the
715 // *ToArray static methods) rather than go through CodedOutputStream. If
716 // there are not enough bytes available, returns NULL. The return pointer is
717 // invalidated as soon as any other non-const method of CodedOutputStream
718 // is called.
719 inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
720
721 // Write raw bytes, copying them from the given buffer.
722 void WriteRaw(const void* buffer, int size);
723 // Like WriteRaw() but will try to write aliased data if aliasing is
724 // turned on.
725 void WriteRawMaybeAliased(const void* data, int size);
726 // Like WriteRaw() but writing directly to the target array.
727 // This is _not_ inlined, as the compiler often optimizes memcpy into inline
728 // copy loops. Since this gets called by every field with string or bytes
729 // type, inlining may lead to a significant amount of code bloat, with only a
730 // minor performance gain.
731 static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
732
733 // Equivalent to WriteRaw(str.data(), str.size()).
734 void WriteString(const string& str);
735 // Like WriteString() but writing directly to the target array.
736 static uint8* WriteStringToArray(const string& str, uint8* target);
737 // Write the varint-encoded size of str followed by str.
738 static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
739
740
741 // Instructs the CodedOutputStream to allow the underlying
742 // ZeroCopyOutputStream to hold pointers to the original structure instead of
743 // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
744 // underlying stream does not support aliasing, then enabling it has no
745 // affect. For now, this only affects the behavior of
746 // WriteRawMaybeAliased().
747 //
748 // NOTE: It is caller's responsibility to ensure that the chunk of memory
749 // remains live until all of the data has been consumed from the stream.
750 void EnableAliasing(bool enabled);
751
752 // Write a 32-bit little-endian integer.
753 void WriteLittleEndian32(uint32 value);
754 // Like WriteLittleEndian32() but writing directly to the target array.
755 static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
756 // Write a 64-bit little-endian integer.
757 void WriteLittleEndian64(uint64 value);
758 // Like WriteLittleEndian64() but writing directly to the target array.
759 static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
760
761 // Write an unsigned integer with Varint encoding. Writing a 32-bit value
762 // is equivalent to casting it to uint64 and writing it as a 64-bit value,
763 // but may be more efficient.
764 void WriteVarint32(uint32 value);
765 // Like WriteVarint32() but writing directly to the target array.
766 static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
767 // Write an unsigned integer with Varint encoding.
768 void WriteVarint64(uint64 value);
769 // Like WriteVarint64() but writing directly to the target array.
770 static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
771
772 // Equivalent to WriteVarint32() except when the value is negative,
773 // in which case it must be sign-extended to a full 10 bytes.
774 void WriteVarint32SignExtended(int32 value);
775 // Like WriteVarint32SignExtended() but writing directly to the target array.
776 static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
777
778 // This is identical to WriteVarint32(), but optimized for writing tags.
779 // In particular, if the input is a compile-time constant, this method
780 // compiles down to a couple instructions.
781 // Always inline because otherwise the aformentioned optimization can't work,
782 // but GCC by default doesn't want to inline this.
783 void WriteTag(uint32 value);
784 // Like WriteTag() but writing directly to the target array.
Austin Schuh40c16522018-10-28 20:27:54 -0700785 GOOGLE_PROTOBUF_ATTRIBUTE_ALWAYS_INLINE
786 static uint8* WriteTagToArray(uint32 value, uint8* target);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500787
788 // Returns the number of bytes needed to encode the given value as a varint.
Austin Schuh40c16522018-10-28 20:27:54 -0700789 static size_t VarintSize32(uint32 value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500790 // Returns the number of bytes needed to encode the given value as a varint.
Austin Schuh40c16522018-10-28 20:27:54 -0700791 static size_t VarintSize64(uint64 value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500792
793 // If negative, 10 bytes. Otheriwse, same as VarintSize32().
Austin Schuh40c16522018-10-28 20:27:54 -0700794 static size_t VarintSize32SignExtended(int32 value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500795
796 // Compile-time equivalent of VarintSize32().
797 template <uint32 Value>
798 struct StaticVarintSize32 {
Austin Schuh40c16522018-10-28 20:27:54 -0700799 static const size_t value =
Brian Silverman9c614bc2016-02-15 20:20:02 -0500800 (Value < (1 << 7))
801 ? 1
802 : (Value < (1 << 14))
803 ? 2
804 : (Value < (1 << 21))
805 ? 3
806 : (Value < (1 << 28))
807 ? 4
808 : 5;
809 };
810
811 // Returns the total number of bytes written since this object was created.
812 inline int ByteCount() const;
813
814 // Returns true if there was an underlying I/O error since this object was
815 // created.
816 bool HadError() const { return had_error_; }
817
Austin Schuh40c16522018-10-28 20:27:54 -0700818 // Deterministic serialization, if requested, guarantees that for a given
819 // binary, equal messages will always be serialized to the same bytes. This
820 // implies:
821 // . repeated serialization of a message will return the same bytes
822 // . different processes of the same binary (which may be executing on
823 // different machines) will serialize equal messages to the same bytes.
824 //
825 // Note the deterministic serialization is NOT canonical across languages; it
826 // is also unstable across different builds with schema changes due to unknown
827 // fields. Users who need canonical serialization, e.g., persistent storage in
828 // a canonical form, fingerprinting, etc., should define their own
829 // canonicalization specification and implement the serializer using
830 // reflection APIs rather than relying on this API.
831 //
832 // If deterministic serialization is requested, the serializer will
833 // sort map entries by keys in lexicographical order or numerical order.
834 // (This is an implementation detail and may subject to change.)
835 //
836 // There are two ways to determine whether serialization should be
837 // deterministic for this CodedOutputStream. If SetSerializationDeterministic
838 // has not yet been called, then the default comes from the global default,
839 // which is false, until SetDefaultSerializationDeterministic has been called.
840 // Otherwise, SetSerializationDeterministic has been called, and the last
841 // value passed to it is all that matters.
842 void SetSerializationDeterministic(bool value) {
843 is_serialization_deterministic_ = value;
844 }
845 // See above. Also, note that users of this CodedOutputStream may need to
846 // call IsSerializationDeterministic() to serialize in the intended way. This
847 // CodedOutputStream cannot enforce a desire for deterministic serialization
848 // by itself.
849 bool IsSerializationDeterministic() const {
850 return is_serialization_deterministic_;
851 }
852
853 static bool IsDefaultSerializationDeterministic() {
854 return default_serialization_deterministic_.load(std::memory_order_relaxed) != 0;
855 }
856
Brian Silverman9c614bc2016-02-15 20:20:02 -0500857 private:
858 GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
859
860 ZeroCopyOutputStream* output_;
861 uint8* buffer_;
862 int buffer_size_;
863 int total_bytes_; // Sum of sizes of all buffers seen so far.
864 bool had_error_; // Whether an error occurred during output.
865 bool aliasing_enabled_; // See EnableAliasing().
Austin Schuh40c16522018-10-28 20:27:54 -0700866 bool is_serialization_deterministic_;
867 static std::atomic<bool> default_serialization_deterministic_;
Brian Silverman9c614bc2016-02-15 20:20:02 -0500868
869 // Advance the buffer by a given number of bytes.
870 void Advance(int amount);
871
872 // Called when the buffer runs out to request more data. Implies an
873 // Advance(buffer_size_).
874 bool Refresh();
875
876 // Like WriteRaw() but may avoid copying if the underlying
877 // ZeroCopyOutputStream supports it.
878 void WriteAliasedRaw(const void* buffer, int size);
879
880 // If this write might cross the end of the buffer, we compose the bytes first
881 // then use WriteRaw().
882 void WriteVarint32SlowPath(uint32 value);
Austin Schuh40c16522018-10-28 20:27:54 -0700883 void WriteVarint64SlowPath(uint64 value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500884
Austin Schuh40c16522018-10-28 20:27:54 -0700885 // See above. Other projects may use "friend" to allow them to call this.
886 // After SetDefaultSerializationDeterministic() completes, all protocol
887 // buffer serializations will be deterministic by default. Thread safe.
888 // However, the meaning of "after" is subtle here: to be safe, each thread
889 // that wants deterministic serialization by default needs to call
890 // SetDefaultSerializationDeterministic() or ensure on its own that another
891 // thread has done so.
892 friend void ::google::protobuf::internal::MapTestForceDeterministic();
893 static void SetDefaultSerializationDeterministic() {
894 default_serialization_deterministic_.store(true, std::memory_order_relaxed);
895 }
Brian Silverman9c614bc2016-02-15 20:20:02 -0500896};
897
898// inline methods ====================================================
899// The vast majority of varints are only one byte. These inline
900// methods optimize for that case.
901
902inline bool CodedInputStream::ReadVarint32(uint32* value) {
903 uint32 v = 0;
904 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
905 v = *buffer_;
906 if (v < 0x80) {
907 *value = v;
908 Advance(1);
909 return true;
910 }
911 }
912 int64 result = ReadVarint32Fallback(v);
913 *value = static_cast<uint32>(result);
914 return result >= 0;
915}
916
917inline bool CodedInputStream::ReadVarint64(uint64* value) {
918 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
919 *value = *buffer_;
920 Advance(1);
921 return true;
922 }
923 std::pair<uint64, bool> p = ReadVarint64Fallback();
924 *value = p.first;
925 return p.second;
926}
927
Austin Schuh40c16522018-10-28 20:27:54 -0700928inline bool CodedInputStream::ReadVarintSizeAsInt(int* value) {
929 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
930 int v = *buffer_;
931 if (v < 0x80) {
932 *value = v;
933 Advance(1);
934 return true;
935 }
936 }
937 *value = ReadVarintSizeAsIntFallback();
938 return *value >= 0;
939}
940
Brian Silverman9c614bc2016-02-15 20:20:02 -0500941// static
942inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
943 const uint8* buffer,
944 uint32* value) {
945#if defined(PROTOBUF_LITTLE_ENDIAN)
946 memcpy(value, buffer, sizeof(*value));
947 return buffer + sizeof(*value);
948#else
949 *value = (static_cast<uint32>(buffer[0]) ) |
950 (static_cast<uint32>(buffer[1]) << 8) |
951 (static_cast<uint32>(buffer[2]) << 16) |
952 (static_cast<uint32>(buffer[3]) << 24);
953 return buffer + sizeof(*value);
954#endif
955}
956// static
957inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
958 const uint8* buffer,
959 uint64* value) {
960#if defined(PROTOBUF_LITTLE_ENDIAN)
961 memcpy(value, buffer, sizeof(*value));
962 return buffer + sizeof(*value);
963#else
964 uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
965 (static_cast<uint32>(buffer[1]) << 8) |
966 (static_cast<uint32>(buffer[2]) << 16) |
967 (static_cast<uint32>(buffer[3]) << 24);
968 uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
969 (static_cast<uint32>(buffer[5]) << 8) |
970 (static_cast<uint32>(buffer[6]) << 16) |
971 (static_cast<uint32>(buffer[7]) << 24);
972 *value = static_cast<uint64>(part0) |
973 (static_cast<uint64>(part1) << 32);
974 return buffer + sizeof(*value);
975#endif
976}
977
978inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
979#if defined(PROTOBUF_LITTLE_ENDIAN)
980 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
Austin Schuh40c16522018-10-28 20:27:54 -0700981 buffer_ = ReadLittleEndian32FromArray(buffer_, value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500982 return true;
983 } else {
984 return ReadLittleEndian32Fallback(value);
985 }
986#else
987 return ReadLittleEndian32Fallback(value);
988#endif
989}
990
991inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
992#if defined(PROTOBUF_LITTLE_ENDIAN)
993 if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
Austin Schuh40c16522018-10-28 20:27:54 -0700994 buffer_ = ReadLittleEndian64FromArray(buffer_, value);
Brian Silverman9c614bc2016-02-15 20:20:02 -0500995 return true;
996 } else {
997 return ReadLittleEndian64Fallback(value);
998 }
999#else
1000 return ReadLittleEndian64Fallback(value);
1001#endif
1002}
1003
Austin Schuh40c16522018-10-28 20:27:54 -07001004inline uint32 CodedInputStream::ReadTagNoLastTag() {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001005 uint32 v = 0;
1006 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
1007 v = *buffer_;
1008 if (v < 0x80) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001009 Advance(1);
1010 return v;
1011 }
1012 }
Austin Schuh40c16522018-10-28 20:27:54 -07001013 v = ReadTagFallback(v);
1014 return v;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001015}
1016
Austin Schuh40c16522018-10-28 20:27:54 -07001017inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoffNoLastTag(
Brian Silverman9c614bc2016-02-15 20:20:02 -05001018 uint32 cutoff) {
1019 // In performance-sensitive code we can expect cutoff to be a compile-time
1020 // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
1021 // compile time.
1022 uint32 first_byte_or_zero = 0;
1023 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
1024 // Hot case: buffer_ non_empty, buffer_[0] in [1, 128).
1025 // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
1026 // is large enough then is it better to check for the two-byte case first?
1027 first_byte_or_zero = buffer_[0];
1028 if (static_cast<int8>(buffer_[0]) > 0) {
1029 const uint32 kMax1ByteVarint = 0x7f;
Austin Schuh40c16522018-10-28 20:27:54 -07001030 uint32 tag = buffer_[0];
Brian Silverman9c614bc2016-02-15 20:20:02 -05001031 Advance(1);
1032 return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
1033 }
1034 // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
1035 // and tag is two bytes. The latter is tested by bitwise-and-not of the
1036 // first byte and the second byte.
Austin Schuh40c16522018-10-28 20:27:54 -07001037 if (cutoff >= 0x80 && GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
Brian Silverman9c614bc2016-02-15 20:20:02 -05001038 GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
1039 const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
Austin Schuh40c16522018-10-28 20:27:54 -07001040 uint32 tag = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001041 Advance(2);
1042 // It might make sense to test for tag == 0 now, but it is so rare that
1043 // that we don't bother. A varint-encoded 0 should be one byte unless
1044 // the encoder lost its mind. The second part of the return value of
1045 // this function is allowed to be either true or false if the tag is 0,
1046 // so we don't have to check for tag == 0. We may need to check whether
1047 // it exceeds cutoff.
1048 bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
1049 return std::make_pair(tag, at_or_below_cutoff);
1050 }
1051 }
1052 // Slow path
Austin Schuh40c16522018-10-28 20:27:54 -07001053 const uint32 tag = ReadTagFallback(first_byte_or_zero);
1054 return std::make_pair(tag, static_cast<uint32>(tag - 1) < cutoff);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001055}
1056
1057inline bool CodedInputStream::LastTagWas(uint32 expected) {
1058 return last_tag_ == expected;
1059}
1060
1061inline bool CodedInputStream::ConsumedEntireMessage() {
1062 return legitimate_message_end_;
1063}
1064
1065inline bool CodedInputStream::ExpectTag(uint32 expected) {
1066 if (expected < (1 << 7)) {
1067 if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
1068 Advance(1);
1069 return true;
1070 } else {
1071 return false;
1072 }
1073 } else if (expected < (1 << 14)) {
1074 if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
1075 buffer_[0] == static_cast<uint8>(expected | 0x80) &&
1076 buffer_[1] == static_cast<uint8>(expected >> 7)) {
1077 Advance(2);
1078 return true;
1079 } else {
1080 return false;
1081 }
1082 } else {
1083 // Don't bother optimizing for larger values.
1084 return false;
1085 }
1086}
1087
1088inline const uint8* CodedInputStream::ExpectTagFromArray(
1089 const uint8* buffer, uint32 expected) {
1090 if (expected < (1 << 7)) {
1091 if (buffer[0] == expected) {
1092 return buffer + 1;
1093 }
1094 } else if (expected < (1 << 14)) {
1095 if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
1096 buffer[1] == static_cast<uint8>(expected >> 7)) {
1097 return buffer + 2;
1098 }
1099 }
1100 return NULL;
1101}
1102
1103inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
1104 int* size) {
1105 *data = buffer_;
1106 *size = static_cast<int>(buffer_end_ - buffer_);
1107}
1108
1109inline bool CodedInputStream::ExpectAtEnd() {
1110 // If we are at a limit we know no more bytes can be read. Otherwise, it's
1111 // hard to say without calling Refresh(), and we'd rather not do that.
1112
1113 if (buffer_ == buffer_end_ &&
1114 ((buffer_size_after_limit_ != 0) ||
1115 (total_bytes_read_ == current_limit_))) {
1116 last_tag_ = 0; // Pretend we called ReadTag()...
1117 legitimate_message_end_ = true; // ... and it hit EOF.
1118 return true;
1119 } else {
1120 return false;
1121 }
1122}
1123
1124inline int CodedInputStream::CurrentPosition() const {
1125 return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
1126}
1127
1128inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
1129 if (buffer_size_ < size) {
1130 return NULL;
1131 } else {
1132 uint8* result = buffer_;
1133 Advance(size);
1134 return result;
1135 }
1136}
1137
1138inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
1139 uint8* target) {
1140 while (value >= 0x80) {
1141 *target = static_cast<uint8>(value | 0x80);
1142 value >>= 7;
1143 ++target;
1144 }
1145 *target = static_cast<uint8>(value);
1146 return target + 1;
1147}
1148
Austin Schuh40c16522018-10-28 20:27:54 -07001149inline uint8* CodedOutputStream::WriteVarint64ToArray(uint64 value,
1150 uint8* target) {
1151 while (value >= 0x80) {
1152 *target = static_cast<uint8>(value | 0x80);
1153 value >>= 7;
1154 ++target;
Brian Silverman9c614bc2016-02-15 20:20:02 -05001155 }
Austin Schuh40c16522018-10-28 20:27:54 -07001156 *target = static_cast<uint8>(value);
1157 return target + 1;
1158}
1159
1160inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
1161 WriteVarint64(static_cast<uint64>(value));
Brian Silverman9c614bc2016-02-15 20:20:02 -05001162}
1163
1164inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
1165 int32 value, uint8* target) {
Austin Schuh40c16522018-10-28 20:27:54 -07001166 return WriteVarint64ToArray(static_cast<uint64>(value), target);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001167}
1168
1169inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
1170 uint8* target) {
1171#if defined(PROTOBUF_LITTLE_ENDIAN)
1172 memcpy(target, &value, sizeof(value));
1173#else
1174 target[0] = static_cast<uint8>(value);
1175 target[1] = static_cast<uint8>(value >> 8);
1176 target[2] = static_cast<uint8>(value >> 16);
1177 target[3] = static_cast<uint8>(value >> 24);
1178#endif
1179 return target + sizeof(value);
1180}
1181
1182inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
1183 uint8* target) {
1184#if defined(PROTOBUF_LITTLE_ENDIAN)
1185 memcpy(target, &value, sizeof(value));
1186#else
1187 uint32 part0 = static_cast<uint32>(value);
1188 uint32 part1 = static_cast<uint32>(value >> 32);
1189
1190 target[0] = static_cast<uint8>(part0);
1191 target[1] = static_cast<uint8>(part0 >> 8);
1192 target[2] = static_cast<uint8>(part0 >> 16);
1193 target[3] = static_cast<uint8>(part0 >> 24);
1194 target[4] = static_cast<uint8>(part1);
1195 target[5] = static_cast<uint8>(part1 >> 8);
1196 target[6] = static_cast<uint8>(part1 >> 16);
1197 target[7] = static_cast<uint8>(part1 >> 24);
1198#endif
1199 return target + sizeof(value);
1200}
1201
1202inline void CodedOutputStream::WriteVarint32(uint32 value) {
1203 if (buffer_size_ >= 5) {
1204 // Fast path: We have enough bytes left in the buffer to guarantee that
1205 // this write won't cross the end, so we can skip the checks.
1206 uint8* target = buffer_;
1207 uint8* end = WriteVarint32ToArray(value, target);
Austin Schuh40c16522018-10-28 20:27:54 -07001208 int size = static_cast<int>(end - target);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001209 Advance(size);
1210 } else {
1211 WriteVarint32SlowPath(value);
1212 }
1213}
1214
Austin Schuh40c16522018-10-28 20:27:54 -07001215inline void CodedOutputStream::WriteVarint64(uint64 value) {
1216 if (buffer_size_ >= 10) {
1217 // Fast path: We have enough bytes left in the buffer to guarantee that
1218 // this write won't cross the end, so we can skip the checks.
1219 uint8* target = buffer_;
1220 uint8* end = WriteVarint64ToArray(value, target);
1221 int size = static_cast<int>(end - target);
1222 Advance(size);
1223 } else {
1224 WriteVarint64SlowPath(value);
1225 }
1226}
1227
Brian Silverman9c614bc2016-02-15 20:20:02 -05001228inline void CodedOutputStream::WriteTag(uint32 value) {
1229 WriteVarint32(value);
1230}
1231
1232inline uint8* CodedOutputStream::WriteTagToArray(
1233 uint32 value, uint8* target) {
1234 return WriteVarint32ToArray(value, target);
1235}
1236
Austin Schuh40c16522018-10-28 20:27:54 -07001237inline size_t CodedOutputStream::VarintSize32(uint32 value) {
1238 // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1239 // Use an explicit multiplication to implement the divide of
1240 // a number in the 1..31 range.
1241 // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1242 // undefined.
1243 uint32 log2value = Bits::Log2FloorNonZero(value | 0x1);
1244 return static_cast<size_t>((log2value * 9 + 73) / 64);
Brian Silverman9c614bc2016-02-15 20:20:02 -05001245}
1246
Austin Schuh40c16522018-10-28 20:27:54 -07001247inline size_t CodedOutputStream::VarintSize64(uint64 value) {
1248 // This computes value == 0 ? 1 : floor(log2(value)) / 7 + 1
1249 // Use an explicit multiplication to implement the divide of
1250 // a number in the 1..63 range.
1251 // Explicit OR 0x1 to avoid calling Bits::Log2FloorNonZero(0), which is
1252 // undefined.
1253 uint32 log2value = Bits::Log2FloorNonZero64(value | 0x1);
1254 return static_cast<size_t>((log2value * 9 + 73) / 64);
1255}
1256
1257inline size_t CodedOutputStream::VarintSize32SignExtended(int32 value) {
Brian Silverman9c614bc2016-02-15 20:20:02 -05001258 if (value < 0) {
1259 return 10; // TODO(kenton): Make this a symbolic constant.
1260 } else {
1261 return VarintSize32(static_cast<uint32>(value));
1262 }
1263}
1264
1265inline void CodedOutputStream::WriteString(const string& str) {
1266 WriteRaw(str.data(), static_cast<int>(str.size()));
1267}
1268
1269inline void CodedOutputStream::WriteRawMaybeAliased(
1270 const void* data, int size) {
1271 if (aliasing_enabled_) {
1272 WriteAliasedRaw(data, size);
1273 } else {
1274 WriteRaw(data, size);
1275 }
1276}
1277
1278inline uint8* CodedOutputStream::WriteStringToArray(
1279 const string& str, uint8* target) {
1280 return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
1281}
1282
1283inline int CodedOutputStream::ByteCount() const {
1284 return total_bytes_ - buffer_size_;
1285}
1286
1287inline void CodedInputStream::Advance(int amount) {
1288 buffer_ += amount;
1289}
1290
1291inline void CodedOutputStream::Advance(int amount) {
1292 buffer_ += amount;
1293 buffer_size_ -= amount;
1294}
1295
1296inline void CodedInputStream::SetRecursionLimit(int limit) {
1297 recursion_budget_ += limit - recursion_limit_;
1298 recursion_limit_ = limit;
1299}
1300
1301inline bool CodedInputStream::IncrementRecursionDepth() {
1302 --recursion_budget_;
1303 return recursion_budget_ >= 0;
1304}
1305
1306inline void CodedInputStream::DecrementRecursionDepth() {
1307 if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
1308}
1309
1310inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
1311 assert(recursion_budget_ < recursion_limit_);
1312 ++recursion_budget_;
1313}
1314
1315inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
1316 MessageFactory* factory) {
1317 extension_pool_ = pool;
1318 extension_factory_ = factory;
1319}
1320
1321inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
1322 return extension_pool_;
1323}
1324
1325inline MessageFactory* CodedInputStream::GetExtensionFactory() {
1326 return extension_factory_;
1327}
1328
1329inline int CodedInputStream::BufferSize() const {
1330 return static_cast<int>(buffer_end_ - buffer_);
1331}
1332
1333inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
1334 : buffer_(NULL),
1335 buffer_end_(NULL),
1336 input_(input),
1337 total_bytes_read_(0),
1338 overflow_bytes_(0),
1339 last_tag_(0),
1340 legitimate_message_end_(false),
1341 aliasing_enabled_(false),
1342 current_limit_(kint32max),
1343 buffer_size_after_limit_(0),
1344 total_bytes_limit_(kDefaultTotalBytesLimit),
Brian Silverman9c614bc2016-02-15 20:20:02 -05001345 recursion_budget_(default_recursion_limit_),
1346 recursion_limit_(default_recursion_limit_),
1347 extension_pool_(NULL),
1348 extension_factory_(NULL) {
1349 // Eagerly Refresh() so buffer space is immediately available.
1350 Refresh();
1351}
1352
1353inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
1354 : buffer_(buffer),
1355 buffer_end_(buffer + size),
1356 input_(NULL),
1357 total_bytes_read_(size),
1358 overflow_bytes_(0),
1359 last_tag_(0),
1360 legitimate_message_end_(false),
1361 aliasing_enabled_(false),
1362 current_limit_(size),
1363 buffer_size_after_limit_(0),
1364 total_bytes_limit_(kDefaultTotalBytesLimit),
Brian Silverman9c614bc2016-02-15 20:20:02 -05001365 recursion_budget_(default_recursion_limit_),
1366 recursion_limit_(default_recursion_limit_),
1367 extension_pool_(NULL),
1368 extension_factory_(NULL) {
1369 // Note that setting current_limit_ == size is important to prevent some
1370 // code paths from trying to access input_ and segfaulting.
1371}
1372
1373inline bool CodedInputStream::IsFlat() const {
1374 return input_ == NULL;
1375}
1376
Austin Schuh40c16522018-10-28 20:27:54 -07001377inline bool CodedInputStream::Skip(int count) {
1378 if (count < 0) return false; // security: count is often user-supplied
1379
1380 const int original_buffer_size = BufferSize();
1381
1382 if (count <= original_buffer_size) {
1383 // Just skipping within the current buffer. Easy.
1384 Advance(count);
1385 return true;
1386 }
1387
1388 return SkipFallback(count, original_buffer_size);
1389}
1390
Brian Silverman9c614bc2016-02-15 20:20:02 -05001391} // namespace io
1392} // namespace protobuf
1393
1394
Austin Schuh40c16522018-10-28 20:27:54 -07001395#if defined(_MSC_VER) && _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
Brian Silverman9c614bc2016-02-15 20:20:02 -05001396 #pragma runtime_checks("c", restore)
1397#endif // _MSC_VER && !defined(__INTEL_COMPILER)
1398
1399} // namespace google
1400#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__