Squashed 'third_party/protobuf/' content from commit e35e248
Change-Id: I6cbe123d09fe50fdcad0e51466665daeee7433c7
git-subtree-dir: third_party/protobuf
git-subtree-split: e35e24800fb8d694bdeea5fd63dc7d1b14d68723
diff --git a/src/google/protobuf/io/coded_stream.cc b/src/google/protobuf/io/coded_stream.cc
new file mode 100644
index 0000000..e3a34d0
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.cc
@@ -0,0 +1,917 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This implementation is heavily optimized to make reads and writes
+// of small values (especially varints) as fast as possible. In
+// particular, we optimize for the common case that a read or a write
+// will not cross the end of the buffer, since we can avoid a lot
+// of branching in this case.
+
+#include <google/protobuf/io/coded_stream_inl.h>
+#include <algorithm>
+#include <utility>
+#include <limits.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/arena.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/stl_util.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+namespace {
+
+static const int kMaxVarintBytes = 10;
+static const int kMaxVarint32Bytes = 5;
+
+
+inline bool NextNonEmpty(ZeroCopyInputStream* input,
+ const void** data, int* size) {
+ bool success;
+ do {
+ success = input->Next(data, size);
+ } while (success && *size == 0);
+ return success;
+}
+
+} // namespace
+
+// CodedInputStream ==================================================
+
+CodedInputStream::~CodedInputStream() {
+ if (input_ != NULL) {
+ BackUpInputToCurrentPosition();
+ }
+
+ if (total_bytes_warning_threshold_ == -2) {
+ GOOGLE_LOG(WARNING) << "The total number of bytes read was " << total_bytes_read_;
+ }
+}
+
+// Static.
+int CodedInputStream::default_recursion_limit_ = 100;
+
+
+void CodedOutputStream::EnableAliasing(bool enabled) {
+ aliasing_enabled_ = enabled && output_->AllowsAliasing();
+}
+
+void CodedInputStream::BackUpInputToCurrentPosition() {
+ int backup_bytes = BufferSize() + buffer_size_after_limit_ + overflow_bytes_;
+ if (backup_bytes > 0) {
+ input_->BackUp(backup_bytes);
+
+ // total_bytes_read_ doesn't include overflow_bytes_.
+ total_bytes_read_ -= BufferSize() + buffer_size_after_limit_;
+ buffer_end_ = buffer_;
+ buffer_size_after_limit_ = 0;
+ overflow_bytes_ = 0;
+ }
+}
+
+inline void CodedInputStream::RecomputeBufferLimits() {
+ buffer_end_ += buffer_size_after_limit_;
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ if (closest_limit < total_bytes_read_) {
+ // The limit position is in the current buffer. We must adjust
+ // the buffer size accordingly.
+ buffer_size_after_limit_ = total_bytes_read_ - closest_limit;
+ buffer_end_ -= buffer_size_after_limit_;
+ } else {
+ buffer_size_after_limit_ = 0;
+ }
+}
+
+CodedInputStream::Limit CodedInputStream::PushLimit(int byte_limit) {
+ // Current position relative to the beginning of the stream.
+ int current_position = CurrentPosition();
+
+ Limit old_limit = current_limit_;
+
+ // security: byte_limit is possibly evil, so check for negative values
+ // and overflow.
+ if (byte_limit >= 0 &&
+ byte_limit <= INT_MAX - current_position) {
+ current_limit_ = current_position + byte_limit;
+ } else {
+ // Negative or overflow.
+ current_limit_ = INT_MAX;
+ }
+
+ // We need to enforce all limits, not just the new one, so if the previous
+ // limit was before the new requested limit, we continue to enforce the
+ // previous limit.
+ current_limit_ = min(current_limit_, old_limit);
+
+ RecomputeBufferLimits();
+ return old_limit;
+}
+
+void CodedInputStream::PopLimit(Limit limit) {
+ // The limit passed in is actually the *old* limit, which we returned from
+ // PushLimit().
+ current_limit_ = limit;
+ RecomputeBufferLimits();
+
+ // We may no longer be at a legitimate message end. ReadTag() needs to be
+ // called again to find out.
+ legitimate_message_end_ = false;
+}
+
+std::pair<CodedInputStream::Limit, int>
+CodedInputStream::IncrementRecursionDepthAndPushLimit(int byte_limit) {
+ return std::make_pair(PushLimit(byte_limit), --recursion_budget_);
+}
+
+CodedInputStream::Limit CodedInputStream::ReadLengthAndPushLimit() {
+ uint32 length;
+ return PushLimit(ReadVarint32(&length) ? length : 0);
+}
+
+bool CodedInputStream::DecrementRecursionDepthAndPopLimit(Limit limit) {
+ bool result = ConsumedEntireMessage();
+ PopLimit(limit);
+ GOOGLE_DCHECK_LT(recursion_budget_, recursion_limit_);
+ ++recursion_budget_;
+ return result;
+}
+
+bool CodedInputStream::CheckEntireMessageConsumedAndPopLimit(Limit limit) {
+ bool result = ConsumedEntireMessage();
+ PopLimit(limit);
+ return result;
+}
+
+int CodedInputStream::BytesUntilLimit() const {
+ if (current_limit_ == INT_MAX) return -1;
+ int current_position = CurrentPosition();
+
+ return current_limit_ - current_position;
+}
+
+void CodedInputStream::SetTotalBytesLimit(
+ int total_bytes_limit, int warning_threshold) {
+ // Make sure the limit isn't already past, since this could confuse other
+ // code.
+ int current_position = CurrentPosition();
+ total_bytes_limit_ = max(current_position, total_bytes_limit);
+ if (warning_threshold >= 0) {
+ total_bytes_warning_threshold_ = warning_threshold;
+ } else {
+ // warning_threshold is negative
+ total_bytes_warning_threshold_ = -1;
+ }
+ RecomputeBufferLimits();
+}
+
+int CodedInputStream::BytesUntilTotalBytesLimit() const {
+ if (total_bytes_limit_ == INT_MAX) return -1;
+ return total_bytes_limit_ - CurrentPosition();
+}
+
+void CodedInputStream::PrintTotalBytesLimitError() {
+ GOOGLE_LOG(ERROR) << "A protocol message was rejected because it was too "
+ "big (more than " << total_bytes_limit_
+ << " bytes). To increase the limit (or to disable these "
+ "warnings), see CodedInputStream::SetTotalBytesLimit() "
+ "in google/protobuf/io/coded_stream.h.";
+}
+
+bool CodedInputStream::Skip(int count) {
+ if (count < 0) return false; // security: count is often user-supplied
+
+ const int original_buffer_size = BufferSize();
+
+ if (count <= original_buffer_size) {
+ // Just skipping within the current buffer. Easy.
+ Advance(count);
+ return true;
+ }
+
+ if (buffer_size_after_limit_ > 0) {
+ // We hit a limit inside this buffer. Advance to the limit and fail.
+ Advance(original_buffer_size);
+ return false;
+ }
+
+ count -= original_buffer_size;
+ buffer_ = NULL;
+ buffer_end_ = buffer_;
+
+ // Make sure this skip doesn't try to skip past the current limit.
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ int bytes_until_limit = closest_limit - total_bytes_read_;
+ if (bytes_until_limit < count) {
+ // We hit the limit. Skip up to it then fail.
+ if (bytes_until_limit > 0) {
+ total_bytes_read_ = closest_limit;
+ input_->Skip(bytes_until_limit);
+ }
+ return false;
+ }
+
+ total_bytes_read_ += count;
+ return input_->Skip(count);
+}
+
+bool CodedInputStream::GetDirectBufferPointer(const void** data, int* size) {
+ if (BufferSize() == 0 && !Refresh()) return false;
+
+ *data = buffer_;
+ *size = BufferSize();
+ return true;
+}
+
+bool CodedInputStream::ReadRaw(void* buffer, int size) {
+ return InternalReadRawInline(buffer, size);
+}
+
+bool CodedInputStream::ReadString(string* buffer, int size) {
+ if (size < 0) return false; // security: size is often user-supplied
+ return InternalReadStringInline(buffer, size);
+}
+
+bool CodedInputStream::ReadStringFallback(string* buffer, int size) {
+ if (!buffer->empty()) {
+ buffer->clear();
+ }
+
+ int closest_limit = min(current_limit_, total_bytes_limit_);
+ if (closest_limit != INT_MAX) {
+ int bytes_to_limit = closest_limit - CurrentPosition();
+ if (bytes_to_limit > 0 && size > 0 && size <= bytes_to_limit) {
+ buffer->reserve(size);
+ }
+ }
+
+ int current_buffer_size;
+ while ((current_buffer_size = BufferSize()) < size) {
+ // Some STL implementations "helpfully" crash on buffer->append(NULL, 0).
+ if (current_buffer_size != 0) {
+ // Note: string1.append(string2) is O(string2.size()) (as opposed to
+ // O(string1.size() + string2.size()), which would be bad).
+ buffer->append(reinterpret_cast<const char*>(buffer_),
+ current_buffer_size);
+ }
+ size -= current_buffer_size;
+ Advance(current_buffer_size);
+ if (!Refresh()) return false;
+ }
+
+ buffer->append(reinterpret_cast<const char*>(buffer_), size);
+ Advance(size);
+
+ return true;
+}
+
+
+bool CodedInputStream::ReadLittleEndian32Fallback(uint32* value) {
+ uint8 bytes[sizeof(*value)];
+
+ const uint8* ptr;
+ if (BufferSize() >= sizeof(*value)) {
+ // Fast path: Enough bytes in the buffer to read directly.
+ ptr = buffer_;
+ Advance(sizeof(*value));
+ } else {
+ // Slow path: Had to read past the end of the buffer.
+ if (!ReadRaw(bytes, sizeof(*value))) return false;
+ ptr = bytes;
+ }
+ ReadLittleEndian32FromArray(ptr, value);
+ return true;
+}
+
+bool CodedInputStream::ReadLittleEndian64Fallback(uint64* value) {
+ uint8 bytes[sizeof(*value)];
+
+ const uint8* ptr;
+ if (BufferSize() >= sizeof(*value)) {
+ // Fast path: Enough bytes in the buffer to read directly.
+ ptr = buffer_;
+ Advance(sizeof(*value));
+ } else {
+ // Slow path: Had to read past the end of the buffer.
+ if (!ReadRaw(bytes, sizeof(*value))) return false;
+ ptr = bytes;
+ }
+ ReadLittleEndian64FromArray(ptr, value);
+ return true;
+}
+
+namespace {
+
+// Read a varint from the given buffer, write it to *value, and return a pair.
+// The first part of the pair is true iff the read was successful. The second
+// part is buffer + (number of bytes read). This function is always inlined,
+// so returning a pair is costless.
+GOOGLE_ATTRIBUTE_ALWAYS_INLINE ::std::pair<bool, const uint8*> ReadVarint32FromArray(
+ uint32 first_byte, const uint8* buffer,
+ uint32* value);
+inline ::std::pair<bool, const uint8*> ReadVarint32FromArray(
+ uint32 first_byte, const uint8* buffer, uint32* value) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+ GOOGLE_DCHECK_EQ(*buffer, first_byte);
+ GOOGLE_DCHECK_EQ(first_byte & 0x80, 0x80) << first_byte;
+ const uint8* ptr = buffer;
+ uint32 b;
+ uint32 result = first_byte - 0x80;
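+  // The first byte's continuation bit (0x80) is known to be set (see the
+  // DCHECK above), so it is subtracted out immediately; each
+  // "result -= 0x80 << n" below likewise cancels the continuation bit that
+  // was added along with the payload bits of the previous byte.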
+ ++ptr; // We just processed the first byte. Move on to the second.
+ b = *(ptr++); result += b << 7; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 7;
+ b = *(ptr++); result += b << 14; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 14;
+ b = *(ptr++); result += b << 21; if (!(b & 0x80)) goto done;
+ result -= 0x80 << 21;
+ b = *(ptr++); result += b << 28; if (!(b & 0x80)) goto done;
+ // "result -= 0x80 << 28" is irrevelant.
+
+ // If the input is larger than 32 bits, we still need to read it all
+ // and discard the high-order bits.
+ for (int i = 0; i < kMaxVarintBytes - kMaxVarint32Bytes; i++) {
+ b = *(ptr++); if (!(b & 0x80)) goto done;
+ }
+
+ // We have overrun the maximum size of a varint (10 bytes). Assume
+ // the data is corrupt.
+ return std::make_pair(false, ptr);
+
+ done:
+ *value = result;
+ return std::make_pair(true, ptr);
+}
+
+} // namespace
+
+bool CodedInputStream::ReadVarint32Slow(uint32* value) {
+ // Directly invoke ReadVarint64Fallback, since we already tried to optimize
+ // for one-byte varints.
+ std::pair<uint64, bool> p = ReadVarint64Fallback();
+ *value = static_cast<uint32>(p.first);
+ return p.second;
+}
+
+int64 CodedInputStream::ReadVarint32Fallback(uint32 first_byte_or_zero) {
+ if (BufferSize() >= kMaxVarintBytes ||
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+ GOOGLE_DCHECK_NE(first_byte_or_zero, 0)
+ << "Caller should provide us with *buffer_ when buffer is non-empty";
+ uint32 temp;
+ ::std::pair<bool, const uint8*> p =
+ ReadVarint32FromArray(first_byte_or_zero, buffer_, &temp);
+ if (!p.first) return -1;
+ buffer_ = p.second;
+ return temp;
+ } else {
+ // Really slow case: we will incur the cost of an extra function call here,
+ // but moving this out of line reduces the size of this function, which
+    // improves the common case. In micro benchmarks, this is worth about 10-15%.
+ uint32 temp;
+ return ReadVarint32Slow(&temp) ? static_cast<int64>(temp) : -1;
+ }
+}
+
+uint32 CodedInputStream::ReadTagSlow() {
+ if (buffer_ == buffer_end_) {
+ // Call refresh.
+ if (!Refresh()) {
+ // Refresh failed. Make sure that it failed due to EOF, not because
+ // we hit total_bytes_limit_, which, unlike normal limits, is not a
+ // valid place to end a message.
+ int current_position = total_bytes_read_ - buffer_size_after_limit_;
+ if (current_position >= total_bytes_limit_) {
+ // Hit total_bytes_limit_. But if we also hit the normal limit,
+ // we're still OK.
+ legitimate_message_end_ = current_limit_ == total_bytes_limit_;
+ } else {
+ legitimate_message_end_ = true;
+ }
+ return 0;
+ }
+ }
+
+ // For the slow path, just do a 64-bit read. Try to optimize for one-byte tags
+ // again, since we have now refreshed the buffer.
+ uint64 result = 0;
+ if (!ReadVarint64(&result)) return 0;
+ return static_cast<uint32>(result);
+}
+
+uint32 CodedInputStream::ReadTagFallback(uint32 first_byte_or_zero) {
+ const int buf_size = BufferSize();
+ if (buf_size >= kMaxVarintBytes ||
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
+ (buf_size > 0 && !(buffer_end_[-1] & 0x80))) {
+ GOOGLE_DCHECK_EQ(first_byte_or_zero, buffer_[0]);
+ if (first_byte_or_zero == 0) {
+ ++buffer_;
+ return 0;
+ }
+ uint32 tag;
+ ::std::pair<bool, const uint8*> p =
+ ReadVarint32FromArray(first_byte_or_zero, buffer_, &tag);
+ if (!p.first) {
+ return 0;
+ }
+ buffer_ = p.second;
+ return tag;
+ } else {
+ // We are commonly at a limit when attempting to read tags. Try to quickly
+ // detect this case without making another function call.
+ if ((buf_size == 0) &&
+ ((buffer_size_after_limit_ > 0) ||
+ (total_bytes_read_ == current_limit_)) &&
+ // Make sure that the limit we hit is not total_bytes_limit_, since
+ // in that case we still need to call Refresh() so that it prints an
+ // error.
+ total_bytes_read_ - buffer_size_after_limit_ < total_bytes_limit_) {
+ // We hit a byte limit.
+ legitimate_message_end_ = true;
+ return 0;
+ }
+ return ReadTagSlow();
+ }
+}
+
+bool CodedInputStream::ReadVarint64Slow(uint64* value) {
+ // Slow path: This read might cross the end of the buffer, so we
+ // need to check and refresh the buffer if and when it does.
+
+ uint64 result = 0;
+ int count = 0;
+ uint32 b;
+
+ do {
+ if (count == kMaxVarintBytes) return false;
+ while (buffer_ == buffer_end_) {
+ if (!Refresh()) return false;
+ }
+ b = *buffer_;
+ result |= static_cast<uint64>(b & 0x7F) << (7 * count);
+ Advance(1);
+ ++count;
+ } while (b & 0x80);
+
+ *value = result;
+ return true;
+}
+
+std::pair<uint64, bool> CodedInputStream::ReadVarint64Fallback() {
+ if (BufferSize() >= kMaxVarintBytes ||
+ // Optimization: We're also safe if the buffer is non-empty and it ends
+ // with a byte that would terminate a varint.
+ (buffer_end_ > buffer_ && !(buffer_end_[-1] & 0x80))) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this read won't cross the end, so we can skip the checks.
+
+ const uint8* ptr = buffer_;
+ uint32 b;
+
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ uint32 part0 = 0, part1 = 0, part2 = 0;
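+    // part0 accumulates bits 0-27, part1 bits 28-55, and part2 bits 56-63;
+    // they are recombined below as part0 | (part1 << 28) | (part2 << 56).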
+
+ b = *(ptr++); part0 = b ; if (!(b & 0x80)) goto done;
+ part0 -= 0x80;
+ b = *(ptr++); part0 += b << 7; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 7;
+ b = *(ptr++); part0 += b << 14; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 14;
+ b = *(ptr++); part0 += b << 21; if (!(b & 0x80)) goto done;
+ part0 -= 0x80 << 21;
+ b = *(ptr++); part1 = b ; if (!(b & 0x80)) goto done;
+ part1 -= 0x80;
+ b = *(ptr++); part1 += b << 7; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 7;
+ b = *(ptr++); part1 += b << 14; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 14;
+ b = *(ptr++); part1 += b << 21; if (!(b & 0x80)) goto done;
+ part1 -= 0x80 << 21;
+ b = *(ptr++); part2 = b ; if (!(b & 0x80)) goto done;
+ part2 -= 0x80;
+ b = *(ptr++); part2 += b << 7; if (!(b & 0x80)) goto done;
+ // "part2 -= 0x80 << 7" is irrelevant because (0x80 << 7) << 56 is 0.
+
+ // We have overrun the maximum size of a varint (10 bytes). The data
+ // must be corrupt.
+ return std::make_pair(0, false);
+
+ done:
+ Advance(ptr - buffer_);
+ return std::make_pair((static_cast<uint64>(part0)) |
+ (static_cast<uint64>(part1) << 28) |
+ (static_cast<uint64>(part2) << 56),
+ true);
+ } else {
+ uint64 temp;
+ bool success = ReadVarint64Slow(&temp);
+ return std::make_pair(temp, success);
+ }
+}
+
+bool CodedInputStream::Refresh() {
+ GOOGLE_DCHECK_EQ(0, BufferSize());
+
+ if (buffer_size_after_limit_ > 0 || overflow_bytes_ > 0 ||
+ total_bytes_read_ == current_limit_) {
+ // We've hit a limit. Stop.
+ int current_position = total_bytes_read_ - buffer_size_after_limit_;
+
+ if (current_position >= total_bytes_limit_ &&
+ total_bytes_limit_ != current_limit_) {
+ // Hit total_bytes_limit_.
+ PrintTotalBytesLimitError();
+ }
+
+ return false;
+ }
+
+ if (total_bytes_warning_threshold_ >= 0 &&
+ total_bytes_read_ >= total_bytes_warning_threshold_) {
+ GOOGLE_LOG(WARNING) << "Reading dangerously large protocol message. If the "
+ "message turns out to be larger than "
+ << total_bytes_limit_ << " bytes, parsing will be halted "
+ "for security reasons. To increase the limit (or to "
+ "disable these warnings), see "
+ "CodedInputStream::SetTotalBytesLimit() in "
+ "google/protobuf/io/coded_stream.h.";
+
+ // Don't warn again for this stream, and print total size at the end.
+ total_bytes_warning_threshold_ = -2;
+ }
+
+ const void* void_buffer;
+ int buffer_size;
+ if (NextNonEmpty(input_, &void_buffer, &buffer_size)) {
+ buffer_ = reinterpret_cast<const uint8*>(void_buffer);
+ buffer_end_ = buffer_ + buffer_size;
+ GOOGLE_CHECK_GE(buffer_size, 0);
+
+ if (total_bytes_read_ <= INT_MAX - buffer_size) {
+ total_bytes_read_ += buffer_size;
+ } else {
+ // Overflow. Reset buffer_end_ to not include the bytes beyond INT_MAX.
+ // We can't get that far anyway, because total_bytes_limit_ is guaranteed
+ // to be less than it. We need to keep track of the number of bytes
+ // we discarded, though, so that we can call input_->BackUp() to back
+ // up over them on destruction.
+
+ // The following line is equivalent to:
+ // overflow_bytes_ = total_bytes_read_ + buffer_size - INT_MAX;
+ // except that it avoids overflows. Signed integer overflow has
+ // undefined results according to the C standard.
+ overflow_bytes_ = total_bytes_read_ - (INT_MAX - buffer_size);
+ buffer_end_ -= overflow_bytes_;
+ total_bytes_read_ = INT_MAX;
+ }
+
+ RecomputeBufferLimits();
+ return true;
+ } else {
+ buffer_ = NULL;
+ buffer_end_ = NULL;
+ return false;
+ }
+}
+
+// CodedOutputStream =================================================
+
+CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output)
+ : output_(output),
+ buffer_(NULL),
+ buffer_size_(0),
+ total_bytes_(0),
+ had_error_(false),
+ aliasing_enabled_(false) {
+ // Eagerly Refresh() so buffer space is immediately available.
+ Refresh();
+ // The Refresh() may have failed. If the client doesn't write any data,
+ // though, don't consider this an error. If the client does write data, then
+ // another Refresh() will be attempted and it will set the error once again.
+ had_error_ = false;
+}
+
+CodedOutputStream::CodedOutputStream(ZeroCopyOutputStream* output,
+ bool do_eager_refresh)
+ : output_(output),
+ buffer_(NULL),
+ buffer_size_(0),
+ total_bytes_(0),
+ had_error_(false),
+ aliasing_enabled_(false) {
+ if (do_eager_refresh) {
+ // Eagerly Refresh() so buffer space is immediately available.
+ Refresh();
+ // The Refresh() may have failed. If the client doesn't write any data,
+ // though, don't consider this an error. If the client does write data, then
+ // another Refresh() will be attempted and it will set the error once again.
+ had_error_ = false;
+ }
+}
+
+CodedOutputStream::~CodedOutputStream() {
+ Trim();
+}
+
+void CodedOutputStream::Trim() {
+ if (buffer_size_ > 0) {
+ output_->BackUp(buffer_size_);
+ total_bytes_ -= buffer_size_;
+ buffer_size_ = 0;
+ buffer_ = NULL;
+ }
+}
+
+bool CodedOutputStream::Skip(int count) {
+ if (count < 0) return false;
+
+ while (count > buffer_size_) {
+ count -= buffer_size_;
+ if (!Refresh()) return false;
+ }
+
+ Advance(count);
+ return true;
+}
+
+bool CodedOutputStream::GetDirectBufferPointer(void** data, int* size) {
+ if (buffer_size_ == 0 && !Refresh()) return false;
+
+ *data = buffer_;
+ *size = buffer_size_;
+ return true;
+}
+
+void CodedOutputStream::WriteRaw(const void* data, int size) {
+ while (buffer_size_ < size) {
+ memcpy(buffer_, data, buffer_size_);
+ size -= buffer_size_;
+ data = reinterpret_cast<const uint8*>(data) + buffer_size_;
+ if (!Refresh()) return;
+ }
+
+ memcpy(buffer_, data, size);
+ Advance(size);
+}
+
+uint8* CodedOutputStream::WriteRawToArray(
+ const void* data, int size, uint8* target) {
+ memcpy(target, data, size);
+ return target + size;
+}
+
+
+void CodedOutputStream::WriteAliasedRaw(const void* data, int size) {
+  if (size < buffer_size_) {
+ WriteRaw(data, size);
+ } else {
+ Trim();
+
+ total_bytes_ += size;
+ had_error_ |= !output_->WriteAliasedRaw(data, size);
+ }
+}
+
+void CodedOutputStream::WriteLittleEndian32(uint32 value) {
+ uint8 bytes[sizeof(value)];
+
+ bool use_fast = buffer_size_ >= sizeof(value);
+ uint8* ptr = use_fast ? buffer_ : bytes;
+
+ WriteLittleEndian32ToArray(value, ptr);
+
+ if (use_fast) {
+ Advance(sizeof(value));
+ } else {
+ WriteRaw(bytes, sizeof(value));
+ }
+}
+
+void CodedOutputStream::WriteLittleEndian64(uint64 value) {
+ uint8 bytes[sizeof(value)];
+
+ bool use_fast = buffer_size_ >= sizeof(value);
+ uint8* ptr = use_fast ? buffer_ : bytes;
+
+ WriteLittleEndian64ToArray(value, ptr);
+
+ if (use_fast) {
+ Advance(sizeof(value));
+ } else {
+ WriteRaw(bytes, sizeof(value));
+ }
+}
+
+void CodedOutputStream::WriteVarint32SlowPath(uint32 value) {
+ uint8 bytes[kMaxVarint32Bytes];
+ uint8* target = &bytes[0];
+ uint8* end = WriteVarint32ToArray(value, target);
+ int size = end - target;
+ WriteRaw(bytes, size);
+}
+
+inline uint8* CodedOutputStream::WriteVarint64ToArrayInline(
+ uint64 value, uint8* target) {
+ // Splitting into 32-bit pieces gives better performance on 32-bit
+ // processors.
+ uint32 part0 = static_cast<uint32>(value );
+ uint32 part1 = static_cast<uint32>(value >> 28);
+ uint32 part2 = static_cast<uint32>(value >> 56);
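+  // part0 supplies output bytes 1-4 (bits 0-27), part1 bytes 5-8
+  // (bits 28-55), and part2 bytes 9-10 (bits 56-63).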
+
+ int size;
+
+ // Here we can't really optimize for small numbers, since the value is
+  // split into three parts. Checking for numbers < 128, for instance,
+ // would require three comparisons, since you'd have to make sure part1
+ // and part2 are zero. However, if the caller is using 64-bit integers,
+ // it is likely that they expect the numbers to often be very large, so
+ // we probably don't want to optimize for small numbers anyway. Thus,
+ // we end up with a hardcoded binary search tree...
+ if (part2 == 0) {
+ if (part1 == 0) {
+ if (part0 < (1 << 14)) {
+ if (part0 < (1 << 7)) {
+ size = 1; goto size1;
+ } else {
+ size = 2; goto size2;
+ }
+ } else {
+ if (part0 < (1 << 21)) {
+ size = 3; goto size3;
+ } else {
+ size = 4; goto size4;
+ }
+ }
+ } else {
+ if (part1 < (1 << 14)) {
+ if (part1 < (1 << 7)) {
+ size = 5; goto size5;
+ } else {
+ size = 6; goto size6;
+ }
+ } else {
+ if (part1 < (1 << 21)) {
+ size = 7; goto size7;
+ } else {
+ size = 8; goto size8;
+ }
+ }
+ }
+ } else {
+ if (part2 < (1 << 7)) {
+ size = 9; goto size9;
+ } else {
+ size = 10; goto size10;
+ }
+ }
+
+ GOOGLE_LOG(FATAL) << "Can't get here.";
+
+ size10: target[9] = static_cast<uint8>((part2 >> 7) | 0x80);
+ size9 : target[8] = static_cast<uint8>((part2 ) | 0x80);
+ size8 : target[7] = static_cast<uint8>((part1 >> 21) | 0x80);
+ size7 : target[6] = static_cast<uint8>((part1 >> 14) | 0x80);
+ size6 : target[5] = static_cast<uint8>((part1 >> 7) | 0x80);
+ size5 : target[4] = static_cast<uint8>((part1 ) | 0x80);
+ size4 : target[3] = static_cast<uint8>((part0 >> 21) | 0x80);
+ size3 : target[2] = static_cast<uint8>((part0 >> 14) | 0x80);
+ size2 : target[1] = static_cast<uint8>((part0 >> 7) | 0x80);
+ size1 : target[0] = static_cast<uint8>((part0 ) | 0x80);
+
+ target[size-1] &= 0x7F;
+ return target + size;
+}
+
+void CodedOutputStream::WriteVarint64(uint64 value) {
+ if (buffer_size_ >= kMaxVarintBytes) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this write won't cross the end, so we can skip the checks.
+ uint8* target = buffer_;
+
+ uint8* end = WriteVarint64ToArrayInline(value, target);
+ int size = end - target;
+ Advance(size);
+ } else {
+ // Slow path: This write might cross the end of the buffer, so we
+ // compose the bytes first then use WriteRaw().
+ uint8 bytes[kMaxVarintBytes];
+ int size = 0;
+ while (value > 0x7F) {
+ bytes[size++] = (static_cast<uint8>(value) & 0x7F) | 0x80;
+ value >>= 7;
+ }
+ bytes[size++] = static_cast<uint8>(value) & 0x7F;
+ WriteRaw(bytes, size);
+ }
+}
+
+uint8* CodedOutputStream::WriteVarint64ToArray(
+ uint64 value, uint8* target) {
+ return WriteVarint64ToArrayInline(value, target);
+}
+
+bool CodedOutputStream::Refresh() {
+ void* void_buffer;
+ if (output_->Next(&void_buffer, &buffer_size_)) {
+ buffer_ = reinterpret_cast<uint8*>(void_buffer);
+ total_bytes_ += buffer_size_;
+ return true;
+ } else {
+ buffer_ = NULL;
+ buffer_size_ = 0;
+ had_error_ = true;
+ return false;
+ }
+}
+
+int CodedOutputStream::VarintSize32Fallback(uint32 value) {
+ if (value < (1 << 7)) {
+ return 1;
+ } else if (value < (1 << 14)) {
+ return 2;
+ } else if (value < (1 << 21)) {
+ return 3;
+ } else if (value < (1 << 28)) {
+ return 4;
+ } else {
+ return 5;
+ }
+}
+
+int CodedOutputStream::VarintSize64(uint64 value) {
+ if (value < (1ull << 35)) {
+ if (value < (1ull << 7)) {
+ return 1;
+ } else if (value < (1ull << 14)) {
+ return 2;
+ } else if (value < (1ull << 21)) {
+ return 3;
+ } else if (value < (1ull << 28)) {
+ return 4;
+ } else {
+ return 5;
+ }
+ } else {
+ if (value < (1ull << 42)) {
+ return 6;
+ } else if (value < (1ull << 49)) {
+ return 7;
+ } else if (value < (1ull << 56)) {
+ return 8;
+ } else if (value < (1ull << 63)) {
+ return 9;
+ } else {
+ return 10;
+ }
+ }
+}
+
+uint8* CodedOutputStream::WriteStringWithSizeToArray(const string& str,
+ uint8* target) {
+ GOOGLE_DCHECK_LE(str.size(), kuint32max);
+ target = WriteVarint32ToArray(str.size(), target);
+ return WriteStringToArray(str, target);
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/coded_stream.h b/src/google/protobuf/io/coded_stream.h
new file mode 100644
index 0000000..e377100
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream.h
@@ -0,0 +1,1294 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the CodedInputStream and CodedOutputStream classes,
+// which wrap a ZeroCopyInputStream or ZeroCopyOutputStream, respectively,
+// and allow you to read or write individual pieces of data in various
+// formats. In particular, these implement the varint encoding for
+// integers, a simple variable-length encoding in which smaller numbers
+// take fewer bytes.
+//
+// Typically these classes will only be used internally by the protocol
+// buffer library in order to encode and decode protocol buffers. Clients
+// of the library only need to know about this class if they wish to write
+// custom message parsing or serialization procedures.
+//
+// CodedOutputStream example:
+// // Write some data to "myfile". First we write a 4-byte "magic number"
+// // to identify the file type, then write a length-delimited string. The
+// // string is composed of a varint giving the length followed by the raw
+// // bytes.
+// int fd = open("myfile", O_CREAT | O_WRONLY);
+// ZeroCopyOutputStream* raw_output = new FileOutputStream(fd);
+// CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+//
+// int magic_number = 1234;
+// char text[] = "Hello world!";
+// coded_output->WriteLittleEndian32(magic_number);
+// coded_output->WriteVarint32(strlen(text));
+// coded_output->WriteRaw(text, strlen(text));
+//
+// delete coded_output;
+// delete raw_output;
+// close(fd);
+//
+// CodedInputStream example:
+// // Read a file created by the above code.
+// int fd = open("myfile", O_RDONLY);
+// ZeroCopyInputStream* raw_input = new FileInputStream(fd);
+//   CodedInputStream* coded_input = new CodedInputStream(raw_input);
+//
+// coded_input->ReadLittleEndian32(&magic_number);
+// if (magic_number != 1234) {
+// cerr << "File not in expected format." << endl;
+// return;
+// }
+//
+// uint32 size;
+// coded_input->ReadVarint32(&size);
+//
+// char* text = new char[size + 1];
+//   coded_input->ReadRaw(text, size);
+// text[size] = '\0';
+//
+// delete coded_input;
+// delete raw_input;
+// close(fd);
+//
+// cout << "Text is: " << text << endl;
+// delete [] text;
+//
+// For those who are interested, varint encoding is defined as follows:
+//
+// The encoding operates on unsigned integers of up to 64 bits in length.
+// Each byte of the encoded value has the format:
+// * bits 0-6: Seven bits of the number being encoded.
+// * bit 7: Zero if this is the last byte in the encoding (in which
+// case all remaining bits of the number are zero) or 1 if
+// more bytes follow.
+// The first byte contains the least-significant 7 bits of the number, the
+// second byte (if present) contains the next-least-significant 7 bits,
+// and so on. So, the binary number 1011000101011 would be encoded in two
+// bytes as "10101011 00101100".
+//
+// In theory, varint could be used to encode integers of any length.
+// However, for practicality we set a limit at 64 bits. The maximum encoded
+// length of a number is thus 10 bytes.
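+// (Each encoded byte carries 7 payload bits, and 64 / 7 rounds up to 10.)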
+
+#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
+
+#include <assert.h>
+#include <string>
+#include <utility>
+#ifdef _MSC_VER
+  // Assuming Windows is always little-endian.
+ #if !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+ #define PROTOBUF_LITTLE_ENDIAN 1
+ #endif
+ #if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
+ // If MSVC has "/RTCc" set, it will complain about truncating casts at
+ // runtime. This file contains some intentional truncating casts.
+ #pragma runtime_checks("c", off)
+ #endif
+#else
+ #include <sys/param.h> // __BYTE_ORDER
+ #if ((defined(__LITTLE_ENDIAN__) && !defined(__BIG_ENDIAN__)) || \
+ (defined(__BYTE_ORDER) && __BYTE_ORDER == __LITTLE_ENDIAN)) && \
+ !defined(PROTOBUF_DISABLE_LITTLE_ENDIAN_OPT_FOR_TEST)
+ #define PROTOBUF_LITTLE_ENDIAN 1
+ #endif
+#endif
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+
+class DescriptorPool;
+class MessageFactory;
+
+namespace io {
+
+// Defined in this file.
+class CodedInputStream;
+class CodedOutputStream;
+
+// Defined in other files.
+class ZeroCopyInputStream; // zero_copy_stream.h
+class ZeroCopyOutputStream; // zero_copy_stream.h
+
+// Class which reads and decodes binary data which is composed of varint-
+// encoded integers and fixed-width pieces. Wraps a ZeroCopyInputStream.
+// Most users will not need to deal with CodedInputStream.
+//
+// Most methods of CodedInputStream that return a bool return false if an
+// underlying I/O error occurs or if the data is malformed. Once such a
+// failure occurs, the CodedInputStream is broken and is no longer useful.
+class LIBPROTOBUF_EXPORT CodedInputStream {
+ public:
+ // Create a CodedInputStream that reads from the given ZeroCopyInputStream.
+ explicit CodedInputStream(ZeroCopyInputStream* input);
+
+ // Create a CodedInputStream that reads from the given flat array. This is
+ // faster than using an ArrayInputStream. PushLimit(size) is implied by
+ // this constructor.
+ explicit CodedInputStream(const uint8* buffer, int size);
+
+ // Destroy the CodedInputStream and position the underlying
+ // ZeroCopyInputStream at the first unread byte. If an error occurred while
+ // reading (causing a method to return false), then the exact position of
+ // the input stream may be anywhere between the last value that was read
+ // successfully and the stream's byte limit.
+ ~CodedInputStream();
+
+ // Return true if this CodedInputStream reads from a flat array instead of
+ // a ZeroCopyInputStream.
+ inline bool IsFlat() const;
+
+ // Skips a number of bytes. Returns false if an underlying read error
+ // occurs.
+ bool Skip(int count);
+
+ // Sets *data to point directly at the unread part of the CodedInputStream's
+ // underlying buffer, and *size to the size of that buffer, but does not
+ // advance the stream's current position. This will always either produce
+ // a non-empty buffer or return false. If the caller consumes any of
+ // this data, it should then call Skip() to skip over the consumed bytes.
+ // This may be useful for implementing external fast parsing routines for
+ // types of data not covered by the CodedInputStream interface.
+ bool GetDirectBufferPointer(const void** data, int* size);
+
+ // Like GetDirectBufferPointer, but this method is inlined, and does not
+ // attempt to Refresh() if the buffer is currently empty.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE void GetDirectBufferPointerInline(const void** data,
+ int* size);
+
+ // Read raw bytes, copying them into the given buffer.
+ bool ReadRaw(void* buffer, int size);
+
+ // Like the above, with inlined optimizations. This should only be used
+ // by the protobuf implementation.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadRawInline(void* buffer, int size);
+
+ // Like ReadRaw, but reads into a string.
+ //
+ // Implementation Note: ReadString() grows the string gradually as it
+ // reads in the data, rather than allocating the entire requested size
+ // upfront. This prevents denial-of-service attacks in which a client
+ // could claim that a string is going to be MAX_INT bytes long in order to
+ // crash the server because it can't allocate this much space at once.
+ bool ReadString(string* buffer, int size);
+ // Like the above, with inlined optimizations. This should only be used
+ // by the protobuf implementation.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool InternalReadStringInline(string* buffer,
+ int size);
+
+
+ // Read a 32-bit little-endian integer.
+ bool ReadLittleEndian32(uint32* value);
+ // Read a 64-bit little-endian integer.
+ bool ReadLittleEndian64(uint64* value);
+
+ // These methods read from an externally provided buffer. The caller is
+ // responsible for ensuring that the buffer has sufficient space.
+ // Read a 32-bit little-endian integer.
+ static const uint8* ReadLittleEndian32FromArray(const uint8* buffer,
+ uint32* value);
+ // Read a 64-bit little-endian integer.
+ static const uint8* ReadLittleEndian64FromArray(const uint8* buffer,
+ uint64* value);
+
+ // Read an unsigned integer with Varint encoding, truncating to 32 bits.
+ // Reading a 32-bit value is equivalent to reading a 64-bit one and casting
+ // it to uint32, but may be more efficient.
+ bool ReadVarint32(uint32* value);
+ // Read an unsigned integer with Varint encoding.
+ bool ReadVarint64(uint64* value);
+
+ // Read a tag. This calls ReadVarint32() and returns the result, or returns
+ // zero (which is not a valid tag) if ReadVarint32() fails. Also, it updates
+ // the last tag value, which can be checked with LastTagWas().
+ // Always inline because this is only called in one place per parse loop
+ // but it is called for every iteration of said loop, so it should be fast.
+ // GCC doesn't want to inline this by default.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE uint32 ReadTag();
+
+  // This is usually a faster alternative to ReadTag() when cutoff is a manifest
+ // constant. It does particularly well for cutoff >= 127. The first part
+ // of the return value is the tag that was read, though it can also be 0 in
+ // the cases where ReadTag() would return 0. If the second part is true
+ // then the tag is known to be in [0, cutoff]. If not, the tag either is
+ // above cutoff or is 0. (There's intentional wiggle room when tag is 0,
+ // because that can arise in several ways, and for best performance we want
+ // to avoid an extra "is tag == 0?" check here.)
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE std::pair<uint32, bool> ReadTagWithCutoff(
+ uint32 cutoff);
+
+ // Usually returns true if calling ReadVarint32() now would produce the given
+ // value. Will always return false if ReadVarint32() would not return the
+ // given value. If ExpectTag() returns true, it also advances past
+ // the varint. For best performance, use a compile-time constant as the
+ // parameter.
+ // Always inline because this collapses to a small number of instructions
+ // when given a constant parameter, but GCC doesn't want to inline by default.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE bool ExpectTag(uint32 expected);
+
+ // Like above, except this reads from the specified buffer. The caller is
+ // responsible for ensuring that the buffer is large enough to read a varint
+ // of the expected size. For best performance, use a compile-time constant as
+ // the expected tag parameter.
+ //
+ // Returns a pointer beyond the expected tag if it was found, or NULL if it
+ // was not.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE static const uint8* ExpectTagFromArray(
+ const uint8* buffer,
+ uint32 expected);
+
+ // Usually returns true if no more bytes can be read. Always returns false
+ // if more bytes can be read. If ExpectAtEnd() returns true, a subsequent
+ // call to LastTagWas() will act as if ReadTag() had been called and returned
+ // zero, and ConsumedEntireMessage() will return true.
+ bool ExpectAtEnd();
+
+ // If the last call to ReadTag() or ReadTagWithCutoff() returned the
+  // given value, returns true. Otherwise, returns false.
+ //
+ // This is needed because parsers for some types of embedded messages
+ // (with field type TYPE_GROUP) don't actually know that they've reached the
+ // end of a message until they see an ENDGROUP tag, which was actually part
+ // of the enclosing message. The enclosing message would like to check that
+ // tag to make sure it had the right number, so it calls LastTagWas() on
+ // return from the embedded parser to check.
+ bool LastTagWas(uint32 expected);
+
+  // When parsing a message (but NOT a group), this method must be called
+ // immediately after MergeFromCodedStream() returns (if it returns true)
+ // to further verify that the message ended in a legitimate way. For
+ // example, this verifies that parsing did not end on an end-group tag.
+ // It also checks for some cases where, due to optimizations,
+ // MergeFromCodedStream() can incorrectly return true.
+ bool ConsumedEntireMessage();
+
+ // Limits ----------------------------------------------------------
+ // Limits are used when parsing length-delimited embedded messages.
+ // After the message's length is read, PushLimit() is used to prevent
+ // the CodedInputStream from reading beyond that length. Once the
+ // embedded message has been parsed, PopLimit() is called to undo the
+ // limit.
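+  //
+  // As a rough usage sketch (not drawn from a real parser; "input" is a
+  // hypothetical CodedInputStream and the embedded-message parsing step is
+  // elided), the pattern looks like:
+  //
+  //   uint32 length;
+  //   if (!input.ReadVarint32(&length)) return false;
+  //   CodedInputStream::Limit limit = input.PushLimit(length);
+  //   ... parse the embedded message from "input" ...
+  //   bool consumed = input.ConsumedEntireMessage();
+  //   input.PopLimit(limit);
+  //   if (!consumed) return false;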
+
+ // Opaque type used with PushLimit() and PopLimit(). Do not modify
+ // values of this type yourself. The only reason that this isn't a
+ // struct with private internals is for efficiency.
+ typedef int Limit;
+
+ // Places a limit on the number of bytes that the stream may read,
+ // starting from the current position. Once the stream hits this limit,
+ // it will act like the end of the input has been reached until PopLimit()
+ // is called.
+ //
+ // As the names imply, the stream conceptually has a stack of limits. The
+ // shortest limit on the stack is always enforced, even if it is not the
+ // top limit.
+ //
+ // The value returned by PushLimit() is opaque to the caller, and must
+ // be passed unchanged to the corresponding call to PopLimit().
+ Limit PushLimit(int byte_limit);
+
+ // Pops the last limit pushed by PushLimit(). The input must be the value
+ // returned by that call to PushLimit().
+ void PopLimit(Limit limit);
+
+ // Returns the number of bytes left until the nearest limit on the
+ // stack is hit, or -1 if no limits are in place.
+ int BytesUntilLimit() const;
+
+ // Returns current position relative to the beginning of the input stream.
+ int CurrentPosition() const;
+
+ // Total Bytes Limit -----------------------------------------------
+ // To prevent malicious users from sending excessively large messages
+ // and causing integer overflows or memory exhaustion, CodedInputStream
+ // imposes a hard limit on the total number of bytes it will read.
+
+ // Sets the maximum number of bytes that this CodedInputStream will read
+ // before refusing to continue. To prevent integer overflows in the
+ // protocol buffers implementation, as well as to prevent servers from
+ // allocating enormous amounts of memory to hold parsed messages, the
+ // maximum message length should be limited to the shortest length that
+ // will not harm usability. The theoretical shortest message that could
+ // cause integer overflows is 512MB. The default limit is 64MB. Apps
+ // should set shorter limits if possible. If warning_threshold is not -1,
+ // a warning will be printed to stderr after warning_threshold bytes are
+ // read. For backwards compatibility all negative values get squashed to -1,
+ // as other negative values might have special internal meanings.
+ // An error will always be printed to stderr if the limit is reached.
+ //
+ // This is unrelated to PushLimit()/PopLimit().
+ //
+ // Hint: If you are reading this because your program is printing a
+ // warning about dangerously large protocol messages, you may be
+ // confused about what to do next. The best option is to change your
+ // design such that excessively large messages are not necessary.
+ // For example, try to design file formats to consist of many small
+ // messages rather than a single large one. If this is infeasible,
+ // you will need to increase the limit. Chances are, though, that
+ // your code never constructs a CodedInputStream on which the limit
+ // can be set. You probably parse messages by calling things like
+ // Message::ParseFromString(). In this case, you will need to change
+ // your code to instead construct some sort of ZeroCopyInputStream
+ // (e.g. an ArrayInputStream), construct a CodedInputStream around
+ // that, then call Message::ParseFromCodedStream() instead. Then
+ // you can adjust the limit. Yes, it's more work, but you're doing
+ // something unusual.
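+  //
+  // A rough sketch of that approach ("data", "data_size", and "MyMessage"
+  // are hypothetical placeholders):
+  //
+  //   ArrayInputStream raw_input(data, data_size);
+  //   CodedInputStream coded_input(&raw_input);
+  //   coded_input.SetTotalBytesLimit(256 << 20, 128 << 20);  // 256MB, warn at 128MB
+  //   MyMessage message;
+  //   bool ok = message.ParseFromCodedStream(&coded_input);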
+ void SetTotalBytesLimit(int total_bytes_limit, int warning_threshold);
+
+ // The Total Bytes Limit minus the Current Position, or -1 if there
+ // is no Total Bytes Limit.
+ int BytesUntilTotalBytesLimit() const;
+
+ // Recursion Limit -------------------------------------------------
+ // To prevent corrupt or malicious messages from causing stack overflows,
+ // we must keep track of the depth of recursion when parsing embedded
+ // messages and groups. CodedInputStream keeps track of this because it
+ // is the only object that is passed down the stack during parsing.
+
+ // Sets the maximum recursion depth. The default is 100.
+ void SetRecursionLimit(int limit);
+
+
+ // Increments the current recursion depth. Returns true if the depth is
+ // under the limit, false if it has gone over.
+ bool IncrementRecursionDepth();
+
+ // Decrements the recursion depth if possible.
+ void DecrementRecursionDepth();
+
+ // Decrements the recursion depth blindly. This is faster than
+ // DecrementRecursionDepth(). It should be used only if all previous
+ // increments to recursion depth were successful.
+ void UnsafeDecrementRecursionDepth();
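+
+  // A rough sketch of the intended pattern ("ParseNestedThing" is a
+  // hypothetical stand-in for the caller's own parsing code):
+  //
+  //   if (!input.IncrementRecursionDepth()) return false;  // too deep
+  //   bool ok = ParseNestedThing(&input);
+  //   input.UnsafeDecrementRecursionDepth();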
+
+ // Shorthand for make_pair(PushLimit(byte_limit), --recursion_budget_).
+ // Using this can reduce code size and complexity in some cases. The caller
+ // is expected to check that the second part of the result is non-negative (to
+ // bail out if the depth of recursion is too high) and, if all is well, to
+ // later pass the first part of the result to PopLimit() or similar.
+ std::pair<CodedInputStream::Limit, int> IncrementRecursionDepthAndPushLimit(
+ int byte_limit);
+
+ // Shorthand for PushLimit(ReadVarint32(&length) ? length : 0).
+ Limit ReadLengthAndPushLimit();
+
+ // Helper that is equivalent to: {
+ // bool result = ConsumedEntireMessage();
+ // PopLimit(limit);
+ // UnsafeDecrementRecursionDepth();
+ // return result; }
+ // Using this can reduce code size and complexity in some cases.
+ // Do not use unless the current recursion depth is greater than zero.
+ bool DecrementRecursionDepthAndPopLimit(Limit limit);
+
+ // Helper that is equivalent to: {
+ // bool result = ConsumedEntireMessage();
+ // PopLimit(limit);
+ // return result; }
+ // Using this can reduce code size and complexity in some cases.
+ bool CheckEntireMessageConsumedAndPopLimit(Limit limit);
+
+ // Extension Registry ----------------------------------------------
+ // ADVANCED USAGE: 99.9% of people can ignore this section.
+ //
+ // By default, when parsing extensions, the parser looks for extension
+ // definitions in the pool which owns the outer message's Descriptor.
+ // However, you may call SetExtensionRegistry() to provide an alternative
+ // pool instead. This makes it possible, for example, to parse a message
+ // using a generated class, but represent some extensions using
+ // DynamicMessage.
+
+ // Set the pool used to look up extensions. Most users do not need to call
+ // this as the correct pool will be chosen automatically.
+ //
+ // WARNING: It is very easy to misuse this. Carefully read the requirements
+ // below. Do not use this unless you are sure you need it. Almost no one
+ // does.
+ //
+ // Let's say you are parsing a message into message object m, and you want
+ // to take advantage of SetExtensionRegistry(). You must follow these
+ // requirements:
+ //
+ // The given DescriptorPool must contain m->GetDescriptor(). It is not
+ // sufficient for it to simply contain a descriptor that has the same name
+ // and content -- it must be the *exact object*. In other words:
+ // assert(pool->FindMessageTypeByName(m->GetDescriptor()->full_name()) ==
+ // m->GetDescriptor());
+ // There are two ways to satisfy this requirement:
+ // 1) Use m->GetDescriptor()->pool() as the pool. This is generally useless
+ // because this is the pool that would be used anyway if you didn't call
+ // SetExtensionRegistry() at all.
+ // 2) Use a DescriptorPool which has m->GetDescriptor()->pool() as an
+ // "underlay". Read the documentation for DescriptorPool for more
+ // information about underlays.
+ //
+ // You must also provide a MessageFactory. This factory will be used to
+ // construct Message objects representing extensions. The factory's
+ // GetPrototype() MUST return non-NULL for any Descriptor which can be found
+ // through the provided pool.
+ //
+ // If the provided factory might return instances of protocol-compiler-
+ // generated (i.e. compiled-in) types, or if the outer message object m is
+ // a generated type, then the given factory MUST have this property: If
+ // GetPrototype() is given a Descriptor which resides in
+ // DescriptorPool::generated_pool(), the factory MUST return the same
+ // prototype which MessageFactory::generated_factory() would return. That
+ // is, given a descriptor for a generated type, the factory must return an
+ // instance of the generated class (NOT DynamicMessage). However, when
+ // given a descriptor for a type that is NOT in generated_pool, the factory
+ // is free to return any implementation.
+ //
+ // The reason for this requirement is that generated sub-objects may be
+ // accessed via the standard (non-reflection) extension accessor methods,
+ // and these methods will down-cast the object to the generated class type.
+ // If the object is not actually of that type, the results would be undefined.
+ // On the other hand, if an extension is not compiled in, then there is no
+ // way the code could end up accessing it via the standard accessors -- the
+ // only way to access the extension is via reflection. When using reflection,
+ // DynamicMessage and generated messages are indistinguishable, so it's fine
+ // if these objects are represented using DynamicMessage.
+ //
+ // Using DynamicMessageFactory on which you have called
+ // SetDelegateToGeneratedFactory(true) should be sufficient to satisfy the
+ // above requirement.
+ //
+ // If either pool or factory is NULL, both must be NULL.
+ //
+ // Note that this feature is ignored when parsing "lite" messages as they do
+ // not have descriptors.
+ void SetExtensionRegistry(const DescriptorPool* pool,
+ MessageFactory* factory);
+
+ // Get the DescriptorPool set via SetExtensionRegistry(), or NULL if no pool
+ // has been provided.
+ const DescriptorPool* GetExtensionPool();
+
+ // Get the MessageFactory set via SetExtensionRegistry(), or NULL if no
+ // factory has been provided.
+ MessageFactory* GetExtensionFactory();
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedInputStream);
+
+ const uint8* buffer_;
+ const uint8* buffer_end_; // pointer to the end of the buffer.
+ ZeroCopyInputStream* input_;
+ int total_bytes_read_; // total bytes read from input_, including
+ // the current buffer
+
+ // If total_bytes_read_ surpasses INT_MAX, we record the extra bytes here
+ // so that we can BackUp() on destruction.
+ int overflow_bytes_;
+
+ // LastTagWas() stuff.
+ uint32 last_tag_; // result of last ReadTag() or ReadTagWithCutoff().
+
+ // This is set true by ReadTag{Fallback/Slow}() if it is called when exactly
+ // at EOF, or by ExpectAtEnd() when it returns true. This happens when we
+ // reach the end of a message and attempt to read another tag.
+ bool legitimate_message_end_;
+
+ // See EnableAliasing().
+ bool aliasing_enabled_;
+
+ // Limits
+  Limit current_limit_;  // INT_MAX means no limit is applied
+
+ // For simplicity, if the current buffer crosses a limit (either a normal
+ // limit created by PushLimit() or the total bytes limit), buffer_size_
+ // only tracks the number of bytes before that limit. This field
+ // contains the number of bytes after it. Note that this implies that if
+ // buffer_size_ == 0 and buffer_size_after_limit_ > 0, we know we've
+ // hit a limit. However, if both are zero, it doesn't necessarily mean
+ // we aren't at a limit -- the buffer may have ended exactly at the limit.
+ int buffer_size_after_limit_;
+
+ // Maximum number of bytes to read, period. This is unrelated to
+ // current_limit_. Set using SetTotalBytesLimit().
+ int total_bytes_limit_;
+
+ // If positive/0: Limit for bytes read after which a warning due to size
+ // should be logged.
+ // If -1: Printing of warning disabled. Can be set by client.
+ // If -2: Internal: Limit has been reached, print full size when destructing.
+ int total_bytes_warning_threshold_;
+
+ // Current recursion budget, controlled by IncrementRecursionDepth() and
+ // similar. Starts at recursion_limit_ and goes down: if this reaches
+ // -1 we are over budget.
+ int recursion_budget_;
+ // Recursion depth limit, set by SetRecursionLimit().
+ int recursion_limit_;
+
+ // See SetExtensionRegistry().
+ const DescriptorPool* extension_pool_;
+ MessageFactory* extension_factory_;
+
+ // Private member functions.
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Back up input_ to the current buffer position.
+ void BackUpInputToCurrentPosition();
+
+ // Recomputes the value of buffer_size_after_limit_. Must be called after
+ // current_limit_ or total_bytes_limit_ changes.
+ void RecomputeBufferLimits();
+
+ // Writes an error message saying that we hit total_bytes_limit_.
+ void PrintTotalBytesLimitError();
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(BufferSize()).
+ bool Refresh();
+
+ // When parsing varints, we optimize for the common case of small values, and
+ // then optimize for the case when the varint fits within the current buffer
+ // piece. The Fallback method is used when we can't use the one-byte
+ // optimization. The Slow method is yet another fallback when the buffer is
+ // not large enough. Making the slow path out-of-line speeds up the common
+ // case by 10-15%. The slow path is fairly uncommon: it only triggers when a
+ // message crosses multiple buffers. Note: ReadVarint32Fallback() and
+ // ReadVarint64Fallback() are called frequently and generally not inlined, so
+ // they have been optimized to avoid "out" parameters. The former returns -1
+ // if it fails and the uint32 it read otherwise. The latter has a bool
+ // indicating success or failure as part of its return type.
+ int64 ReadVarint32Fallback(uint32 first_byte_or_zero);
+ std::pair<uint64, bool> ReadVarint64Fallback();
+ bool ReadVarint32Slow(uint32* value);
+ bool ReadVarint64Slow(uint64* value);
+ bool ReadLittleEndian32Fallback(uint32* value);
+ bool ReadLittleEndian64Fallback(uint64* value);
+ // Fallback/slow methods for reading tags. These do not update last_tag_,
+ // but will set legitimate_message_end_ if we are at the end of the input
+ // stream.
+ uint32 ReadTagFallback(uint32 first_byte_or_zero);
+ uint32 ReadTagSlow();
+ bool ReadStringFallback(string* buffer, int size);
+
+ // Return the size of the buffer.
+ int BufferSize() const;
+
+ static const int kDefaultTotalBytesLimit = 64 << 20; // 64MB
+
+ static const int kDefaultTotalBytesWarningThreshold = 32 << 20; // 32MB
+
+ static int default_recursion_limit_; // 100 by default.
+};
+
+// Class which encodes and writes binary data composed of varint-encoded
+// integers and fixed-width pieces. Wraps a ZeroCopyOutputStream.
+// Most users will not need to deal with CodedOutputStream.
+//
+// Most methods of CodedOutputStream which return a bool return false if an
+// underlying I/O error occurs. Once such a failure occurs, the
+// CodedOutputStream is broken and is no longer useful. The Write* methods do
+// not return the stream status, but will invalidate the stream if an error
+// occurs. The client can probe HadError() to determine the status.
+//
+// Note that every method of CodedOutputStream which writes some data has
+// a corresponding static "ToArray" version. These versions write directly
+// to the provided buffer, returning a pointer past the last written byte.
+// They require that the buffer has sufficient capacity for the encoded data.
+// This allows an optimization where we check if an output stream has enough
+// space for an entire message before we start writing and, if so, we
+// call only the ToArray methods to avoid doing bound checks for each
+// individual value.
+// For instance, in the example above:
+//
+// CodedOutputStream* coded_output = new CodedOutputStream(raw_output);
+// int magic_number = 1234;
+// char text[] = "Hello world!";
+//
+// int coded_size = sizeof(magic_number) +
+// CodedOutputStream::VarintSize32(strlen(text)) +
+// strlen(text);
+//
+// uint8* buffer =
+// coded_output->GetDirectBufferForNBytesAndAdvance(coded_size);
+// if (buffer != NULL) {
+// // The output stream has enough space in the buffer: write directly to
+// // the array.
+// buffer = CodedOutputStream::WriteLittleEndian32ToArray(magic_number,
+// buffer);
+// buffer = CodedOutputStream::WriteVarint32ToArray(strlen(text), buffer);
+// buffer = CodedOutputStream::WriteRawToArray(text, strlen(text), buffer);
+// } else {
+// // Make bound-checked writes, which will ask the underlying stream for
+// // more space as needed.
+// coded_output->WriteLittleEndian32(magic_number);
+// coded_output->WriteVarint32(strlen(text));
+// coded_output->WriteRaw(text, strlen(text));
+// }
+//
+// delete coded_output;
+class LIBPROTOBUF_EXPORT CodedOutputStream {
+ public:
+ // Create a CodedOutputStream that writes to the given ZeroCopyOutputStream.
+ explicit CodedOutputStream(ZeroCopyOutputStream* output);
+ CodedOutputStream(ZeroCopyOutputStream* output, bool do_eager_refresh);
+
+ // Destroy the CodedOutputStream and position the underlying
+ // ZeroCopyOutputStream immediately after the last byte written.
+ ~CodedOutputStream();
+
+ // Trims any unused space in the underlying buffer so that its size matches
+ // the number of bytes written by this stream. The underlying buffer will
+ // automatically be trimmed when this stream is destroyed; this call is only
+ // necessary if the underlying buffer is accessed *before* the stream is
+ // destroyed.
+ void Trim();
+
+ // Skips a number of bytes, leaving the bytes unmodified in the underlying
+ // buffer. Returns false if an underlying write error occurs. This is
+ // mainly useful with GetDirectBufferPointer().
+ bool Skip(int count);
+
+ // Sets *data to point directly at the unwritten part of the
+ // CodedOutputStream's underlying buffer, and *size to the size of that
+ // buffer, but does not advance the stream's current position. This will
+ // always either produce a non-empty buffer or return false. If the caller
+ // writes any data to this buffer, it should then call Skip() to skip over
+ // the consumed bytes. This may be useful for implementing external fast
+ // serialization routines for types of data not covered by the
+ // CodedOutputStream interface.
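+ //
+ // A minimal usage sketch; "coded_output", "payload" and "payload_size" are
+ // illustrative names, not part of this interface:
+ //
+ //   void* out;
+ //   int out_size;
+ //   if (coded_output->GetDirectBufferPointer(&out, &out_size) &&
+ //       out_size >= payload_size) {
+ //     memcpy(out, payload, payload_size);
+ //     coded_output->Skip(payload_size);
+ //   }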
+ bool GetDirectBufferPointer(void** data, int* size);
+
+ // If there are at least "size" bytes available in the current buffer,
+ // returns a pointer directly into the buffer and advances over these bytes.
+ // The caller may then write directly into this buffer (e.g. using the
+ // *ToArray static methods) rather than go through CodedOutputStream. If
+ // there are not enough bytes available, returns NULL. The return pointer is
+ // invalidated as soon as any other non-const method of CodedOutputStream
+ // is called.
+ inline uint8* GetDirectBufferForNBytesAndAdvance(int size);
+
+ // Write raw bytes, copying them from the given buffer.
+ void WriteRaw(const void* buffer, int size);
+ // Like WriteRaw() but will try to write aliased data if aliasing is
+ // turned on.
+ void WriteRawMaybeAliased(const void* data, int size);
+ // Like WriteRaw() but writing directly to the target array.
+ // This is _not_ inlined, as the compiler often optimizes memcpy into inline
+ // copy loops. Since this gets called by every field with string or bytes
+ // type, inlining may lead to a significant amount of code bloat, with only a
+ // minor performance gain.
+ static uint8* WriteRawToArray(const void* buffer, int size, uint8* target);
+
+ // Equivalent to WriteRaw(str.data(), str.size()).
+ void WriteString(const string& str);
+ // Like WriteString() but writing directly to the target array.
+ static uint8* WriteStringToArray(const string& str, uint8* target);
+ // Write the varint-encoded size of str followed by str.
+ static uint8* WriteStringWithSizeToArray(const string& str, uint8* target);
+
+
+ // Instructs the CodedOutputStream to allow the underlying
+ // ZeroCopyOutputStream to hold pointers to the original structure instead of
+ // copying, if it supports it (i.e. output->AllowsAliasing() is true). If the
+ // underlying stream does not support aliasing, then enabling it has no
+ // effect. For now, this only affects the behavior of
+ // WriteRawMaybeAliased().
+ //
+ // NOTE: It is the caller's responsibility to ensure that the chunk of memory
+ // remains live until all of the data has been consumed from the stream.
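+ //
+ // A minimal sketch, assuming "file_contents" is an illustrative string that
+ // outlives the stream:
+ //
+ //   coded_output->EnableAliasing(true);
+ //   coded_output->WriteRawMaybeAliased(file_contents.data(),
+ //                                      static_cast<int>(file_contents.size()));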
+ void EnableAliasing(bool enabled);
+
+ // Write a 32-bit little-endian integer.
+ void WriteLittleEndian32(uint32 value);
+ // Like WriteLittleEndian32() but writing directly to the target array.
+ static uint8* WriteLittleEndian32ToArray(uint32 value, uint8* target);
+ // Write a 64-bit little-endian integer.
+ void WriteLittleEndian64(uint64 value);
+ // Like WriteLittleEndian64() but writing directly to the target array.
+ static uint8* WriteLittleEndian64ToArray(uint64 value, uint8* target);
+
+ // Write an unsigned integer with Varint encoding. Writing a 32-bit value
+ // is equivalent to casting it to uint64 and writing it as a 64-bit value,
+ // but may be more efficient.
+ void WriteVarint32(uint32 value);
+ // Like WriteVarint32() but writing directly to the target array.
+ static uint8* WriteVarint32ToArray(uint32 value, uint8* target);
+ // Write an unsigned integer with Varint encoding.
+ void WriteVarint64(uint64 value);
+ // Like WriteVarint64() but writing directly to the target array.
+ static uint8* WriteVarint64ToArray(uint64 value, uint8* target);
+
+ // Equivalent to WriteVarint32() except when the value is negative,
+ // in which case it must be sign-extended to a full 10 bytes.
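+ // For example, writing -1 this way emits the ten bytes
+ // ff ff ff ff ff ff ff ff ff 01 -- the same encoding WriteVarint64() would
+ // produce for 0xFFFFFFFFFFFFFFFF.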
+ void WriteVarint32SignExtended(int32 value);
+ // Like WriteVarint32SignExtended() but writing directly to the target array.
+ static uint8* WriteVarint32SignExtendedToArray(int32 value, uint8* target);
+
+ // This is identical to WriteVarint32(), but optimized for writing tags.
+ // In particular, if the input is a compile-time constant, this method
+ // compiles down to a couple instructions.
+ // Always inline because otherwise the aforementioned optimization can't work,
+ // but GCC by default doesn't want to inline this.
+ void WriteTag(uint32 value);
+ // Like WriteTag() but writing directly to the target array.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteTagToArray(uint32 value,
+ uint8* target);
+
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize32(uint32 value);
+ // Returns the number of bytes needed to encode the given value as a varint.
+ static int VarintSize64(uint64 value);
+
+ // If negative, 10 bytes. Otherwise, same as VarintSize32().
+ static int VarintSize32SignExtended(int32 value);
+
+ // Compile-time equivalent of VarintSize32().
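+ // For instance, StaticVarintSize32<300>::value is 2 and
+ // StaticVarintSize32<1000000>::value is 3, both evaluated at compile time.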
+ template <uint32 Value>
+ struct StaticVarintSize32 {
+ static const int value =
+ (Value < (1 << 7))
+ ? 1
+ : (Value < (1 << 14))
+ ? 2
+ : (Value < (1 << 21))
+ ? 3
+ : (Value < (1 << 28))
+ ? 4
+ : 5;
+ };
+
+ // Returns the total number of bytes written since this object was created.
+ inline int ByteCount() const;
+
+ // Returns true if there was an underlying I/O error since this object was
+ // created.
+ bool HadError() const { return had_error_; }
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CodedOutputStream);
+
+ ZeroCopyOutputStream* output_;
+ uint8* buffer_;
+ int buffer_size_;
+ int total_bytes_; // Sum of sizes of all buffers seen so far.
+ bool had_error_; // Whether an error occurred during output.
+ bool aliasing_enabled_; // See EnableAliasing().
+
+ // Advance the buffer by a given number of bytes.
+ void Advance(int amount);
+
+ // Called when the buffer runs out to request more data. Implies an
+ // Advance(buffer_size_).
+ bool Refresh();
+
+ // Like WriteRaw() but may avoid copying if the underlying
+ // ZeroCopyOutputStream supports it.
+ void WriteAliasedRaw(const void* buffer, int size);
+
+ // If this write might cross the end of the buffer, we compose the bytes
+ // first and then use WriteRaw().
+ void WriteVarint32SlowPath(uint32 value);
+
+ // Always-inlined version of WriteVarint64ToArray() so that code can be
+ // reused, while still controlling size. It should not be called from other
+ // inlined functions, since doing so would greatly increase the size of
+ // generated code. Out-of-line callers such as WriteVarint64() should invoke
+ // it directly to avoid any extra function call overhead.
+ GOOGLE_ATTRIBUTE_ALWAYS_INLINE static uint8* WriteVarint64ToArrayInline(
+ uint64 value, uint8* target);
+
+ static int VarintSize32Fallback(uint32 value);
+};
+
+// inline methods ====================================================
+// The vast majority of varints are only one byte. These inline
+// methods optimize for that case.
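+//
+// As a quick reminder of the wire format: each varint byte carries seven
+// payload bits, least-significant group first, with the high bit set on every
+// byte except the last. So 1 encodes as 0x01 (one byte), while 300 encodes as
+// 0xac 0x02 (0x2c with the continuation bit set, followed by 0x02).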
+
+inline bool CodedInputStream::ReadVarint32(uint32* value) {
+ uint32 v = 0;
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+ v = *buffer_;
+ if (v < 0x80) {
+ *value = v;
+ Advance(1);
+ return true;
+ }
+ }
+ int64 result = ReadVarint32Fallback(v);
+ *value = static_cast<uint32>(result);
+ return result >= 0;
+}
+
+inline bool CodedInputStream::ReadVarint64(uint64* value) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && *buffer_ < 0x80) {
+ *value = *buffer_;
+ Advance(1);
+ return true;
+ }
+ std::pair<uint64, bool> p = ReadVarint64Fallback();
+ *value = p.first;
+ return p.second;
+}
+
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian32FromArray(
+ const uint8* buffer,
+ uint32* value) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ memcpy(value, buffer, sizeof(*value));
+ return buffer + sizeof(*value);
+#else
+ *value = (static_cast<uint32>(buffer[0]) ) |
+ (static_cast<uint32>(buffer[1]) << 8) |
+ (static_cast<uint32>(buffer[2]) << 16) |
+ (static_cast<uint32>(buffer[3]) << 24);
+ return buffer + sizeof(*value);
+#endif
+}
+// static
+inline const uint8* CodedInputStream::ReadLittleEndian64FromArray(
+ const uint8* buffer,
+ uint64* value) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ memcpy(value, buffer, sizeof(*value));
+ return buffer + sizeof(*value);
+#else
+ uint32 part0 = (static_cast<uint32>(buffer[0]) ) |
+ (static_cast<uint32>(buffer[1]) << 8) |
+ (static_cast<uint32>(buffer[2]) << 16) |
+ (static_cast<uint32>(buffer[3]) << 24);
+ uint32 part1 = (static_cast<uint32>(buffer[4]) ) |
+ (static_cast<uint32>(buffer[5]) << 8) |
+ (static_cast<uint32>(buffer[6]) << 16) |
+ (static_cast<uint32>(buffer[7]) << 24);
+ *value = static_cast<uint64>(part0) |
+ (static_cast<uint64>(part1) << 32);
+ return buffer + sizeof(*value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian32(uint32* value) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
+ memcpy(value, buffer_, sizeof(*value));
+ Advance(sizeof(*value));
+ return true;
+ } else {
+ return ReadLittleEndian32Fallback(value);
+ }
+#else
+ return ReadLittleEndian32Fallback(value);
+#endif
+}
+
+inline bool CodedInputStream::ReadLittleEndian64(uint64* value) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= static_cast<int>(sizeof(*value)))) {
+ memcpy(value, buffer_, sizeof(*value));
+ Advance(sizeof(*value));
+ return true;
+ } else {
+ return ReadLittleEndian64Fallback(value);
+ }
+#else
+ return ReadLittleEndian64Fallback(value);
+#endif
+}
+
+inline uint32 CodedInputStream::ReadTag() {
+ uint32 v = 0;
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+ v = *buffer_;
+ if (v < 0x80) {
+ last_tag_ = v;
+ Advance(1);
+ return v;
+ }
+ }
+ last_tag_ = ReadTagFallback(v);
+ return last_tag_;
+}
+
+inline std::pair<uint32, bool> CodedInputStream::ReadTagWithCutoff(
+ uint32 cutoff) {
+ // In performance-sensitive code we can expect cutoff to be a compile-time
+ // constant, and things like "cutoff >= kMax1ByteVarint" to be evaluated at
+ // compile time.
+ uint32 first_byte_or_zero = 0;
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_)) {
+ // Hot case: buffer_ non-empty, buffer_[0] in [1, 128).
+ // TODO(gpike): Is it worth rearranging this? E.g., if the number of fields
+ // is large enough then is it better to check for the two-byte case first?
+ first_byte_or_zero = buffer_[0];
+ if (static_cast<int8>(buffer_[0]) > 0) {
+ const uint32 kMax1ByteVarint = 0x7f;
+ uint32 tag = last_tag_ = buffer_[0];
+ Advance(1);
+ return std::make_pair(tag, cutoff >= kMax1ByteVarint || tag <= cutoff);
+ }
+ // Other hot case: cutoff >= 0x80, buffer_ has at least two bytes available,
+ // and the tag is two bytes. The latter is tested by checking that the first
+ // byte has its continuation bit set and the second byte does not, i.e.
+ // (buffer_[0] & ~buffer_[1]) >= 0x80.
+ if (cutoff >= 0x80 &&
+ GOOGLE_PREDICT_TRUE(buffer_ + 1 < buffer_end_) &&
+ GOOGLE_PREDICT_TRUE((buffer_[0] & ~buffer_[1]) >= 0x80)) {
+ const uint32 kMax2ByteVarint = (0x7f << 7) + 0x7f;
+ uint32 tag = last_tag_ = (1u << 7) * buffer_[1] + (buffer_[0] - 0x80);
+ Advance(2);
+ // It might make sense to test for tag == 0 now, but it is so rare
+ // that we don't bother. A varint-encoded 0 should be one byte unless
+ // the encoder lost its mind. The second part of the return value of
+ // this function is allowed to be either true or false if the tag is 0,
+ // so we don't have to check for tag == 0. We may need to check whether
+ // it exceeds cutoff.
+ bool at_or_below_cutoff = cutoff >= kMax2ByteVarint || tag <= cutoff;
+ return std::make_pair(tag, at_or_below_cutoff);
+ }
+ }
+ // Slow path
+ last_tag_ = ReadTagFallback(first_byte_or_zero);
+ return std::make_pair(last_tag_, static_cast<uint32>(last_tag_ - 1) < cutoff);
+}
+
+inline bool CodedInputStream::LastTagWas(uint32 expected) {
+ return last_tag_ == expected;
+}
+
+inline bool CodedInputStream::ConsumedEntireMessage() {
+ return legitimate_message_end_;
+}
+
+inline bool CodedInputStream::ExpectTag(uint32 expected) {
+ if (expected < (1 << 7)) {
+ if (GOOGLE_PREDICT_TRUE(buffer_ < buffer_end_) && buffer_[0] == expected) {
+ Advance(1);
+ return true;
+ } else {
+ return false;
+ }
+ } else if (expected < (1 << 14)) {
+ if (GOOGLE_PREDICT_TRUE(BufferSize() >= 2) &&
+ buffer_[0] == static_cast<uint8>(expected | 0x80) &&
+ buffer_[1] == static_cast<uint8>(expected >> 7)) {
+ Advance(2);
+ return true;
+ } else {
+ return false;
+ }
+ } else {
+ // Don't bother optimizing for larger values.
+ return false;
+ }
+}
+
+inline const uint8* CodedInputStream::ExpectTagFromArray(
+ const uint8* buffer, uint32 expected) {
+ if (expected < (1 << 7)) {
+ if (buffer[0] == expected) {
+ return buffer + 1;
+ }
+ } else if (expected < (1 << 14)) {
+ if (buffer[0] == static_cast<uint8>(expected | 0x80) &&
+ buffer[1] == static_cast<uint8>(expected >> 7)) {
+ return buffer + 2;
+ }
+ }
+ return NULL;
+}
+
+inline void CodedInputStream::GetDirectBufferPointerInline(const void** data,
+ int* size) {
+ *data = buffer_;
+ *size = static_cast<int>(buffer_end_ - buffer_);
+}
+
+inline bool CodedInputStream::ExpectAtEnd() {
+ // If we are at a limit we know no more bytes can be read. Otherwise, it's
+ // hard to say without calling Refresh(), and we'd rather not do that.
+
+ if (buffer_ == buffer_end_ &&
+ ((buffer_size_after_limit_ != 0) ||
+ (total_bytes_read_ == current_limit_))) {
+ last_tag_ = 0; // Pretend we called ReadTag()...
+ legitimate_message_end_ = true; // ... and it hit EOF.
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline int CodedInputStream::CurrentPosition() const {
+ return total_bytes_read_ - (BufferSize() + buffer_size_after_limit_);
+}
+
+inline uint8* CodedOutputStream::GetDirectBufferForNBytesAndAdvance(int size) {
+ if (buffer_size_ < size) {
+ return NULL;
+ } else {
+ uint8* result = buffer_;
+ Advance(size);
+ return result;
+ }
+}
+
+inline uint8* CodedOutputStream::WriteVarint32ToArray(uint32 value,
+ uint8* target) {
+ while (value >= 0x80) {
+ *target = static_cast<uint8>(value | 0x80);
+ value >>= 7;
+ ++target;
+ }
+ *target = static_cast<uint8>(value);
+ return target + 1;
+}
+
+inline void CodedOutputStream::WriteVarint32SignExtended(int32 value) {
+ if (value < 0) {
+ WriteVarint64(static_cast<uint64>(value));
+ } else {
+ WriteVarint32(static_cast<uint32>(value));
+ }
+}
+
+inline uint8* CodedOutputStream::WriteVarint32SignExtendedToArray(
+ int32 value, uint8* target) {
+ if (value < 0) {
+ return WriteVarint64ToArray(static_cast<uint64>(value), target);
+ } else {
+ return WriteVarint32ToArray(static_cast<uint32>(value), target);
+ }
+}
+
+inline uint8* CodedOutputStream::WriteLittleEndian32ToArray(uint32 value,
+ uint8* target) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ memcpy(target, &value, sizeof(value));
+#else
+ target[0] = static_cast<uint8>(value);
+ target[1] = static_cast<uint8>(value >> 8);
+ target[2] = static_cast<uint8>(value >> 16);
+ target[3] = static_cast<uint8>(value >> 24);
+#endif
+ return target + sizeof(value);
+}
+
+inline uint8* CodedOutputStream::WriteLittleEndian64ToArray(uint64 value,
+ uint8* target) {
+#if defined(PROTOBUF_LITTLE_ENDIAN)
+ memcpy(target, &value, sizeof(value));
+#else
+ uint32 part0 = static_cast<uint32>(value);
+ uint32 part1 = static_cast<uint32>(value >> 32);
+
+ target[0] = static_cast<uint8>(part0);
+ target[1] = static_cast<uint8>(part0 >> 8);
+ target[2] = static_cast<uint8>(part0 >> 16);
+ target[3] = static_cast<uint8>(part0 >> 24);
+ target[4] = static_cast<uint8>(part1);
+ target[5] = static_cast<uint8>(part1 >> 8);
+ target[6] = static_cast<uint8>(part1 >> 16);
+ target[7] = static_cast<uint8>(part1 >> 24);
+#endif
+ return target + sizeof(value);
+}
+
+inline void CodedOutputStream::WriteVarint32(uint32 value) {
+ if (buffer_size_ >= 5) {
+ // Fast path: We have enough bytes left in the buffer to guarantee that
+ // this write won't cross the end, so we can skip the checks.
+ uint8* target = buffer_;
+ uint8* end = WriteVarint32ToArray(value, target);
+ int size = end - target;
+ Advance(size);
+ } else {
+ WriteVarint32SlowPath(value);
+ }
+}
+
+inline void CodedOutputStream::WriteTag(uint32 value) {
+ WriteVarint32(value);
+}
+
+inline uint8* CodedOutputStream::WriteTagToArray(
+ uint32 value, uint8* target) {
+ return WriteVarint32ToArray(value, target);
+}
+
+inline int CodedOutputStream::VarintSize32(uint32 value) {
+ if (value < (1 << 7)) {
+ return 1;
+ } else {
+ return VarintSize32Fallback(value);
+ }
+}
+
+inline int CodedOutputStream::VarintSize32SignExtended(int32 value) {
+ if (value < 0) {
+ return 10; // TODO(kenton): Make this a symbolic constant.
+ } else {
+ return VarintSize32(static_cast<uint32>(value));
+ }
+}
+
+inline void CodedOutputStream::WriteString(const string& str) {
+ WriteRaw(str.data(), static_cast<int>(str.size()));
+}
+
+inline void CodedOutputStream::WriteRawMaybeAliased(
+ const void* data, int size) {
+ if (aliasing_enabled_) {
+ WriteAliasedRaw(data, size);
+ } else {
+ WriteRaw(data, size);
+ }
+}
+
+inline uint8* CodedOutputStream::WriteStringToArray(
+ const string& str, uint8* target) {
+ return WriteRawToArray(str.data(), static_cast<int>(str.size()), target);
+}
+
+inline int CodedOutputStream::ByteCount() const {
+ return total_bytes_ - buffer_size_;
+}
+
+inline void CodedInputStream::Advance(int amount) {
+ buffer_ += amount;
+}
+
+inline void CodedOutputStream::Advance(int amount) {
+ buffer_ += amount;
+ buffer_size_ -= amount;
+}
+
+inline void CodedInputStream::SetRecursionLimit(int limit) {
+ recursion_budget_ += limit - recursion_limit_;
+ recursion_limit_ = limit;
+}
+
+inline bool CodedInputStream::IncrementRecursionDepth() {
+ --recursion_budget_;
+ return recursion_budget_ >= 0;
+}
+
+inline void CodedInputStream::DecrementRecursionDepth() {
+ if (recursion_budget_ < recursion_limit_) ++recursion_budget_;
+}
+
+inline void CodedInputStream::UnsafeDecrementRecursionDepth() {
+ assert(recursion_budget_ < recursion_limit_);
+ ++recursion_budget_;
+}
+
+inline void CodedInputStream::SetExtensionRegistry(const DescriptorPool* pool,
+ MessageFactory* factory) {
+ extension_pool_ = pool;
+ extension_factory_ = factory;
+}
+
+inline const DescriptorPool* CodedInputStream::GetExtensionPool() {
+ return extension_pool_;
+}
+
+inline MessageFactory* CodedInputStream::GetExtensionFactory() {
+ return extension_factory_;
+}
+
+inline int CodedInputStream::BufferSize() const {
+ return static_cast<int>(buffer_end_ - buffer_);
+}
+
+inline CodedInputStream::CodedInputStream(ZeroCopyInputStream* input)
+ : buffer_(NULL),
+ buffer_end_(NULL),
+ input_(input),
+ total_bytes_read_(0),
+ overflow_bytes_(0),
+ last_tag_(0),
+ legitimate_message_end_(false),
+ aliasing_enabled_(false),
+ current_limit_(kint32max),
+ buffer_size_after_limit_(0),
+ total_bytes_limit_(kDefaultTotalBytesLimit),
+ total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+ recursion_budget_(default_recursion_limit_),
+ recursion_limit_(default_recursion_limit_),
+ extension_pool_(NULL),
+ extension_factory_(NULL) {
+ // Eagerly Refresh() so buffer space is immediately available.
+ Refresh();
+}
+
+inline CodedInputStream::CodedInputStream(const uint8* buffer, int size)
+ : buffer_(buffer),
+ buffer_end_(buffer + size),
+ input_(NULL),
+ total_bytes_read_(size),
+ overflow_bytes_(0),
+ last_tag_(0),
+ legitimate_message_end_(false),
+ aliasing_enabled_(false),
+ current_limit_(size),
+ buffer_size_after_limit_(0),
+ total_bytes_limit_(kDefaultTotalBytesLimit),
+ total_bytes_warning_threshold_(kDefaultTotalBytesWarningThreshold),
+ recursion_budget_(default_recursion_limit_),
+ recursion_limit_(default_recursion_limit_),
+ extension_pool_(NULL),
+ extension_factory_(NULL) {
+ // Note that setting current_limit_ == size is important to prevent some
+ // code paths from trying to access input_ and segfaulting.
+}
+
+inline bool CodedInputStream::IsFlat() const {
+ return input_ == NULL;
+}
+
+} // namespace io
+} // namespace protobuf
+
+
+#if _MSC_VER >= 1300 && !defined(__INTEL_COMPILER)
+ #pragma runtime_checks("c", restore)
+#endif // _MSC_VER && !defined(__INTEL_COMPILER)
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_H__
diff --git a/src/google/protobuf/io/coded_stream_inl.h b/src/google/protobuf/io/coded_stream_inl.h
new file mode 100644
index 0000000..d95b06e
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream_inl.h
@@ -0,0 +1,90 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: jasonh@google.com (Jason Hsueh)
+//
+// Implements methods of coded_stream.h that need to be inlined for performance
+// reasons, but should not be defined in a public header.
+
+#ifndef GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
+#define GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
+
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/io/coded_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+#include <string>
+#include <google/protobuf/stubs/stl_util.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+inline bool CodedInputStream::InternalReadStringInline(string* buffer,
+ int size) {
+ if (size < 0) return false; // security: size is often user-supplied
+
+ if (BufferSize() >= size) {
+ STLStringResizeUninitialized(buffer, size);
+ std::pair<char*, bool> z = as_string_data(buffer);
+ if (z.second) {
+ // Oddly enough, memcpy() requires its first two args to be non-NULL even
+ // if we copy 0 bytes. So, we have ensured that z.first is non-NULL here.
+ GOOGLE_DCHECK(z.first != NULL);
+ memcpy(z.first, buffer_, size);
+ Advance(size);
+ }
+ return true;
+ }
+
+ return ReadStringFallback(buffer, size);
+}
+
+inline bool CodedInputStream::InternalReadRawInline(void* buffer, int size) {
+ int current_buffer_size;
+ while ((current_buffer_size = BufferSize()) < size) {
+ // Reading past end of buffer. Copy what we have, then refresh.
+ memcpy(buffer, buffer_, current_buffer_size);
+ buffer = reinterpret_cast<uint8*>(buffer) + current_buffer_size;
+ size -= current_buffer_size;
+ Advance(current_buffer_size);
+ if (!Refresh()) return false;
+ }
+
+ memcpy(buffer, buffer_, size);
+ Advance(size);
+
+ return true;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_CODED_STREAM_INL_H__
diff --git a/src/google/protobuf/io/coded_stream_unittest.cc b/src/google/protobuf/io/coded_stream_unittest.cc
new file mode 100644
index 0000000..d1782e3
--- /dev/null
+++ b/src/google/protobuf/io/coded_stream_unittest.cc
@@ -0,0 +1,1385 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains tests and benchmarks.
+
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
+#include <vector>
+
+#include <google/protobuf/io/coded_stream.h>
+
+#include <limits.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+
+// This declares an unsigned long long integer literal in a portable way.
+// (The original macro is way too big and ruins my formatting.)
+#undef ULL
+#define ULL(x) GOOGLE_ULONGLONG(x)
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// ===================================================================
+// Data-Driven Test Infrastructure
+
+// TEST_1D and TEST_2D are macros I'd eventually like to see added to
+// gTest. These macros can be used to declare tests which should be
+// run multiple times, once for each item in some input array. TEST_1D
+// tests all cases in a single input array. TEST_2D tests all
+// combinations of cases from two arrays. The arrays must be statically
+// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example:
+//
+// int kCases[] = {1, 2, 3, 4};
+// TEST_1D(MyFixture, MyTest, kCases) {
+// EXPECT_GT(kCases_case, 0);
+// }
+//
+// This test iterates through the numbers 1, 2, 3, and 4 and tests that
+// they are all greater than zero. In case of failure, the exact case
+// which failed will be printed. The case type must be printable using
+// ostream::operator<<.
+
+// TODO(kenton): gTest now supports "parameterized tests" which would be
+// a better way to accomplish this. Rewrite when time permits.
+
+#define TEST_1D(FIXTURE, NAME, CASES) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType> \
+ void DoSingleCase(const CaseType& CASES##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES " case #" << i << ": " << CASES[i]); \
+ DoSingleCase(CASES[i]); \
+ } \
+ } \
+ \
+ template <typename CaseType> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
+
+#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType1, typename CaseType2> \
+ void DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \
+ for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \
+ << #CASES2 " case #" << j << ": " << CASES2[j]); \
+ DoSingleCase(CASES1[i], CASES2[j]); \
+ } \
+ } \
+ } \
+ \
+ template <typename CaseType1, typename CaseType2> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case)
+
+// ===================================================================
+
+class CodedStreamTest : public testing::Test {
+ protected:
+ // Helper method used by tests for bytes warning. See implementation comment
+ // for further information.
+ static void SetupTotalBytesLimitWarningTest(
+ int total_bytes_limit, int warning_threshold,
+ vector<string>* out_errors, vector<string>* out_warnings);
+
+ // Buffer used during most of the tests. This assumes tests run sequentially.
+ static const int kBufferSize = 1024 * 64;
+ static uint8 buffer_[kBufferSize];
+};
+
+uint8 CodedStreamTest::buffer_[CodedStreamTest::kBufferSize];
+
+// We test each operation over a variety of block sizes to ensure that
+// we test cases where reads or writes cross buffer boundaries, cases
+// where they don't, and cases where there is so much buffer left that
+// we can use special optimized paths that don't worry about bounds
+// checks.
+const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+
+
+// -------------------------------------------------------------------
+// Varint tests.
+
+struct VarintCase {
+ uint8 bytes[10]; // Encoded bytes.
+ int size; // Encoded size, in bytes.
+ uint64 value; // Parsed value.
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintCase& c) {
+ return os << c.value;
+}
+
+VarintCase kVarintCases[] = {
+ // 32-bit values
+ {{0x00} , 1, 0},
+ {{0x01} , 1, 1},
+ {{0x7f} , 1, 127},
+ {{0xa2, 0x74}, 2, (0x22 << 0) | (0x74 << 7)}, // 14882
+ {{0xbe, 0xf7, 0x92, 0x84, 0x0b}, 5, // 2961488830
+ (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) |
+ (ULL(0x0b) << 28)},
+
+ // 64-bit
+ {{0xbe, 0xf7, 0x92, 0x84, 0x1b}, 5, // 7256456126
+ (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) |
+ (ULL(0x1b) << 28)},
+ {{0x80, 0xe6, 0xeb, 0x9c, 0xc3, 0xc9, 0xa4, 0x49}, 8, // 41256202580718336
+ (0x00 << 0) | (0x66 << 7) | (0x6b << 14) | (0x1c << 21) |
+ (ULL(0x43) << 28) | (ULL(0x49) << 35) | (ULL(0x24) << 42) |
+ (ULL(0x49) << 49)},
+ // 11964378330978735131
+ {{0x9b, 0xa8, 0xf9, 0xc2, 0xbb, 0xd6, 0x80, 0x85, 0xa6, 0x01}, 10,
+ (0x1b << 0) | (0x28 << 7) | (0x79 << 14) | (0x42 << 21) |
+ (ULL(0x3b) << 28) | (ULL(0x56) << 35) | (ULL(0x00) << 42) |
+ (ULL(0x05) << 49) | (ULL(0x26) << 56) | (ULL(0x01) << 63)},
+};
+
+TEST_2D(CodedStreamTest, ReadVarint32, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadVarint32(&value));
+ EXPECT_EQ(static_cast<uint32>(kVarintCases_case.value), value);
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, ReadTag, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+ EXPECT_EQ(expected_value, coded_input.ReadTag());
+
+ EXPECT_TRUE(coded_input.LastTagWas(expected_value));
+ EXPECT_FALSE(coded_input.LastTagWas(expected_value + 1));
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+// This is a regression test that verifies that there are no issues
+// with the handling of empty input buffers.
+TEST_F(CodedStreamTest, EmptyInputBeforeEos) {
+ class In : public ZeroCopyInputStream {
+ public:
+ In() : count_(0) {}
+ private:
+ virtual bool Next(const void** data, int* size) {
+ *data = NULL;
+ *size = 0;
+ return count_++ < 2;
+ }
+ virtual void BackUp(int count) {
+ GOOGLE_LOG(FATAL) << "Tests never call this.";
+ }
+ virtual bool Skip(int count) {
+ GOOGLE_LOG(FATAL) << "Tests never call this.";
+ return false;
+ }
+ virtual int64 ByteCount() const { return 0; }
+ int count_;
+ } in;
+ CodedInputStream input(&in);
+ input.ReadTag();
+ EXPECT_TRUE(input.ConsumedEntireMessage());
+}
+
+TEST_1D(CodedStreamTest, ExpectTag, kVarintCases) {
+ // Leave one byte at the beginning of the buffer so we can read it
+ // to force the first buffer to be loaded.
+ buffer_[0] = '\0';
+ memcpy(buffer_ + 1, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+
+ {
+ CodedInputStream coded_input(&input);
+
+ // Read one byte to force coded_input.Refresh() to be called. Otherwise,
+ // ExpectTag() will return a false negative.
+ uint8 dummy;
+ coded_input.ReadRaw(&dummy, 1);
+ EXPECT_EQ((uint)'\0', (uint)dummy);
+
+ uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+
+ // ExpectTag() produces false negatives for large values.
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_FALSE(coded_input.ExpectTag(expected_value + 1));
+ EXPECT_TRUE(coded_input.ExpectTag(expected_value));
+ } else {
+ EXPECT_FALSE(coded_input.ExpectTag(expected_value));
+ }
+ }
+
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_EQ(kVarintCases_case.size + 1, input.ByteCount());
+ } else {
+ EXPECT_EQ(1, input.ByteCount());
+ }
+}
+
+TEST_1D(CodedStreamTest, ExpectTagFromArray, kVarintCases) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+
+ const uint32 expected_value = static_cast<uint32>(kVarintCases_case.value);
+
+ // If the expectation succeeds, it should return a pointer past the tag.
+ if (kVarintCases_case.size <= 2) {
+ EXPECT_TRUE(NULL ==
+ CodedInputStream::ExpectTagFromArray(buffer_,
+ expected_value + 1));
+ EXPECT_TRUE(buffer_ + kVarintCases_case.size ==
+ CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+ } else {
+ EXPECT_TRUE(NULL ==
+ CodedInputStream::ExpectTagFromArray(buffer_, expected_value));
+ }
+}
+
+TEST_2D(CodedStreamTest, ReadVarint64, kVarintCases, kBlockSizes) {
+ memcpy(buffer_, kVarintCases_case.bytes, kVarintCases_case.size);
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadVarint64(&value));
+ EXPECT_EQ(kVarintCases_case.value, value);
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, WriteVarint32, kVarintCases, kBlockSizes) {
+ if (kVarintCases_case.value > ULL(0x00000000FFFFFFFF)) {
+ // Skip this test for the 64-bit values.
+ return;
+ }
+
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteVarint32(static_cast<uint32>(kVarintCases_case.value));
+ EXPECT_FALSE(coded_output.HadError());
+
+ EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, output.ByteCount());
+ EXPECT_EQ(0,
+ memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size));
+}
+
+TEST_2D(CodedStreamTest, WriteVarint64, kVarintCases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteVarint64(kVarintCases_case.value);
+ EXPECT_FALSE(coded_output.HadError());
+
+ EXPECT_EQ(kVarintCases_case.size, coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(kVarintCases_case.size, output.ByteCount());
+ EXPECT_EQ(0,
+ memcmp(buffer_, kVarintCases_case.bytes, kVarintCases_case.size));
+}
+
+// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+int32 kSignExtendedVarintCases[] = {
+ 0, 1, -1, 1237894, -37895138
+};
+
+TEST_2D(CodedStreamTest, WriteVarint32SignExtended,
+ kSignExtendedVarintCases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteVarint32SignExtended(kSignExtendedVarintCases_case);
+ EXPECT_FALSE(coded_output.HadError());
+
+ if (kSignExtendedVarintCases_case < 0) {
+ EXPECT_EQ(10, coded_output.ByteCount());
+ } else {
+ EXPECT_LE(coded_output.ByteCount(), 5);
+ }
+ }
+
+ if (kSignExtendedVarintCases_case < 0) {
+ EXPECT_EQ(10, output.ByteCount());
+ } else {
+ EXPECT_LE(output.ByteCount(), 5);
+ }
+
+ // Read value back in as a varint64 and ensure it matches.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadVarint64(&value));
+
+ EXPECT_EQ(kSignExtendedVarintCases_case, static_cast<int64>(value));
+ }
+
+ EXPECT_EQ(output.ByteCount(), input.ByteCount());
+}
+
+#endif
+
+
+// -------------------------------------------------------------------
+// Varint failure test.
+
+struct VarintErrorCase {
+ uint8 bytes[12];
+ int size;
+ bool can_parse;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintErrorCase& c) {
+ return os << "size " << c.size;
+}
+
+const VarintErrorCase kVarintErrorCases[] = {
+ // Control case. (Ensures that there isn't something else wrong that
+ // makes parsing always fail.)
+ {{0x00}, 1, true},
+
+ // No input data.
+ {{}, 0, false},
+
+ // Input ends unexpectedly.
+ {{0xf0, 0xab}, 2, false},
+
+ // Input ends unexpectedly after 32 bits.
+ {{0xf0, 0xab, 0xc9, 0x9a, 0xf8, 0xb2}, 6, false},
+
+ // Longer than 10 bytes.
+ {{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x01},
+ 11, false},
+};
+
+TEST_2D(CodedStreamTest, ReadVarint32Error, kVarintErrorCases, kBlockSizes) {
+ memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size);
+ ArrayInputStream input(buffer_, kVarintErrorCases_case.size,
+ kBlockSizes_case);
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint32(&value));
+}
+
+TEST_2D(CodedStreamTest, ReadVarint64Error, kVarintErrorCases, kBlockSizes) {
+ memcpy(buffer_, kVarintErrorCases_case.bytes, kVarintErrorCases_case.size);
+ ArrayInputStream input(buffer_, kVarintErrorCases_case.size,
+ kBlockSizes_case);
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_EQ(kVarintErrorCases_case.can_parse, coded_input.ReadVarint64(&value));
+}
+
+// -------------------------------------------------------------------
+// VarintSize
+
+struct VarintSizeCase {
+ uint64 value;
+ int size;
+};
+
+inline std::ostream& operator<<(std::ostream& os, const VarintSizeCase& c) {
+ return os << c.value;
+}
+
+VarintSizeCase kVarintSizeCases[] = {
+ {0u, 1},
+ {1u, 1},
+ {127u, 1},
+ {128u, 2},
+ {758923u, 3},
+ {4000000000u, 5},
+ {ULL(41256202580718336), 8},
+ {ULL(11964378330978735131), 10},
+};
+
+TEST_1D(CodedStreamTest, VarintSize32, kVarintSizeCases) {
+ if (kVarintSizeCases_case.value > 0xffffffffu) {
+ // Skip 64-bit values.
+ return;
+ }
+
+ EXPECT_EQ(kVarintSizeCases_case.size,
+ CodedOutputStream::VarintSize32(
+ static_cast<uint32>(kVarintSizeCases_case.value)));
+}
+
+TEST_1D(CodedStreamTest, VarintSize64, kVarintSizeCases) {
+ EXPECT_EQ(kVarintSizeCases_case.size,
+ CodedOutputStream::VarintSize64(kVarintSizeCases_case.value));
+}
+
+// -------------------------------------------------------------------
+// Fixed-size int tests
+
+struct Fixed32Case {
+ uint8 bytes[sizeof(uint32)]; // Encoded bytes.
+ uint32 value; // Parsed value.
+};
+
+struct Fixed64Case {
+ uint8 bytes[sizeof(uint64)]; // Encoded bytes.
+ uint64 value; // Parsed value.
+};
+
+inline std::ostream& operator<<(std::ostream& os, const Fixed32Case& c) {
+ return os << "0x" << std::hex << c.value << std::dec;
+}
+
+inline std::ostream& operator<<(std::ostream& os, const Fixed64Case& c) {
+ return os << "0x" << std::hex << c.value << std::dec;
+}
+
+Fixed32Case kFixed32Cases[] = {
+ {{0xef, 0xcd, 0xab, 0x90}, 0x90abcdefu},
+ {{0x12, 0x34, 0x56, 0x78}, 0x78563412u},
+};
+
+Fixed64Case kFixed64Cases[] = {
+ {{0xef, 0xcd, 0xab, 0x90, 0x12, 0x34, 0x56, 0x78}, ULL(0x7856341290abcdef)},
+ {{0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77, 0x88}, ULL(0x8877665544332211)},
+};
+
+TEST_2D(CodedStreamTest, ReadLittleEndian32, kFixed32Cases, kBlockSizes) {
+ memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(kFixed32Cases_case.value, value);
+ }
+
+ EXPECT_EQ(sizeof(uint32), input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, ReadLittleEndian64, kFixed64Cases, kBlockSizes) {
+ memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ uint64 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian64(&value));
+ EXPECT_EQ(kFixed64Cases_case.value, value);
+ }
+
+ EXPECT_EQ(sizeof(uint64), input.ByteCount());
+}
+
+TEST_2D(CodedStreamTest, WriteLittleEndian32, kFixed32Cases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteLittleEndian32(kFixed32Cases_case.value);
+ EXPECT_FALSE(coded_output.HadError());
+
+ EXPECT_EQ(sizeof(uint32), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(uint32), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kFixed32Cases_case.bytes, sizeof(uint32)));
+}
+
+TEST_2D(CodedStreamTest, WriteLittleEndian64, kFixed64Cases, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteLittleEndian64(kFixed64Cases_case.value);
+ EXPECT_FALSE(coded_output.HadError());
+
+ EXPECT_EQ(sizeof(uint64), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(uint64), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kFixed64Cases_case.bytes, sizeof(uint64)));
+}
+
+// Tests using the static methods to read fixed-size values from raw arrays.
+
+TEST_1D(CodedStreamTest, ReadLittleEndian32FromArray, kFixed32Cases) {
+ memcpy(buffer_, kFixed32Cases_case.bytes, sizeof(kFixed32Cases_case.bytes));
+
+ uint32 value;
+ const uint8* end = CodedInputStream::ReadLittleEndian32FromArray(
+ buffer_, &value);
+ EXPECT_EQ(kFixed32Cases_case.value, value);
+ EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
+TEST_1D(CodedStreamTest, ReadLittleEndian64FromArray, kFixed64Cases) {
+ memcpy(buffer_, kFixed64Cases_case.bytes, sizeof(kFixed64Cases_case.bytes));
+
+ uint64 value;
+ const uint8* end = CodedInputStream::ReadLittleEndian64FromArray(
+ buffer_, &value);
+ EXPECT_EQ(kFixed64Cases_case.value, value);
+ EXPECT_TRUE(end == buffer_ + sizeof(value));
+}
+
+// -------------------------------------------------------------------
+// Raw reads and writes
+
+const char kRawBytes[] = "Some bytes which will be written and read raw.";
+
+TEST_1D(CodedStreamTest, ReadRaw, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+ char read_buffer[sizeof(kRawBytes)];
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_TRUE(coded_input.ReadRaw(read_buffer, sizeof(kRawBytes)));
+ EXPECT_EQ(0, memcmp(kRawBytes, read_buffer, sizeof(kRawBytes)));
+ }
+
+ EXPECT_EQ(sizeof(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, WriteRaw, kBlockSizes) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedOutputStream coded_output(&output);
+
+ coded_output.WriteRaw(kRawBytes, sizeof(kRawBytes));
+ EXPECT_FALSE(coded_output.HadError());
+
+ EXPECT_EQ(sizeof(kRawBytes), coded_output.ByteCount());
+ }
+
+ EXPECT_EQ(sizeof(kRawBytes), output.ByteCount());
+ EXPECT_EQ(0, memcmp(buffer_, kRawBytes, sizeof(kRawBytes)));
+}
+
+TEST_1D(CodedStreamTest, ReadString, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+// Check to make sure ReadString doesn't crash on impossibly large strings.
+TEST_1D(CodedStreamTest, ReadStringImpossiblyLarge, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ // Try to read a gigabyte.
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnStack) {
+ // Same test as above, except it uses a buffer directly. This used to cause
+ // crashes while the above did not.
+ uint8 buffer[8];
+ CodedInputStream coded_input(buffer, 8);
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
+TEST_F(CodedStreamTest, ReadStringImpossiblyLargeFromStringOnHeap) {
+ google::protobuf::scoped_array<uint8> buffer(new uint8[8]);
+ CodedInputStream coded_input(buffer.get(), 8);
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+}
+
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnTotalLimit, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(sizeof(kRawBytes), sizeof(kRawBytes));
+ EXPECT_EQ(sizeof(kRawBytes), coded_input.BytesUntilTotalBytesLimit());
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(sizeof(kRawBytes) - strlen(kRawBytes),
+ coded_input.BytesUntilTotalBytesLimit());
+ EXPECT_EQ(kRawBytes, str);
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_1D(CodedStreamTest, ReadStringReservesMemoryOnPushedLimit, kBlockSizes) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationIfLimitsNotSet) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ EXPECT_EQ(kRawBytes, str);
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate more than strlen(kRawBytes)
+ // if the content of kRawBytes is appended to string in small
+ // chunks.
+ // TODO(liujisi): Replace with a more meaningful test (see cl/60966023).
+ EXPECT_GE(str.capacity(), strlen(kRawBytes));
+ }
+
+ EXPECT_EQ(strlen(kRawBytes), input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsNegative) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, -1));
+ // Note: this check depends on string class implementation. It
+ // expects that string will always allocate the same amount of
+ // memory for an empty string.
+ EXPECT_EQ(string().capacity(), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsLarge) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1 << 30));
+ EXPECT_GT(1 << 30, str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheLimit) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest, ReadStringNoReservationSizeIsOverTheTotalBytesLimit) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(16, 16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest,
+ ReadStringNoReservationSizeIsOverTheClosestLimit_GlobalLimitIsCloser) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(sizeof(buffer_));
+ coded_input.SetTotalBytesLimit(16, 16);
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+TEST_F(CodedStreamTest,
+ ReadStringNoReservationSizeIsOverTheClosestLimit_LocalLimitIsCloser) {
+ memcpy(buffer_, kRawBytes, sizeof(kRawBytes));
+ // The buffer size in the input must be smaller than sizeof(kRawBytes);
+ // otherwise the check against capacity will fail, because ReadStringInline()
+ // would handle the reading and reserve the memory as needed.
+ ArrayInputStream input(buffer_, sizeof(buffer_), 32);
+
+ {
+ CodedInputStream coded_input(&input);
+ coded_input.PushLimit(16);
+ coded_input.SetTotalBytesLimit(sizeof(buffer_), sizeof(buffer_));
+ EXPECT_EQ(sizeof(buffer_), coded_input.BytesUntilTotalBytesLimit());
+
+ string str;
+ EXPECT_FALSE(coded_input.ReadString(&str, strlen(kRawBytes)));
+ // Note: this check depends on string class implementation. It
+ // expects that string will allocate less than strlen(kRawBytes)
+ // for an empty string.
+ EXPECT_GT(strlen(kRawBytes), str.capacity());
+ }
+}
+
+
+// -------------------------------------------------------------------
+// Skip
+
+const char kSkipTestBytes[] =
+ "<Before skipping><To be skipped><After skipping>";
+
+TEST_1D(CodedStreamTest, SkipInput, kBlockSizes) {
+ memcpy(buffer_, kSkipTestBytes, sizeof(kSkipTestBytes));
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen("<Before skipping>")));
+ EXPECT_EQ("<Before skipping>", str);
+ EXPECT_TRUE(coded_input.Skip(strlen("<To be skipped>")));
+ EXPECT_TRUE(coded_input.ReadString(&str, strlen("<After skipping>")));
+ EXPECT_EQ("<After skipping>", str);
+ }
+
+ EXPECT_EQ(strlen(kSkipTestBytes), input.ByteCount());
+}
+
+// -------------------------------------------------------------------
+// GetDirectBufferPointer
+
+TEST_F(CodedStreamTest, GetDirectBufferPointerInput) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), 8);
+ CodedInputStream coded_input(&input);
+
+ const void* ptr;
+ int size;
+
+ EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Peeking again should return the same pointer.
+ EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Skip forward in the same buffer then peek again.
+ EXPECT_TRUE(coded_input.Skip(3));
+ EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_ + 3, ptr);
+ EXPECT_EQ(5, size);
+
+ // Skip to end of buffer and peek -- should get next buffer.
+ EXPECT_TRUE(coded_input.Skip(5));
+ EXPECT_TRUE(coded_input.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_ + 8, ptr);
+ EXPECT_EQ(8, size);
+}
+
+TEST_F(CodedStreamTest, GetDirectBufferPointerInlineInput) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), 8);
+ CodedInputStream coded_input(&input);
+
+ const void* ptr;
+ int size;
+
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Peeking again should return the same pointer.
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Skip forward in the same buffer then peek again.
+ EXPECT_TRUE(coded_input.Skip(3));
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_ + 3, ptr);
+ EXPECT_EQ(5, size);
+
+  // Skip to end of buffer and peek -- should provide an empty buffer
+  // (size 0); GetDirectBufferPointerInline() does not try to Refresh().
+ EXPECT_TRUE(coded_input.Skip(5));
+ coded_input.GetDirectBufferPointerInline(&ptr, &size);
+ EXPECT_EQ(buffer_ + 8, ptr);
+ EXPECT_EQ(0, size);
+}
+
+TEST_F(CodedStreamTest, GetDirectBufferPointerOutput) {
+ ArrayOutputStream output(buffer_, sizeof(buffer_), 8);
+ CodedOutputStream coded_output(&output);
+
+ void* ptr;
+ int size;
+
+ EXPECT_TRUE(coded_output.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Peeking again should return the same pointer.
+ EXPECT_TRUE(coded_output.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_, ptr);
+ EXPECT_EQ(8, size);
+
+ // Skip forward in the same buffer then peek again.
+ EXPECT_TRUE(coded_output.Skip(3));
+ EXPECT_TRUE(coded_output.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_ + 3, ptr);
+ EXPECT_EQ(5, size);
+
+ // Skip to end of buffer and peek -- should get next buffer.
+ EXPECT_TRUE(coded_output.Skip(5));
+ EXPECT_TRUE(coded_output.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_ + 8, ptr);
+ EXPECT_EQ(8, size);
+
+ // Skip over multiple buffers.
+ EXPECT_TRUE(coded_output.Skip(22));
+ EXPECT_TRUE(coded_output.GetDirectBufferPointer(&ptr, &size));
+ EXPECT_EQ(buffer_ + 30, ptr);
+ EXPECT_EQ(2, size);
+}
+
+// -------------------------------------------------------------------
+// Limits
+
+TEST_1D(CodedStreamTest, BasicLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit = coded_input.PushLimit(8);
+
+ // Read until we hit the limit.
+ uint32 value;
+ EXPECT_EQ(8, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(12, input.ByteCount());
+}
+
+// Test what happens when we push two limits where the second (top) one is
+// shorter.
+TEST_1D(CodedStreamTest, SmallLimitOnTopOfBigLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit1 = coded_input.PushLimit(8);
+ EXPECT_EQ(8, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit2 = coded_input.PushLimit(4);
+
+ uint32 value;
+
+ // Read until we hit limit2, the top and shortest limit.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit2);
+
+ // Read until we hit limit1.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit1);
+
+ // No more limits.
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(12, input.ByteCount());
+}
+
+// Test what happens when we push two limits where the second (top) one is
+// longer. In this case, the top limit is shortened to match the previous
+// limit.
+TEST_1D(CodedStreamTest, BigLimitOnTopOfSmallLimit, kBlockSizes) {
+ ArrayInputStream input(buffer_, sizeof(buffer_), kBlockSizes_case);
+
+ {
+ CodedInputStream coded_input(&input);
+
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit1 = coded_input.PushLimit(4);
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ CodedInputStream::Limit limit2 = coded_input.PushLimit(8);
+
+ uint32 value;
+
+ // Read until we hit limit2. Except, wait! limit1 is shorter, so
+ // we end up hitting that first, despite having 4 bytes to go on
+ // limit2.
+ EXPECT_EQ(4, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit2);
+
+ // OK, popped limit2, now limit1 is on top, which we've already hit.
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+ EXPECT_FALSE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_EQ(0, coded_input.BytesUntilLimit());
+
+ coded_input.PopLimit(limit1);
+
+ // No more limits.
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ }
+
+ EXPECT_EQ(8, input.ByteCount());
+}
+
+TEST_F(CodedStreamTest, ExpectAtEnd) {
+ // Test ExpectAtEnd(), which is based on limits.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ EXPECT_FALSE(coded_input.ExpectAtEnd());
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(4);
+
+ uint32 value;
+ EXPECT_TRUE(coded_input.ReadLittleEndian32(&value));
+ EXPECT_TRUE(coded_input.ExpectAtEnd());
+
+ coded_input.PopLimit(limit);
+ EXPECT_FALSE(coded_input.ExpectAtEnd());
+}
+
+TEST_F(CodedStreamTest, NegativeLimit) {
+ // Check what happens when we push a negative limit.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(-1234);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, NegativeLimitAfterReading) {
+ // Check what happens when we push a negative limit.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ ASSERT_TRUE(coded_input.Skip(128));
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(-64);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, OverflowLimit) {
+ // Check what happens when we push a limit large enough that its absolute
+ // position is more than 2GB into the stream.
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ ASSERT_TRUE(coded_input.Skip(128));
+
+ CodedInputStream::Limit limit = coded_input.PushLimit(INT_MAX);
+ // BytesUntilLimit() returns -1 to mean "no limit", which actually means
+ // "the limit is INT_MAX relative to the beginning of the stream".
+ EXPECT_EQ(-1, coded_input.BytesUntilLimit());
+ coded_input.PopLimit(limit);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimit) {
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ coded_input.SetTotalBytesLimit(16, -1);
+ EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
+
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+ EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
+
+ vector<string> errors;
+
+ {
+ ScopedMemoryLog error_log;
+ EXPECT_FALSE(coded_input.ReadString(&str, 1));
+ errors = error_log.GetMessages(ERROR);
+ }
+
+ ASSERT_EQ(1, errors.size());
+ EXPECT_PRED_FORMAT2(testing::IsSubstring,
+ "A protocol message was rejected because it was too big", errors[0]);
+
+ coded_input.SetTotalBytesLimit(32, -1);
+ EXPECT_EQ(16, coded_input.BytesUntilTotalBytesLimit());
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+ EXPECT_EQ(0, coded_input.BytesUntilTotalBytesLimit());
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitNotValidMessageEnd) {
+ // total_bytes_limit_ is not a valid place for a message to end.
+
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+
+ // Set both total_bytes_limit and a regular limit at 16 bytes.
+ coded_input.SetTotalBytesLimit(16, -1);
+ CodedInputStream::Limit limit = coded_input.PushLimit(16);
+
+ // Read 16 bytes.
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 16));
+
+ // Read a tag. Should fail, but report being a valid endpoint since it's
+ // a regular limit.
+ EXPECT_EQ(0, coded_input.ReadTag());
+ EXPECT_TRUE(coded_input.ConsumedEntireMessage());
+
+ // Pop the limit.
+ coded_input.PopLimit(limit);
+
+ // Read a tag. Should fail, and report *not* being a valid endpoint, since
+ // this time we're hitting the total bytes limit.
+ EXPECT_EQ(0, coded_input.ReadTag());
+ EXPECT_FALSE(coded_input.ConsumedEntireMessage());
+}
+
+// This method is used by the tests below.
+// It constructs a CodedInputStream with the given limits and tries to read 2KiB
+// of data from it. Then it returns the logged errors and warnings in the given
+// vectors.
+void CodedStreamTest::SetupTotalBytesLimitWarningTest(
+ int total_bytes_limit, int warning_threshold,
+ vector<string>* out_errors, vector<string>* out_warnings) {
+ ArrayInputStream raw_input(buffer_, sizeof(buffer_), 128);
+
+ ScopedMemoryLog scoped_log;
+ {
+ CodedInputStream input(&raw_input);
+ input.SetTotalBytesLimit(total_bytes_limit, warning_threshold);
+ string str;
+ EXPECT_TRUE(input.ReadString(&str, 2048));
+ }
+
+ *out_errors = scoped_log.GetMessages(ERROR);
+ *out_warnings = scoped_log.GetMessages(WARNING);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitWarning) {
+ vector<string> errors;
+ vector<string> warnings;
+ SetupTotalBytesLimitWarningTest(10240, 1024, &errors, &warnings);
+
+ EXPECT_EQ(0, errors.size());
+
+ ASSERT_EQ(2, warnings.size());
+ EXPECT_PRED_FORMAT2(testing::IsSubstring,
+ "Reading dangerously large protocol message. If the message turns out to "
+ "be larger than 10240 bytes, parsing will be halted for security reasons.",
+ warnings[0]);
+ EXPECT_PRED_FORMAT2(testing::IsSubstring,
+ "The total number of bytes read was 2048",
+ warnings[1]);
+}
+
+TEST_F(CodedStreamTest, TotalBytesLimitWarningDisabled) {
+ vector<string> errors;
+ vector<string> warnings;
+
+ // Test with -1
+ SetupTotalBytesLimitWarningTest(10240, -1, &errors, &warnings);
+ EXPECT_EQ(0, errors.size());
+ EXPECT_EQ(0, warnings.size());
+
+ // Test again with -2, expecting the same result
+ SetupTotalBytesLimitWarningTest(10240, -2, &errors, &warnings);
+ EXPECT_EQ(0, errors.size());
+ EXPECT_EQ(0, warnings.size());
+}
+
+
+TEST_F(CodedStreamTest, RecursionLimit) {
+ ArrayInputStream input(buffer_, sizeof(buffer_));
+ CodedInputStream coded_input(&input);
+ coded_input.SetRecursionLimit(4);
+
+ // This is way too much testing for a counter.
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6
+ coded_input.DecrementRecursionDepth(); // 5
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 6
+ coded_input.DecrementRecursionDepth(); // 5
+ coded_input.DecrementRecursionDepth(); // 4
+ coded_input.DecrementRecursionDepth(); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+ coded_input.DecrementRecursionDepth(); // 4
+ coded_input.DecrementRecursionDepth(); // 3
+ coded_input.DecrementRecursionDepth(); // 2
+ coded_input.DecrementRecursionDepth(); // 1
+ coded_input.DecrementRecursionDepth(); // 0
+ coded_input.DecrementRecursionDepth(); // 0
+ coded_input.DecrementRecursionDepth(); // 0
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 1
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 2
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 3
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 4
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 5
+
+ coded_input.SetRecursionLimit(6);
+ EXPECT_TRUE(coded_input.IncrementRecursionDepth()); // 6
+ EXPECT_FALSE(coded_input.IncrementRecursionDepth()); // 7
+}
+
+
+class ReallyBigInputStream : public ZeroCopyInputStream {
+ public:
+ ReallyBigInputStream() : backup_amount_(0), buffer_count_(0) {}
+ ~ReallyBigInputStream() {}
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size) {
+ // We only expect BackUp() to be called at the end.
+ EXPECT_EQ(0, backup_amount_);
+
+ switch (buffer_count_++) {
+ case 0:
+ *data = buffer_;
+ *size = sizeof(buffer_);
+ return true;
+ case 1:
+ // Return an enormously large buffer that, when combined with the 1k
+ // returned already, should overflow the total_bytes_read_ counter in
+ // CodedInputStream. Note that we'll only read the first 1024 bytes
+ // of this buffer so it's OK that we have it point at buffer_.
+ *data = buffer_;
+ *size = INT_MAX;
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ void BackUp(int count) {
+ backup_amount_ = count;
+ }
+
+ bool Skip(int count) { GOOGLE_LOG(FATAL) << "Not implemented."; return false; }
+ int64 ByteCount() const { GOOGLE_LOG(FATAL) << "Not implemented."; return 0; }
+
+ int backup_amount_;
+
+ private:
+ char buffer_[1024];
+ int64 buffer_count_;
+};
+
+TEST_F(CodedStreamTest, InputOver2G) {
+ // CodedInputStream should gracefully handle input over 2G and call
+ // input.BackUp() with the correct number of bytes on destruction.
+ ReallyBigInputStream input;
+
+ vector<string> errors;
+
+ {
+ ScopedMemoryLog error_log;
+ CodedInputStream coded_input(&input);
+ string str;
+ EXPECT_TRUE(coded_input.ReadString(&str, 512));
+ EXPECT_TRUE(coded_input.ReadString(&str, 1024));
+ errors = error_log.GetMessages(ERROR);
+ }
+
+ EXPECT_EQ(INT_MAX - 512, input.backup_amount_);
+ EXPECT_EQ(0, errors.size());
+}
+
+// ===================================================================
+
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/gzip_stream.cc b/src/google/protobuf/io/gzip_stream.cc
new file mode 100644
index 0000000..1be6c86
--- /dev/null
+++ b/src/google/protobuf/io/gzip_stream.cc
@@ -0,0 +1,332 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//
+// This file contains the implementation of classes GzipInputStream and
+// GzipOutputStream.
+
+
+#if HAVE_ZLIB
+#include <google/protobuf/io/gzip_stream.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+static const int kDefaultBufferSize = 65536;
+
+GzipInputStream::GzipInputStream(
+ ZeroCopyInputStream* sub_stream, Format format, int buffer_size)
+ : format_(format), sub_stream_(sub_stream), zerror_(Z_OK), byte_count_(0) {
+ zcontext_.state = Z_NULL;
+ zcontext_.zalloc = Z_NULL;
+ zcontext_.zfree = Z_NULL;
+ zcontext_.opaque = Z_NULL;
+ zcontext_.total_out = 0;
+ zcontext_.next_in = NULL;
+ zcontext_.avail_in = 0;
+ zcontext_.total_in = 0;
+ zcontext_.msg = NULL;
+ if (buffer_size == -1) {
+ output_buffer_length_ = kDefaultBufferSize;
+ } else {
+ output_buffer_length_ = buffer_size;
+ }
+ output_buffer_ = operator new(output_buffer_length_);
+ GOOGLE_CHECK(output_buffer_ != NULL);
+ zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+ zcontext_.avail_out = output_buffer_length_;
+ output_position_ = output_buffer_;
+}
+GzipInputStream::~GzipInputStream() {
+ operator delete(output_buffer_);
+ zerror_ = inflateEnd(&zcontext_);
+}
+
+static inline int internalInflateInit2(
+ z_stream* zcontext, GzipInputStream::Format format) {
+ int windowBitsFormat = 0;
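+  // Per the zlib documentation for inflateInit2(): adding 16 to windowBits
+  // requests gzip decoding only, adding 32 enables automatic detection of
+  // gzip or zlib wrappers, and leaving it unchanged selects plain zlib.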
+ switch (format) {
+ case GzipInputStream::GZIP: windowBitsFormat = 16; break;
+ case GzipInputStream::AUTO: windowBitsFormat = 32; break;
+ case GzipInputStream::ZLIB: windowBitsFormat = 0; break;
+ }
+ return inflateInit2(zcontext, /* windowBits */15 | windowBitsFormat);
+}
+
+int GzipInputStream::Inflate(int flush) {
+ if ((zerror_ == Z_OK) && (zcontext_.avail_out == 0)) {
+ // previous inflate filled output buffer. don't change input params yet.
+ } else if (zcontext_.avail_in == 0) {
+ const void* in;
+ int in_size;
+ bool first = zcontext_.next_in == NULL;
+ bool ok = sub_stream_->Next(&in, &in_size);
+ if (!ok) {
+ zcontext_.next_out = NULL;
+ zcontext_.avail_out = 0;
+ return Z_STREAM_END;
+ }
+ zcontext_.next_in = static_cast<Bytef*>(const_cast<void*>(in));
+ zcontext_.avail_in = in_size;
+ if (first) {
+ int error = internalInflateInit2(&zcontext_, format_);
+ if (error != Z_OK) {
+ return error;
+ }
+ }
+ }
+ zcontext_.next_out = static_cast<Bytef*>(output_buffer_);
+ zcontext_.avail_out = output_buffer_length_;
+ output_position_ = output_buffer_;
+ int error = inflate(&zcontext_, flush);
+ return error;
+}
+
+void GzipInputStream::DoNextOutput(const void** data, int* size) {
+ *data = output_position_;
+  *size = reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+          reinterpret_cast<uintptr_t>(output_position_);
+ output_position_ = zcontext_.next_out;
+}
+
+// implements ZeroCopyInputStream ----------------------------------
+bool GzipInputStream::Next(const void** data, int* size) {
+ bool ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+ || (zerror_ == Z_BUF_ERROR);
+ if ((!ok) || (zcontext_.next_out == NULL)) {
+ return false;
+ }
+ if (zcontext_.next_out != output_position_) {
+ DoNextOutput(data, size);
+ return true;
+ }
+ if (zerror_ == Z_STREAM_END) {
+ if (zcontext_.next_out != NULL) {
+ // sub_stream_ may have concatenated streams to follow
+ zerror_ = inflateEnd(&zcontext_);
+ byte_count_ += zcontext_.total_out;
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ zerror_ = internalInflateInit2(&zcontext_, format_);
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ } else {
+ *data = NULL;
+ *size = 0;
+ return false;
+ }
+ }
+ zerror_ = Inflate(Z_NO_FLUSH);
+ if ((zerror_ == Z_STREAM_END) && (zcontext_.next_out == NULL)) {
+ // The underlying stream's Next returned false inside Inflate.
+ return false;
+ }
+ ok = (zerror_ == Z_OK) || (zerror_ == Z_STREAM_END)
+ || (zerror_ == Z_BUF_ERROR);
+ if (!ok) {
+ return false;
+ }
+ DoNextOutput(data, size);
+ return true;
+}
+void GzipInputStream::BackUp(int count) {
+ output_position_ = reinterpret_cast<void*>(
+ reinterpret_cast<uintptr_t>(output_position_) - count);
+}
+bool GzipInputStream::Skip(int count) {
+ const void* data;
+  int size = 0;  // Initialized because Next() may fail without setting size.
+ bool ok = Next(&data, &size);
+ while (ok && (size < count)) {
+ count -= size;
+ ok = Next(&data, &size);
+ }
+ if (size > count) {
+ BackUp(size - count);
+ }
+ return ok;
+}
+int64 GzipInputStream::ByteCount() const {
+ int64 ret = byte_count_ + zcontext_.total_out;
+ if (zcontext_.next_out != NULL && output_position_ != NULL) {
+ ret += reinterpret_cast<uintptr_t>(zcontext_.next_out) -
+ reinterpret_cast<uintptr_t>(output_position_);
+ }
+ return ret;
+}
+
+// =========================================================================
+
+GzipOutputStream::Options::Options()
+ : format(GZIP),
+ buffer_size(kDefaultBufferSize),
+ compression_level(Z_DEFAULT_COMPRESSION),
+ compression_strategy(Z_DEFAULT_STRATEGY) {}
+
+GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream) {
+ Init(sub_stream, Options());
+}
+
+GzipOutputStream::GzipOutputStream(ZeroCopyOutputStream* sub_stream,
+ const Options& options) {
+ Init(sub_stream, options);
+}
+
+void GzipOutputStream::Init(ZeroCopyOutputStream* sub_stream,
+ const Options& options) {
+ sub_stream_ = sub_stream;
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+
+ input_buffer_length_ = options.buffer_size;
+ input_buffer_ = operator new(input_buffer_length_);
+ GOOGLE_CHECK(input_buffer_ != NULL);
+
+ zcontext_.zalloc = Z_NULL;
+ zcontext_.zfree = Z_NULL;
+ zcontext_.opaque = Z_NULL;
+ zcontext_.next_out = NULL;
+ zcontext_.avail_out = 0;
+ zcontext_.total_out = 0;
+ zcontext_.next_in = NULL;
+ zcontext_.avail_in = 0;
+ zcontext_.total_in = 0;
+ zcontext_.msg = NULL;
+ // default to GZIP format
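+  // (Per the zlib documentation for deflateInit2(), adding 16 to windowBits
+  // makes deflate() write a gzip wrapper rather than the plain zlib wrapper.)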
+ int windowBitsFormat = 16;
+ if (options.format == ZLIB) {
+ windowBitsFormat = 0;
+ }
+ zerror_ = deflateInit2(
+ &zcontext_,
+ options.compression_level,
+ Z_DEFLATED,
+ /* windowBits */15 | windowBitsFormat,
+ /* memLevel (default) */8,
+ options.compression_strategy);
+}
+
+GzipOutputStream::~GzipOutputStream() {
+ Close();
+ if (input_buffer_ != NULL) {
+ operator delete(input_buffer_);
+ }
+}
+
+// private
+int GzipOutputStream::Deflate(int flush) {
+ int error = Z_OK;
+ do {
+ if ((sub_data_ == NULL) || (zcontext_.avail_out == 0)) {
+ bool ok = sub_stream_->Next(&sub_data_, &sub_data_size_);
+ if (!ok) {
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+ return Z_BUF_ERROR;
+ }
+ GOOGLE_CHECK_GT(sub_data_size_, 0);
+ zcontext_.next_out = static_cast<Bytef*>(sub_data_);
+ zcontext_.avail_out = sub_data_size_;
+ }
+ error = deflate(&zcontext_, flush);
+ } while (error == Z_OK && zcontext_.avail_out == 0);
+ if ((flush == Z_FULL_FLUSH) || (flush == Z_FINISH)) {
+ // Notify lower layer of data.
+ sub_stream_->BackUp(zcontext_.avail_out);
+ // We don't own the buffer anymore.
+ sub_data_ = NULL;
+ sub_data_size_ = 0;
+ }
+ return error;
+}
+
+// implements ZeroCopyOutputStream ---------------------------------
+bool GzipOutputStream::Next(void** data, int* size) {
+ if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+ return false;
+ }
+ if (zcontext_.avail_in != 0) {
+ zerror_ = Deflate(Z_NO_FLUSH);
+ if (zerror_ != Z_OK) {
+ return false;
+ }
+ }
+ if (zcontext_.avail_in == 0) {
+ // all input was consumed. reset the buffer.
+ zcontext_.next_in = static_cast<Bytef*>(input_buffer_);
+ zcontext_.avail_in = input_buffer_length_;
+ *data = input_buffer_;
+ *size = input_buffer_length_;
+ } else {
+ // The loop in Deflate should consume all avail_in
+ GOOGLE_LOG(DFATAL) << "Deflate left bytes unconsumed";
+ }
+ return true;
+}
+void GzipOutputStream::BackUp(int count) {
+ GOOGLE_CHECK_GE(zcontext_.avail_in, count);
+ zcontext_.avail_in -= count;
+}
+int64 GzipOutputStream::ByteCount() const {
+ return zcontext_.total_in + zcontext_.avail_in;
+}
+
+bool GzipOutputStream::Flush() {
+ zerror_ = Deflate(Z_FULL_FLUSH);
+ // Return true if the flush succeeded or if it was a no-op.
+ return (zerror_ == Z_OK) ||
+ (zerror_ == Z_BUF_ERROR && zcontext_.avail_in == 0 &&
+ zcontext_.avail_out != 0);
+}
+
+bool GzipOutputStream::Close() {
+ if ((zerror_ != Z_OK) && (zerror_ != Z_BUF_ERROR)) {
+ return false;
+ }
+ do {
+ zerror_ = Deflate(Z_FINISH);
+ } while (zerror_ == Z_OK);
+ zerror_ = deflateEnd(&zcontext_);
+ bool ok = zerror_ == Z_OK;
+ zerror_ = Z_STREAM_END;
+ return ok;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
+
+#endif // HAVE_ZLIB
diff --git a/src/google/protobuf/io/gzip_stream.h b/src/google/protobuf/io/gzip_stream.h
new file mode 100644
index 0000000..8244500
--- /dev/null
+++ b/src/google/protobuf/io/gzip_stream.h
@@ -0,0 +1,210 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: brianolson@google.com (Brian Olson)
+//
+// This file contains the definition for classes GzipInputStream and
+// GzipOutputStream.
+//
+// GzipInputStream decompresses data from an underlying
+// ZeroCopyInputStream and provides the decompressed data as a
+// ZeroCopyInputStream.
+//
+// GzipOutputStream is a ZeroCopyOutputStream that compresses data to
+// an underlying ZeroCopyOutputStream.
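+//
+// A minimal usage sketch (assumes FileInputStream/FileOutputStream from
+// zero_copy_stream_impl.h and a hypothetical message object; error handling
+// omitted):
+//
+//   // Decompress a gzip file and parse a message from it.
+//   FileInputStream raw_input(fd);
+//   GzipInputStream gzip_input(&raw_input);
+//   message.ParseFromZeroCopyStream(&gzip_input);
+//
+//   // Serialize a message as gzip-compressed output.
+//   FileOutputStream raw_output(fd);
+//   GzipOutputStream gzip_output(&raw_output);
+//   message.SerializeToZeroCopyStream(&gzip_output);
+//   gzip_output.Close();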
+
+#ifndef GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
+
+#include <zlib.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A ZeroCopyInputStream that reads compressed data through zlib
+class LIBPROTOBUF_EXPORT GzipInputStream : public ZeroCopyInputStream {
+ public:
+ // Format key for constructor
+ enum Format {
+ // zlib will autodetect gzip header or deflate stream
+ AUTO = 0,
+
+ // GZIP streams have some extra header data for file attributes.
+ GZIP = 1,
+
+ // Simpler zlib stream format.
+ ZLIB = 2,
+ };
+
+  // buffer_size may be -1 to use the default of 64kB; format defaults to AUTO.
+ explicit GzipInputStream(
+ ZeroCopyInputStream* sub_stream,
+ Format format = AUTO,
+ int buffer_size = -1);
+ virtual ~GzipInputStream();
+
+ // Return last error message or NULL if no error.
+ inline const char* ZlibErrorMessage() const {
+ return zcontext_.msg;
+ }
+ inline int ZlibErrorCode() const {
+ return zerror_;
+ }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ Format format_;
+
+ ZeroCopyInputStream* sub_stream_;
+
+ z_stream zcontext_;
+ int zerror_;
+
+ void* output_buffer_;
+ void* output_position_;
+ size_t output_buffer_length_;
+ int64 byte_count_;
+
+ int Inflate(int flush);
+ void DoNextOutput(const void** data, int* size);
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipInputStream);
+};
+
+
+class LIBPROTOBUF_EXPORT GzipOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Format key for constructor
+ enum Format {
+ // GZIP streams have some extra header data for file attributes.
+ GZIP = 1,
+
+ // Simpler zlib stream format.
+ ZLIB = 2,
+ };
+
+ struct Options {
+ // Defaults to GZIP.
+ Format format;
+
+ // What size buffer to use internally. Defaults to 64kB.
+ int buffer_size;
+
+ // A number between 0 and 9, where 0 is no compression and 9 is best
+ // compression. Defaults to Z_DEFAULT_COMPRESSION (see zlib.h).
+ int compression_level;
+
+ // Defaults to Z_DEFAULT_STRATEGY. Can also be set to Z_FILTERED,
+ // Z_HUFFMAN_ONLY, or Z_RLE. See the documentation for deflateInit2 in
+ // zlib.h for definitions of these constants.
+ int compression_strategy;
+
+ Options(); // Initializes with default values.
+ };
+
+ // Create a GzipOutputStream with default options.
+ explicit GzipOutputStream(ZeroCopyOutputStream* sub_stream);
+
+ // Create a GzipOutputStream with the given options.
+ GzipOutputStream(
+ ZeroCopyOutputStream* sub_stream,
+ const Options& options);
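+  //
+  // For example (a sketch; raw_output is any ZeroCopyOutputStream):
+  //
+  //   GzipOutputStream::Options options;
+  //   options.compression_level = 9;  // favor size over speed
+  //   GzipOutputStream gzip_output(&raw_output, options);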
+
+ virtual ~GzipOutputStream();
+
+ // Return last error message or NULL if no error.
+ inline const char* ZlibErrorMessage() const {
+ return zcontext_.msg;
+ }
+ inline int ZlibErrorCode() const {
+ return zerror_;
+ }
+
+ // Flushes data written so far to zipped data in the underlying stream.
+ // It is the caller's responsibility to flush the underlying stream if
+ // necessary.
+  // Compression may be less efficient when stopping and starting around flushes.
+ // Returns true if no error.
+ //
+ // Please ensure that block size is > 6. Here is an excerpt from the zlib
+ // doc that explains why:
+ //
+ // In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that avail_out
+ // is greater than six to avoid repeated flush markers due to
+ // avail_out == 0 on return.
+ bool Flush();
+
+ // Writes out all data and closes the gzip stream.
+ // It is the caller's responsibility to close the underlying stream if
+ // necessary.
+ // Returns true if no error.
+ bool Close();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ ZeroCopyOutputStream* sub_stream_;
+ // Result from calling Next() on sub_stream_
+ void* sub_data_;
+ int sub_data_size_;
+
+ z_stream zcontext_;
+ int zerror_;
+ void* input_buffer_;
+ size_t input_buffer_length_;
+
+ // Shared constructor code.
+ void Init(ZeroCopyOutputStream* sub_stream, const Options& options);
+
+ // Do some compression.
+ // Takes zlib flush mode.
+ // Returns zlib error code.
+ int Deflate(int flush);
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(GzipOutputStream);
+};
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_GZIP_STREAM_H__
diff --git a/src/google/protobuf/io/gzip_stream_unittest.sh b/src/google/protobuf/io/gzip_stream_unittest.sh
new file mode 100755
index 0000000..16251a9
--- /dev/null
+++ b/src/google/protobuf/io/gzip_stream_unittest.sh
@@ -0,0 +1,44 @@
+#!/bin/sh -x
+#
+# Protocol Buffers - Google's data interchange format
+# Copyright 2009 Google Inc. All rights reserved.
+# https://developers.google.com/protocol-buffers/
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following disclaimer
+# in the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Author: brianolson@google.com (Brian Olson)
+#
+# Test compatibility between command line gzip/gunzip binaries and
+# ZeroCopyStream versions.
+
+TESTFILE=Makefile
+
+(./zcgzip < ${TESTFILE} | gunzip | cmp - ${TESTFILE}) && \
+(gzip < ${TESTFILE} | ./zcgunzip | cmp - ${TESTFILE})
+
+# Result of "(cmd) && (cmd)" implicitly becomes result of this script
+# and thus the test.
diff --git a/src/google/protobuf/io/package_info.h b/src/google/protobuf/io/package_info.h
new file mode 100644
index 0000000..dc1fc91
--- /dev/null
+++ b/src/google/protobuf/io/package_info.h
@@ -0,0 +1,54 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file exists solely to document the google::protobuf::io namespace.
+// It is not compiled into anything, but it may be read by an automated
+// documentation generator.
+
+namespace google {
+
+namespace protobuf {
+
+// Auxiliary classes used for I/O.
+//
+// The Protocol Buffer library uses the classes in this package to deal with
+// I/O and encoding/decoding raw bytes. Most users will not need to
+// deal with this package. However, users who want to adapt the system to
+// work with their own I/O abstractions -- e.g., to allow Protocol Buffers
+// to be read from a different kind of input stream without the need for a
+// temporary buffer -- should take a closer look.
+namespace io {}
+
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/printer.cc b/src/google/protobuf/io/printer.cc
new file mode 100644
index 0000000..7d88650
--- /dev/null
+++ b/src/google/protobuf/io/printer.cc
@@ -0,0 +1,284 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <google/protobuf/io/printer.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+Printer::Printer(ZeroCopyOutputStream* output, char variable_delimiter)
+ : variable_delimiter_(variable_delimiter),
+ output_(output),
+ buffer_(NULL),
+ buffer_size_(0),
+ at_start_of_line_(true),
+ failed_(false) {
+}
+
+Printer::~Printer() {
+ // Only BackUp() if we have called Next() at least once and never failed.
+ if (buffer_size_ > 0 && !failed_) {
+ output_->BackUp(buffer_size_);
+ }
+}
+
+void Printer::Print(const map<string, string>& variables, const char* text) {
+ int size = strlen(text);
+ int pos = 0; // The number of bytes we've written so far.
+
+ for (int i = 0; i < size; i++) {
+ if (text[i] == '\n') {
+ // Saw newline. If there is more text, we may need to insert an indent
+ // here. So, write what we have so far, including the '\n'.
+ WriteRaw(text + pos, i - pos + 1);
+ pos = i + 1;
+
+ // Setting this true will cause the next WriteRaw() to insert an indent
+ // first.
+ at_start_of_line_ = true;
+
+ } else if (text[i] == variable_delimiter_) {
+ // Saw the start of a variable name.
+
+ // Write what we have so far.
+ WriteRaw(text + pos, i - pos);
+ pos = i + 1;
+
+ // Find closing delimiter.
+ const char* end = strchr(text + pos, variable_delimiter_);
+ if (end == NULL) {
+ GOOGLE_LOG(DFATAL) << " Unclosed variable name.";
+ end = text + pos;
+ }
+ int endpos = end - text;
+
+ string varname(text + pos, endpos - pos);
+ if (varname.empty()) {
+ // Two delimiters in a row reduce to a literal delimiter character.
+ WriteRaw(&variable_delimiter_, 1);
+ } else {
+ // Replace with the variable's value.
+ map<string, string>::const_iterator iter = variables.find(varname);
+ if (iter == variables.end()) {
+ GOOGLE_LOG(DFATAL) << " Undefined variable: " << varname;
+ } else {
+ WriteRaw(iter->second.data(), iter->second.size());
+ }
+ }
+
+ // Advance past this variable.
+ i = endpos;
+ pos = endpos + 1;
+ }
+ }
+
+ // Write the rest.
+ WriteRaw(text + pos, size - pos);
+}
+
+void Printer::Print(const char* text) {
+ static map<string, string> empty;
+ Print(empty, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable, const string& value) {
+ map<string, string> vars;
+ vars[variable] = value;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ vars[variable4] = value4;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ vars[variable4] = value4;
+ vars[variable5] = value5;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ vars[variable4] = value4;
+ vars[variable5] = value5;
+ vars[variable6] = value6;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6,
+ const char* variable7, const string& value7) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ vars[variable4] = value4;
+ vars[variable5] = value5;
+ vars[variable6] = value6;
+ vars[variable7] = value7;
+ Print(vars, text);
+}
+
+void Printer::Print(const char* text,
+ const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6,
+ const char* variable7, const string& value7,
+ const char* variable8, const string& value8) {
+ map<string, string> vars;
+ vars[variable1] = value1;
+ vars[variable2] = value2;
+ vars[variable3] = value3;
+ vars[variable4] = value4;
+ vars[variable5] = value5;
+ vars[variable6] = value6;
+ vars[variable7] = value7;
+ vars[variable8] = value8;
+ Print(vars, text);
+}
+
+void Printer::Indent() {
+ indent_ += " ";
+}
+
+void Printer::Outdent() {
+ if (indent_.empty()) {
+ GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
+ return;
+ }
+
+ indent_.resize(indent_.size() - 2);
+}
+
+void Printer::PrintRaw(const string& data) {
+ WriteRaw(data.data(), data.size());
+}
+
+void Printer::PrintRaw(const char* data) {
+ if (failed_) return;
+ WriteRaw(data, strlen(data));
+}
+
+void Printer::WriteRaw(const char* data, int size) {
+ if (failed_) return;
+ if (size == 0) return;
+
+ if (at_start_of_line_ && (size > 0) && (data[0] != '\n')) {
+ // Insert an indent.
+ at_start_of_line_ = false;
+ WriteRaw(indent_.data(), indent_.size());
+ if (failed_) return;
+ }
+
+ while (size > buffer_size_) {
+ // Data exceeds space in the buffer. Copy what we can and request a
+ // new buffer.
+ memcpy(buffer_, data, buffer_size_);
+ data += buffer_size_;
+ size -= buffer_size_;
+ void* void_buffer;
+ failed_ = !output_->Next(&void_buffer, &buffer_size_);
+ if (failed_) return;
+ buffer_ = reinterpret_cast<char*>(void_buffer);
+ }
+
+ // Buffer is big enough to receive the data; copy it.
+ memcpy(buffer_, data, size);
+ buffer_ += size;
+ buffer_size_ -= size;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/printer.h b/src/google/protobuf/io/printer.h
new file mode 100644
index 0000000..f1490bb
--- /dev/null
+++ b/src/google/protobuf/io/printer.h
@@ -0,0 +1,169 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Utility class for writing text to a ZeroCopyOutputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_PRINTER_H__
+#define GOOGLE_PROTOBUF_IO_PRINTER_H__
+
+#include <string>
+#include <map>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+class ZeroCopyOutputStream; // zero_copy_stream.h
+
+// This simple utility class assists in code generation. It basically
+// allows the caller to define a set of variables and then output some
+// text with variable substitutions. Example usage:
+//
+// Printer printer(output, '$');
+// map<string, string> vars;
+// vars["name"] = "Bob";
+// printer.Print(vars, "My name is $name$.");
+//
+// The above writes "My name is Bob." to the output stream.
+//
+// Printer aggressively enforces correct usage, crashing (with assert failures)
+// in the case of undefined variables in debug builds. This helps greatly in
+// debugging code which uses it.
+class LIBPROTOBUF_EXPORT Printer {
+ public:
+ // Create a printer that writes text to the given output stream. Use the
+ // given character as the delimiter for variables.
+ Printer(ZeroCopyOutputStream* output, char variable_delimiter);
+ ~Printer();
+
+ // Print some text after applying variable substitutions. If a particular
+ // variable in the text is not defined, this will crash. Variables to be
+ // substituted are identified by their names surrounded by delimiter
+ // characters (as given to the constructor). The variable bindings are
+ // defined by the given map.
+ void Print(const map<string, string>& variables, const char* text);
+
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable, const string& value);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6,
+ const char* variable7, const string& value7);
+ // Like the first Print(), except the substitutions are given as parameters.
+ void Print(const char* text, const char* variable1, const string& value1,
+ const char* variable2, const string& value2,
+ const char* variable3, const string& value3,
+ const char* variable4, const string& value4,
+ const char* variable5, const string& value5,
+ const char* variable6, const string& value6,
+ const char* variable7, const string& value7,
+ const char* variable8, const string& value8);
+
+ // Indent text by two spaces. After calling Indent(), two spaces will be
+ // inserted at the beginning of each line of text. Indent() may be called
+ // multiple times to produce deeper indents.
+ void Indent();
+
+ // Reduces the current indent level by two spaces, or crashes if the indent
+ // level is zero.
+ void Outdent();
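+  //
+  // For example (a sketch), the sequence:
+  //   printer.Print("if (foo) {\n");
+  //   printer.Indent();
+  //   printer.Print("bar();\n");
+  //   printer.Outdent();
+  //   printer.Print("}\n");
+  // produces:
+  //   if (foo) {
+  //     bar();
+  //   }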
+
+ // Write a string to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void PrintRaw(const string& data);
+
+  // Write a NUL-terminated string to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void PrintRaw(const char* data);
+
+ // Write some bytes to the output buffer.
+ // This method does not look for newlines to add indentation.
+ void WriteRaw(const char* data, int size);
+
+ // True if any write to the underlying stream failed. (We don't just
+ // crash in this case because this is an I/O failure, not a programming
+ // error.)
+ bool failed() const { return failed_; }
+
+ private:
+ const char variable_delimiter_;
+
+ ZeroCopyOutputStream* const output_;
+ char* buffer_;
+ int buffer_size_;
+
+ string indent_;
+ bool at_start_of_line_;
+ bool failed_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Printer);
+};
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_PRINTER_H__
diff --git a/src/google/protobuf/io/printer_unittest.cc b/src/google/protobuf/io/printer_unittest.cc
new file mode 100644
index 0000000..258dd98
--- /dev/null
+++ b/src/google/protobuf/io/printer_unittest.cc
@@ -0,0 +1,286 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <vector>
+
+#include <google/protobuf/io/printer.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// Each test repeats over several block sizes in order to test both cases
+// where particular writes cross a buffer boundary and cases where they do
+// not.
+
+TEST(Printer, EmptyPrinter) {
+ char buffer[8192];
+ const int block_size = 100;
+ ArrayOutputStream output(buffer, GOOGLE_ARRAYSIZE(buffer), block_size);
+ Printer printer(&output, '\0');
+ EXPECT_TRUE(!printer.failed());
+}
+
+TEST(Printer, BasicPrinting) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '\0');
+
+ printer.Print("Hello World!");
+ printer.Print(" This is the same line.\n");
+ printer.Print("But this is a new one.\nAnd this is another one.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ("Hello World! This is the same line.\n"
+ "But this is a new one.\n"
+ "And this is another one.",
+ buffer);
+ }
+}
+
+TEST(Printer, WriteRaw) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ string string_obj = "From an object\n";
+ Printer printer(&output, '$');
+ printer.WriteRaw("Hello World!", 12);
+ printer.PrintRaw(" This is the same line.\n");
+ printer.PrintRaw("But this is a new one.\nAnd this is another one.");
+ printer.WriteRaw("\n", 1);
+ printer.PrintRaw(string_obj);
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ("Hello World! This is the same line.\n"
+ "But this is a new one.\n"
+ "And this is another one."
+ "\n"
+ "From an object\n",
+ buffer);
+ }
+}
+
+TEST(Printer, VariableSubstitution) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '$');
+ map<string, string> vars;
+
+ vars["foo"] = "World";
+ vars["bar"] = "$foo$";
+ vars["abcdefg"] = "1234";
+
+ printer.Print(vars, "Hello $foo$!\nbar = $bar$\n");
+ printer.PrintRaw("RawBit\n");
+ printer.Print(vars, "$abcdefg$\nA literal dollar sign: $$");
+
+ vars["foo"] = "blah";
+ printer.Print(vars, "\nNow foo = $foo$.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ("Hello World!\n"
+ "bar = $foo$\n"
+ "RawBit\n"
+ "1234\n"
+ "A literal dollar sign: $\n"
+ "Now foo = blah.",
+ buffer);
+ }
+}
+
+TEST(Printer, InlineVariableSubstitution) {
+ char buffer[8192];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+
+ {
+ Printer printer(&output, '$');
+ printer.Print("Hello $foo$!\n", "foo", "World");
+ printer.PrintRaw("RawBit\n");
+ printer.Print("$foo$ $bar$\n", "foo", "one", "bar", "two");
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ("Hello World!\n"
+ "RawBit\n"
+ "one two\n",
+ buffer);
+}
+
+TEST(Printer, Indenting) {
+ char buffer[8192];
+
+ for (int block_size = 1; block_size < 512; block_size *= 2) {
+ ArrayOutputStream output(buffer, sizeof(buffer), block_size);
+
+ {
+ Printer printer(&output, '$');
+ map<string, string> vars;
+
+ vars["newline"] = "\n";
+
+ printer.Print("This is not indented.\n");
+ printer.Indent();
+ printer.Print("This is indented\nAnd so is this\n");
+ printer.Outdent();
+ printer.Print("But this is not.");
+ printer.Indent();
+ printer.Print(" And this is still the same line.\n"
+ "But this is indented.\n");
+ printer.PrintRaw("RawBit has indent at start\n");
+ printer.PrintRaw("but not after a raw newline\n");
+ printer.Print(vars, "Note that a newline in a variable will break "
+ "indenting, as we see$newline$here.\n");
+ printer.Indent();
+ printer.Print("And this");
+ printer.Outdent();
+ printer.Outdent();
+ printer.Print(" is double-indented\nBack to normal.");
+
+ EXPECT_FALSE(printer.failed());
+ }
+
+ buffer[output.ByteCount()] = '\0';
+
+ EXPECT_STREQ(
+ "This is not indented.\n"
+ " This is indented\n"
+ " And so is this\n"
+ "But this is not. And this is still the same line.\n"
+ " But this is indented.\n"
+ " RawBit has indent at start\n"
+ "but not after a raw newline\n"
+ "Note that a newline in a variable will break indenting, as we see\n"
+ "here.\n"
+ " And this is double-indented\n"
+ "Back to normal.",
+ buffer);
+ }
+}
+
+// Death tests do not work on Windows as of yet.
+#ifdef PROTOBUF_HAS_DEATH_TEST
+TEST(Printer, Death) {
+ char buffer[8192];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+ Printer printer(&output, '$');
+
+ EXPECT_DEBUG_DEATH(printer.Print("$nosuchvar$"), "Undefined variable");
+ EXPECT_DEBUG_DEATH(printer.Print("$unclosed"), "Unclosed variable name");
+ EXPECT_DEBUG_DEATH(printer.Outdent(), "without matching Indent");
+}
+#endif // PROTOBUF_HAS_DEATH_TEST
+
+TEST(Printer, WriteFailurePartial) {
+ char buffer[17];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+ Printer printer(&output, '$');
+
+ // Print 16 bytes to almost fill the buffer (should not fail).
+ printer.Print("0123456789abcdef");
+ EXPECT_FALSE(printer.failed());
+
+ // Try to print 2 chars. Only one fits.
+ printer.Print("<>");
+ EXPECT_TRUE(printer.failed());
+
+ // Anything else should fail too.
+ printer.Print(" ");
+ EXPECT_TRUE(printer.failed());
+ printer.Print("blah");
+ EXPECT_TRUE(printer.failed());
+
+ // Buffer should contain the first 17 bytes written.
+ EXPECT_EQ("0123456789abcdef<", string(buffer, sizeof(buffer)));
+}
+
+TEST(Printer, WriteFailureExact) {
+ char buffer[16];
+
+ ArrayOutputStream output(buffer, sizeof(buffer));
+ Printer printer(&output, '$');
+
+ // Print 16 bytes to fill the buffer exactly (should not fail).
+ printer.Print("0123456789abcdef");
+ EXPECT_FALSE(printer.failed());
+
+ // Try to print one more byte (should fail).
+ printer.Print(" ");
+ EXPECT_TRUE(printer.failed());
+
+ // Should not crash
+ printer.Print("blah");
+ EXPECT_TRUE(printer.failed());
+
+ // Buffer should contain the first 16 bytes written.
+ EXPECT_EQ("0123456789abcdef", string(buffer, sizeof(buffer)));
+}
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/strtod.cc b/src/google/protobuf/io/strtod.cc
new file mode 100644
index 0000000..a90bb9a
--- /dev/null
+++ b/src/google/protobuf/io/strtod.cc
@@ -0,0 +1,125 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <google/protobuf/io/strtod.h>
+
+#include <cstdio>
+#include <cstring>
+#include <limits>
+#include <string>
+
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ----------------------------------------------------------------------
+// NoLocaleStrtod()
+// This code will make you cry.
+// ----------------------------------------------------------------------
+
+namespace {
+
+// Returns a string identical to *input except that the character pointed to
+// by radix_pos (which should be '.') is replaced with the locale-specific
+// radix character.
+string LocalizeRadix(const char* input, const char* radix_pos) {
+ // Determine the locale-specific radix character by calling sprintf() to
+ // print the number 1.5, then stripping off the digits. As far as I can
+ // tell, this is the only portable, thread-safe way to get the C library
+ // to divulge the locale's radix character. No, localeconv() is NOT
+ // thread-safe.
+ char temp[16];
+ int size = sprintf(temp, "%.1f", 1.5);
+ GOOGLE_CHECK_EQ(temp[0], '1');
+ GOOGLE_CHECK_EQ(temp[size-1], '5');
+ GOOGLE_CHECK_LE(size, 6);
+
+ // Now replace the '.' in the input with it.
+ string result;
+ result.reserve(strlen(input) + size - 3);
+ result.append(input, radix_pos);
+ result.append(temp + 1, size - 2);
+ result.append(radix_pos + 1);
+ return result;
+}
+
+} // namespace
+
+double NoLocaleStrtod(const char* text, char** original_endptr) {
+ // We cannot simply set the locale to "C" temporarily with setlocale()
+ // as this is not thread-safe. Instead, we try to parse in the current
+ // locale first. If parsing stops at a '.' character, then this is a
+ // pretty good hint that we're actually in some other locale in which
+ // '.' is not the radix character.
+
+ char* temp_endptr;
+ double result = strtod(text, &temp_endptr);
+ if (original_endptr != NULL) *original_endptr = temp_endptr;
+ if (*temp_endptr != '.') return result;
+
+ // Parsing halted on a '.'. Perhaps we're in a different locale? Let's
+ // try to replace the '.' with a locale-specific radix character and
+ // try again.
+ string localized = LocalizeRadix(text, temp_endptr);
+ const char* localized_cstr = localized.c_str();
+ char* localized_endptr;
+ result = strtod(localized_cstr, &localized_endptr);
+ if ((localized_endptr - localized_cstr) >
+ (temp_endptr - text)) {
+ // This attempt got further, so replacing the decimal must have helped.
+ // Update original_endptr to point at the right location.
+ if (original_endptr != NULL) {
+ // size_diff is non-zero if the localized radix has multiple bytes.
+ int size_diff = localized.size() - strlen(text);
+ // const_cast is necessary to match the strtod() interface.
+ *original_endptr = const_cast<char*>(
+ text + (localized_endptr - localized_cstr - size_diff));
+ }
+ }
+
+ return result;
+}
+
+float SafeDoubleToFloat(double value) {
+ if (value > std::numeric_limits<float>::max()) {
+ return std::numeric_limits<float>::infinity();
+ } else if (value < -std::numeric_limits<float>::max()) {
+ return -std::numeric_limits<float>::infinity();
+ } else {
+ return static_cast<float>(value);
+ }
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/strtod.h b/src/google/protobuf/io/strtod.h
new file mode 100644
index 0000000..f56e41c
--- /dev/null
+++ b/src/google/protobuf/io/strtod.h
@@ -0,0 +1,55 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// A locale-independent version of strtod(), used to parse floating
+// point default values in .proto files, where the decimal separator
+// is always a dot.
+
+#ifndef GOOGLE_PROTOBUF_IO_STRTOD_H__
+#define GOOGLE_PROTOBUF_IO_STRTOD_H__
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// A locale-independent version of the standard strtod(), which always
+// uses a dot as the decimal separator.
+double NoLocaleStrtod(const char* str, char** endptr);
+
+// Casts a double value to a float value. If the value is outside of the
+// representable range of float, it will be converted to positive or negative
+// infinity.
+float SafeDoubleToFloat(double value);
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_STRTOD_H__
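As a quick sanity check of the contract above, the following sketch shows both helpers in action. The main() harness and the literal values are invented for illustration; only NoLocaleStrtod() and SafeDoubleToFloat() come from this header.

#include <cassert>
#include <cstddef>
#include <limits>
#include <google/protobuf/io/strtod.h>

int main() {
  using google::protobuf::io::NoLocaleStrtod;
  using google::protobuf::io::SafeDoubleToFloat;

  // In a locale whose radix character is ',', plain strtod() would stop at
  // the '.', but NoLocaleStrtod() still parses the whole literal.
  char* end = NULL;
  double d = NoLocaleStrtod("2.5", &end);
  assert(d == 2.5 && *end == '\0');

  // Out-of-range doubles collapse to +/-infinity rather than relying on an
  // undefined double-to-float conversion.
  assert(SafeDoubleToFloat(1e300) ==
         std::numeric_limits<float>::infinity());
  return 0;
}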
diff --git a/src/google/protobuf/io/tokenizer.cc b/src/google/protobuf/io/tokenizer.cc
new file mode 100644
index 0000000..3d57707
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer.cc
@@ -0,0 +1,1137 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Here we have a hand-written lexer. At first you might ask yourself,
+// "Hand-written text processing? Is Kenton crazy?!" Well, first of all,
+// yes I am crazy, but that's beside the point. There are actually reasons
+// why I ended up writing it this way.
+//
+// The traditional approach to lexing is to use lex to generate a lexer for
+// you. Unfortunately, lex's output is ridiculously ugly and difficult to
+// integrate cleanly with C++ code, especially abstract code or code meant
+// as a library. Better parser-generators exist but would add dependencies
+// which most users won't already have, which we'd like to avoid. (GNU flex
+// has a C++ output option, but it's still ridiculously ugly, non-abstract,
+// and not library-friendly.)
+//
+// The next approach that any good software engineer should look at is to
+// use regular expressions. And, indeed, I did. I have code which
+// implements this same class using regular expressions. It's about 200
+// lines shorter. However:
+// - Rather than error messages telling you "This string has an invalid
+// escape sequence at line 5, column 45", you get error messages like
+// "Parse error on line 5". Giving more precise errors requires adding
+// a lot of code that ends up basically as complex as the hand-coded
+// version anyway.
+// - The regular expression to match a string literal looks like this:
+// kString = new RE("(\"([^\"\\\\]|" // non-escaped
+// "\\\\[abfnrtv?\"'\\\\0-7]|" // normal escape
+// "\\\\x[0-9a-fA-F])*\"|" // hex escape
+// "\'([^\'\\\\]|" // Also support single-quotes.
+// "\\\\[abfnrtv?\"'\\\\0-7]|"
+// "\\\\x[0-9a-fA-F])*\')");
+// Verifying the correctness of this line noise is actually harder than
+// verifying the correctness of ConsumeString(), defined below. I'm not
+// even confident that the above is correct, after staring at it for some
+// time.
+// - PCRE is fast, but there's still more overhead involved than the code
+// below.
+// - Sadly, regular expressions are not part of the C standard library, so
+// using them would require depending on some other library. For the
+// open source release, this could be really annoying. Nobody likes
+// downloading one piece of software just to find that they need to
+// download something else to make it work, and in all likelihood
+// people downloading Protocol Buffers will already be doing so just
+// to make something else work. We could include a copy of PCRE with
+// our code, but that obligates us to keep it up-to-date and just seems
+// like a big waste just to save 200 lines of code.
+//
+// On a similar but unrelated note, I'm even scared to use ctype.h.
+// Apparently functions like isalpha() are locale-dependent. So, if we used
+// that, then if this code is being called from some program that doesn't
+// have its locale set to "C", it would behave strangely. We can't just set
+// the locale to "C" ourselves since we might break the calling program that
+// way, particularly if it is multi-threaded. WTF? Someone please let me
+// (Kenton) know if I'm missing something here...
+//
+// I'd love to hear about other alternatives, though, as this code isn't
+// exactly pretty.
+
+#include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/stringprintf.h>
+#include <google/protobuf/io/strtod.h>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/stl_util.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// As mentioned above, I don't trust ctype.h due to the presence of "locales".
+// So, I have written replacement functions here. Someone please smack me if
+// this is a bad idea or if there is some way around this.
+//
+// These "character classes" are designed to be used in template methods.
+// For instance, Tokenizer::ConsumeZeroOrMore<Whitespace>() will eat
+// whitespace.
+
+// Note: No class is allowed to contain '\0', since this is used to mark end-
+// of-input and is handled specially.
+
+#define CHARACTER_CLASS(NAME, EXPRESSION) \
+ class NAME { \
+ public: \
+ static inline bool InClass(char c) { \
+ return EXPRESSION; \
+ } \
+ }
+
+CHARACTER_CLASS(Whitespace, c == ' ' || c == '\n' || c == '\t' ||
+ c == '\r' || c == '\v' || c == '\f');
+CHARACTER_CLASS(WhitespaceNoNewline, c == ' ' || c == '\t' ||
+ c == '\r' || c == '\v' || c == '\f');
+
+CHARACTER_CLASS(Unprintable, c < ' ' && c > '\0');
+
+CHARACTER_CLASS(Digit, '0' <= c && c <= '9');
+CHARACTER_CLASS(OctalDigit, '0' <= c && c <= '7');
+CHARACTER_CLASS(HexDigit, ('0' <= c && c <= '9') ||
+ ('a' <= c && c <= 'f') ||
+ ('A' <= c && c <= 'F'));
+
+CHARACTER_CLASS(Letter, ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ (c == '_'));
+
+CHARACTER_CLASS(Alphanumeric, ('a' <= c && c <= 'z') ||
+ ('A' <= c && c <= 'Z') ||
+ ('0' <= c && c <= '9') ||
+ (c == '_'));
+
+CHARACTER_CLASS(Escape, c == 'a' || c == 'b' || c == 'f' || c == 'n' ||
+ c == 'r' || c == 't' || c == 'v' || c == '\\' ||
+ c == '?' || c == '\'' || c == '\"');
+
+#undef CHARACTER_CLASS
+
+// Given a char, interpret it as a numeric digit and return its value.
+// This supports any number base up to 36.
+inline int DigitValue(char digit) {
+ if ('0' <= digit && digit <= '9') return digit - '0';
+ if ('a' <= digit && digit <= 'z') return digit - 'a' + 10;
+ if ('A' <= digit && digit <= 'Z') return digit - 'A' + 10;
+ return -1;
+}
+
+// Inline because it's only used in one place.
+inline char TranslateEscape(char c) {
+ switch (c) {
+ case 'a': return '\a';
+ case 'b': return '\b';
+ case 'f': return '\f';
+ case 'n': return '\n';
+ case 'r': return '\r';
+ case 't': return '\t';
+ case 'v': return '\v';
+ case '\\': return '\\';
+ case '?': return '\?'; // Trigraphs = :(
+ case '\'': return '\'';
+ case '"': return '\"';
+
+ // We expect escape sequences to have been validated separately.
+ default: return '?';
+ }
+}
+
+} // anonymous namespace
+
+ErrorCollector::~ErrorCollector() {}
+
+// ===================================================================
+
+Tokenizer::Tokenizer(ZeroCopyInputStream* input,
+ ErrorCollector* error_collector)
+ : input_(input),
+ error_collector_(error_collector),
+ buffer_(NULL),
+ buffer_size_(0),
+ buffer_pos_(0),
+ read_error_(false),
+ line_(0),
+ column_(0),
+ record_target_(NULL),
+ record_start_(-1),
+ allow_f_after_float_(false),
+ comment_style_(CPP_COMMENT_STYLE),
+ require_space_after_number_(true),
+ allow_multiline_strings_(false) {
+
+ current_.line = 0;
+ current_.column = 0;
+ current_.end_column = 0;
+ current_.type = TYPE_START;
+
+ Refresh();
+}
+
+Tokenizer::~Tokenizer() {
+ // If we had any buffer left unread, return it to the underlying stream
+ // so that someone else can read it.
+ if (buffer_size_ > buffer_pos_) {
+ input_->BackUp(buffer_size_ - buffer_pos_);
+ }
+}
+
+// -------------------------------------------------------------------
+// Internal helpers.
+
+void Tokenizer::NextChar() {
+ // Update our line and column counters based on the character being
+ // consumed.
+ if (current_char_ == '\n') {
+ ++line_;
+ column_ = 0;
+ } else if (current_char_ == '\t') {
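+ // Round the column up to the next multiple of kTabWidth (for example, if
+ // kTabWidth were 8, a tab read at column 3 would advance the column to 8).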
+ column_ += kTabWidth - column_ % kTabWidth;
+ } else {
+ ++column_;
+ }
+
+ // Advance to the next character.
+ ++buffer_pos_;
+ if (buffer_pos_ < buffer_size_) {
+ current_char_ = buffer_[buffer_pos_];
+ } else {
+ Refresh();
+ }
+}
+
+void Tokenizer::Refresh() {
+ if (read_error_) {
+ current_char_ = '\0';
+ return;
+ }
+
+ // If we're in a token, append the rest of the buffer to it.
+ if (record_target_ != NULL && record_start_ < buffer_size_) {
+ record_target_->append(buffer_ + record_start_, buffer_size_ - record_start_);
+ record_start_ = 0;
+ }
+
+ const void* data = NULL;
+ buffer_ = NULL;
+ buffer_pos_ = 0;
+ do {
+ if (!input_->Next(&data, &buffer_size_)) {
+ // end of stream (or read error)
+ buffer_size_ = 0;
+ read_error_ = true;
+ current_char_ = '\0';
+ return;
+ }
+ } while (buffer_size_ == 0);
+
+ buffer_ = static_cast<const char*>(data);
+
+ current_char_ = buffer_[0];
+}
+
+inline void Tokenizer::RecordTo(string* target) {
+ record_target_ = target;
+ record_start_ = buffer_pos_;
+}
+
+inline void Tokenizer::StopRecording() {
+ // Note: The if() is necessary because some STL implementations crash when
+ // you call string::append(NULL, 0), presumably because they are trying to
+ // be helpful by detecting the NULL pointer, even though there's nothing
+ // wrong with reading zero bytes from NULL.
+ if (buffer_pos_ != record_start_) {
+ record_target_->append(buffer_ + record_start_, buffer_pos_ - record_start_);
+ }
+ record_target_ = NULL;
+ record_start_ = -1;
+}
+
+inline void Tokenizer::StartToken() {
+ current_.type = TYPE_START; // Just for the sake of initializing it.
+ current_.text.clear();
+ current_.line = line_;
+ current_.column = column_;
+ RecordTo(&current_.text);
+}
+
+inline void Tokenizer::EndToken() {
+ StopRecording();
+ current_.end_column = column_;
+}
+
+// -------------------------------------------------------------------
+// Helper methods that consume characters.
+
+template<typename CharacterClass>
+inline bool Tokenizer::LookingAt() {
+ return CharacterClass::InClass(current_char_);
+}
+
+template<typename CharacterClass>
+inline bool Tokenizer::TryConsumeOne() {
+ if (CharacterClass::InClass(current_char_)) {
+ NextChar();
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline bool Tokenizer::TryConsume(char c) {
+ if (current_char_ == c) {
+ NextChar();
+ return true;
+ } else {
+ return false;
+ }
+}
+
+template<typename CharacterClass>
+inline void Tokenizer::ConsumeZeroOrMore() {
+ while (CharacterClass::InClass(current_char_)) {
+ NextChar();
+ }
+}
+
+template<typename CharacterClass>
+inline void Tokenizer::ConsumeOneOrMore(const char* error) {
+ if (!CharacterClass::InClass(current_char_)) {
+ AddError(error);
+ } else {
+ do {
+ NextChar();
+ } while (CharacterClass::InClass(current_char_));
+ }
+}
+
+// -------------------------------------------------------------------
+// Methods that read whole patterns matching certain kinds of tokens
+// or comments.
+
+void Tokenizer::ConsumeString(char delimiter) {
+ while (true) {
+ switch (current_char_) {
+ case '\0':
+ AddError("Unexpected end of string.");
+ return;
+
+ case '\n': {
+ if (!allow_multiline_strings_) {
+ AddError("String literals cannot cross line boundaries.");
+ return;
+ }
+ NextChar();
+ break;
+ }
+
+ case '\\': {
+ // An escape sequence.
+ NextChar();
+ if (TryConsumeOne<Escape>()) {
+ // Valid escape sequence.
+ } else if (TryConsumeOne<OctalDigit>()) {
+ // Possibly followed by two more octal digits, but these will
+ // just be consumed by the main loop anyway so we don't need
+ // to do so explicitly here.
+ } else if (TryConsume('x')) {
+ if (!TryConsumeOne<HexDigit>()) {
+ AddError("Expected hex digits for escape sequence.");
+ }
+ // Possibly followed by another hex digit, but again we don't care.
+ } else if (TryConsume('u')) {
+ if (!TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>()) {
+ AddError("Expected four hex digits for \\u escape sequence.");
+ }
+ } else if (TryConsume('U')) {
+ // We expect 8 hex digits, but only the range up to 0x10ffff is
+ // legal.
+ if (!TryConsume('0') ||
+ !TryConsume('0') ||
+ !(TryConsume('0') || TryConsume('1')) ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>() ||
+ !TryConsumeOne<HexDigit>()) {
+ AddError("Expected eight hex digits up to 10ffff for \\U escape "
+ "sequence");
+ }
+ } else {
+ AddError("Invalid escape sequence in string literal.");
+ }
+ break;
+ }
+
+ default: {
+ if (current_char_ == delimiter) {
+ NextChar();
+ return;
+ }
+ NextChar();
+ break;
+ }
+ }
+ }
+}
+
+Tokenizer::TokenType Tokenizer::ConsumeNumber(bool started_with_zero,
+ bool started_with_dot) {
+ bool is_float = false;
+
+ if (started_with_zero && (TryConsume('x') || TryConsume('X'))) {
+ // A hex number (started with "0x").
+ ConsumeOneOrMore<HexDigit>("\"0x\" must be followed by hex digits.");
+
+ } else if (started_with_zero && LookingAt<Digit>()) {
+ // An octal number (had a leading zero).
+ ConsumeZeroOrMore<OctalDigit>();
+ if (LookingAt<Digit>()) {
+ AddError("Numbers starting with leading zero must be in octal.");
+ ConsumeZeroOrMore<Digit>();
+ }
+
+ } else {
+ // A decimal number.
+ if (started_with_dot) {
+ is_float = true;
+ ConsumeZeroOrMore<Digit>();
+ } else {
+ ConsumeZeroOrMore<Digit>();
+
+ if (TryConsume('.')) {
+ is_float = true;
+ ConsumeZeroOrMore<Digit>();
+ }
+ }
+
+ if (TryConsume('e') || TryConsume('E')) {
+ is_float = true;
+ TryConsume('-') || TryConsume('+');
+ ConsumeOneOrMore<Digit>("\"e\" must be followed by exponent.");
+ }
+
+ if (allow_f_after_float_ && (TryConsume('f') || TryConsume('F'))) {
+ is_float = true;
+ }
+ }
+
+ if (LookingAt<Letter>() && require_space_after_number_) {
+ AddError("Need space between number and identifier.");
+ } else if (current_char_ == '.') {
+ if (is_float) {
+ AddError(
+ "Already saw decimal point or exponent; can't have another one.");
+ } else {
+ AddError("Hex and octal numbers must be integers.");
+ }
+ }
+
+ return is_float ? TYPE_FLOAT : TYPE_INTEGER;
+}
+
+void Tokenizer::ConsumeLineComment(string* content) {
+ if (content != NULL) RecordTo(content);
+
+ while (current_char_ != '\0' && current_char_ != '\n') {
+ NextChar();
+ }
+ TryConsume('\n');
+
+ if (content != NULL) StopRecording();
+}
+
+void Tokenizer::ConsumeBlockComment(string* content) {
+ int start_line = line_;
+ int start_column = column_ - 2;
+
+ if (content != NULL) RecordTo(content);
+
+ while (true) {
+ while (current_char_ != '\0' &&
+ current_char_ != '*' &&
+ current_char_ != '/' &&
+ current_char_ != '\n') {
+ NextChar();
+ }
+
+ if (TryConsume('\n')) {
+ if (content != NULL) StopRecording();
+
+ // Consume leading whitespace and asterisk.
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ if (TryConsume('*')) {
+ if (TryConsume('/')) {
+ // End of comment.
+ break;
+ }
+ }
+
+ if (content != NULL) RecordTo(content);
+ } else if (TryConsume('*') && TryConsume('/')) {
+ // End of comment.
+ if (content != NULL) {
+ StopRecording();
+ // Strip trailing "*/".
+ content->erase(content->size() - 2);
+ }
+ break;
+ } else if (TryConsume('/') && current_char_ == '*') {
+ // Note: We didn't consume the '*' because if there is a '/' after it
+ // we want to interpret that as the end of the comment.
+ AddError(
+ "\"/*\" inside block comment. Block comments cannot be nested.");
+ } else if (current_char_ == '\0') {
+ AddError("End-of-file inside block comment.");
+ error_collector_->AddError(
+ start_line, start_column, " Comment started here.");
+ if (content != NULL) StopRecording();
+ break;
+ }
+ }
+}
+
+Tokenizer::NextCommentStatus Tokenizer::TryConsumeCommentStart() {
+ if (comment_style_ == CPP_COMMENT_STYLE && TryConsume('/')) {
+ if (TryConsume('/')) {
+ return LINE_COMMENT;
+ } else if (TryConsume('*')) {
+ return BLOCK_COMMENT;
+ } else {
+ // Oops, it was just a slash. Return it.
+ current_.type = TYPE_SYMBOL;
+ current_.text = "/";
+ current_.line = line_;
+ current_.column = column_ - 1;
+ current_.end_column = column_;
+ return SLASH_NOT_COMMENT;
+ }
+ } else if (comment_style_ == SH_COMMENT_STYLE && TryConsume('#')) {
+ return LINE_COMMENT;
+ } else {
+ return NO_COMMENT;
+ }
+}
+
+// -------------------------------------------------------------------
+
+bool Tokenizer::Next() {
+ previous_ = current_;
+
+ while (!read_error_) {
+ ConsumeZeroOrMore<Whitespace>();
+
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(NULL);
+ continue;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(NULL);
+ continue;
+ case SLASH_NOT_COMMENT:
+ return true;
+ case NO_COMMENT:
+ break;
+ }
+
+ // Check for EOF before continuing.
+ if (read_error_) break;
+
+ if (LookingAt<Unprintable>() || current_char_ == '\0') {
+ AddError("Invalid control characters encountered in text.");
+ NextChar();
+ // Skip more unprintable characters, too. But, remember that '\0' is
+ // also what current_char_ is set to after EOF / read error. We have
+ // to be careful not to go into an infinite loop of trying to consume
+ // it, so make sure to check read_error_ explicitly before consuming
+ // '\0'.
+ while (TryConsumeOne<Unprintable>() ||
+ (!read_error_ && TryConsume('\0'))) {
+ // Ignore.
+ }
+
+ } else {
+ // Reading some sort of token.
+ StartToken();
+
+ if (TryConsumeOne<Letter>()) {
+ ConsumeZeroOrMore<Alphanumeric>();
+ current_.type = TYPE_IDENTIFIER;
+ } else if (TryConsume('0')) {
+ current_.type = ConsumeNumber(true, false);
+ } else if (TryConsume('.')) {
+ // This could be the beginning of a floating-point number, or it could
+ // just be a '.' symbol.
+
+ if (TryConsumeOne<Digit>()) {
+ // It's a floating-point number.
+ if (previous_.type == TYPE_IDENTIFIER &&
+ current_.line == previous_.line &&
+ current_.column == previous_.end_column) {
+ // We don't accept syntax like "blah.123".
+ error_collector_->AddError(line_, column_ - 2,
+ "Need space between identifier and decimal point.");
+ }
+ current_.type = ConsumeNumber(false, true);
+ } else {
+ current_.type = TYPE_SYMBOL;
+ }
+ } else if (TryConsumeOne<Digit>()) {
+ current_.type = ConsumeNumber(false, false);
+ } else if (TryConsume('\"')) {
+ ConsumeString('\"');
+ current_.type = TYPE_STRING;
+ } else if (TryConsume('\'')) {
+ ConsumeString('\'');
+ current_.type = TYPE_STRING;
+ } else {
+ // Check if the high order bit is set.
+ if (current_char_ & 0x80) {
+ error_collector_->AddError(line_, column_,
+ StringPrintf("Interpreting non ascii codepoint %d.",
+ static_cast<unsigned char>(current_char_)));
+ }
+ NextChar();
+ current_.type = TYPE_SYMBOL;
+ }
+
+ EndToken();
+ return true;
+ }
+ }
+
+ // EOF
+ current_.type = TYPE_END;
+ current_.text.clear();
+ current_.line = line_;
+ current_.column = column_;
+ current_.end_column = column_;
+ return false;
+}
+
+namespace {
+
+// Helper class for collecting comments and putting them in the right places.
+//
+// This basically just buffers the most recent comment until it can be decided
+// exactly where that comment should be placed. When Flush() is called, the
+// current comment goes into either prev_trailing_comments or detached_comments.
+// When the CommentCollector is destroyed, the last buffered comment goes into
+// next_leading_comments.
+class CommentCollector {
+ public:
+ CommentCollector(string* prev_trailing_comments,
+ vector<string>* detached_comments,
+ string* next_leading_comments)
+ : prev_trailing_comments_(prev_trailing_comments),
+ detached_comments_(detached_comments),
+ next_leading_comments_(next_leading_comments),
+ has_comment_(false),
+ is_line_comment_(false),
+ can_attach_to_prev_(true) {
+ if (prev_trailing_comments != NULL) prev_trailing_comments->clear();
+ if (detached_comments != NULL) detached_comments->clear();
+ if (next_leading_comments != NULL) next_leading_comments->clear();
+ }
+
+ ~CommentCollector() {
+ // Whatever is in the buffer is a leading comment.
+ if (next_leading_comments_ != NULL && has_comment_) {
+ comment_buffer_.swap(*next_leading_comments_);
+ }
+ }
+
+ // About to read a line comment. Get the comment buffer pointer in order to
+ // read into it.
+ string* GetBufferForLineComment() {
+ // We want to combine with previous line comments, but not block comments.
+ if (has_comment_ && !is_line_comment_) {
+ Flush();
+ }
+ has_comment_ = true;
+ is_line_comment_ = true;
+ return &comment_buffer_;
+ }
+
+ // About to read a block comment. Get the comment buffer pointer in order to
+ // read into it.
+ string* GetBufferForBlockComment() {
+ if (has_comment_) {
+ Flush();
+ }
+ has_comment_ = true;
+ is_line_comment_ = false;
+ return &comment_buffer_;
+ }
+
+ void ClearBuffer() {
+ comment_buffer_.clear();
+ has_comment_ = false;
+ }
+
+ // Called once we know that the comment buffer is complete and is *not*
+ // connected to the next token.
+ void Flush() {
+ if (has_comment_) {
+ if (can_attach_to_prev_) {
+ if (prev_trailing_comments_ != NULL) {
+ prev_trailing_comments_->append(comment_buffer_);
+ }
+ can_attach_to_prev_ = false;
+ } else {
+ if (detached_comments_ != NULL) {
+ detached_comments_->push_back(comment_buffer_);
+ }
+ }
+ ClearBuffer();
+ }
+ }
+
+ void DetachFromPrev() {
+ can_attach_to_prev_ = false;
+ }
+
+ private:
+ string* prev_trailing_comments_;
+ vector<string>* detached_comments_;
+ string* next_leading_comments_;
+
+ string comment_buffer_;
+
+ // True if any comments were read into comment_buffer_. This can be true even
+ // if comment_buffer_ is empty, namely if the comment was "/**/".
+ bool has_comment_;
+
+ // Is the comment in the comment buffer a line comment?
+ bool is_line_comment_;
+
+ // Is it still possible that we could be reading a comment attached to the
+ // previous token?
+ bool can_attach_to_prev_;
+};
+
+} // namespace
+
+bool Tokenizer::NextWithComments(string* prev_trailing_comments,
+ vector<string>* detached_comments,
+ string* next_leading_comments) {
+ CommentCollector collector(prev_trailing_comments, detached_comments,
+ next_leading_comments);
+
+ if (current_.type == TYPE_START) {
+ // Ignore the Unicode byte order mark (BOM) if it appears at the file
+ // beginning. Only UTF-8 BOM (0xEF 0xBB 0xBF) is accepted.
+ if (TryConsume((char)0xEF)) {
+ if (!TryConsume((char)0xBB) || !TryConsume((char)0xBF)) {
+ AddError("Proto file starts with 0xEF but not UTF-8 BOM. "
+ "Only UTF-8 is accepted for proto file.");
+ return false;
+ }
+ }
+ collector.DetachFromPrev();
+ } else {
+ // A comment appearing on the same line must be attached to the previous
+ // declaration.
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(collector.GetBufferForLineComment());
+
+ // Don't allow comments on subsequent lines to be attached to a trailing
+ // comment.
+ collector.Flush();
+ break;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ if (!TryConsume('\n')) {
+ // Oops, the next token is on the same line. If we recorded a comment
+ // we really have no idea which token it should be attached to.
+ collector.ClearBuffer();
+ return Next();
+ }
+
+ // Don't allow comments on subsequent lines to be attached to a trailing
+ // comment.
+ collector.Flush();
+ break;
+ case SLASH_NOT_COMMENT:
+ return true;
+ case NO_COMMENT:
+ if (!TryConsume('\n')) {
+ // The next token is on the same line. There are no comments.
+ return Next();
+ }
+ break;
+ }
+ }
+
+ // OK, we are now on the line *after* the previous token.
+ while (true) {
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+
+ switch (TryConsumeCommentStart()) {
+ case LINE_COMMENT:
+ ConsumeLineComment(collector.GetBufferForLineComment());
+ break;
+ case BLOCK_COMMENT:
+ ConsumeBlockComment(collector.GetBufferForBlockComment());
+
+ // Consume the rest of the line so that we don't interpret it as a
+ // blank line the next time around the loop.
+ ConsumeZeroOrMore<WhitespaceNoNewline>();
+ TryConsume('\n');
+ break;
+ case SLASH_NOT_COMMENT:
+ return true;
+ case NO_COMMENT:
+ if (TryConsume('\n')) {
+ // Completely blank line.
+ collector.Flush();
+ collector.DetachFromPrev();
+ } else {
+ bool result = Next();
+ if (!result ||
+ current_.text == "}" ||
+ current_.text == "]" ||
+ current_.text == ")") {
+ // It looks like we're at the end of a scope. In this case it
+ // makes no sense to attach a comment to the following token.
+ collector.Flush();
+ }
+ return result;
+ }
+ break;
+ }
+ }
+}
+
+// -------------------------------------------------------------------
+// Token-parsing helpers. Remember that these don't need to report
+// errors since any errors should already have been reported while
+// tokenizing. Also, these can assume that whatever text they
+// are given is text that the tokenizer actually parsed as a token
+// of the given type.
+
+bool Tokenizer::ParseInteger(const string& text, uint64 max_value,
+ uint64* output) {
+ // Sadly, we can't just use strtoul() since it is only 32-bit and strtoull()
+ // is non-standard. I hate the C standard library. :(
+
+// return strtoull(text.c_str(), NULL, 0);
+
+ const char* ptr = text.c_str();
+ int base = 10;
+ if (ptr[0] == '0') {
+ if (ptr[1] == 'x' || ptr[1] == 'X') {
+ // This is hex.
+ base = 16;
+ ptr += 2;
+ } else {
+ // This is octal.
+ base = 8;
+ }
+ }
+
+ uint64 result = 0;
+ for (; *ptr != '\0'; ptr++) {
+ int digit = DigitValue(*ptr);
+ GOOGLE_LOG_IF(DFATAL, digit < 0 || digit >= base)
+ << " Tokenizer::ParseInteger() passed text that could not have been"
+ " tokenized as an integer: " << CEscape(text);
+ if (digit > max_value || result > (max_value - digit) / base) {
+ // Overflow.
+ return false;
+ }
+ result = result * base + digit;
+ }
+
+ *output = result;
+ return true;
+}
+
+double Tokenizer::ParseFloat(const string& text) {
+ const char* start = text.c_str();
+ char* end;
+ double result = NoLocaleStrtod(start, &end);
+
+ // "1e" is not a valid float, but if the tokenizer reads it, it will
+ // report an error but still return it as a valid token. We need to
+ // accept anything the tokenizer could possibly return, error or not.
+ if (*end == 'e' || *end == 'E') {
+ ++end;
+ if (*end == '-' || *end == '+') ++end;
+ }
+
+ // If the Tokenizer had allow_f_after_float_ enabled, the float may be
+ // suffixed with the letter 'f'.
+ if (*end == 'f' || *end == 'F') {
+ ++end;
+ }
+
+ GOOGLE_LOG_IF(DFATAL, end - start != text.size() || *start == '-')
+ << " Tokenizer::ParseFloat() passed text that could not have been"
+ " tokenized as a float: " << CEscape(text);
+ return result;
+}
+
+// Helper to append a Unicode code point to a string as UTF8, without bringing
+// in any external dependencies.
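+// A worked example, for illustration: U+00E9 falls in the two-byte case
+// below, giving 0x0000c080 | ((0xE9 & 0x07c0) << 2) | (0xE9 & 0x003f)
+// == 0xC3A9, i.e. the UTF-8 byte sequence 0xC3 0xA9.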
+static void AppendUTF8(uint32 code_point, string* output) {
+ uint32 tmp = 0;
+ int len = 0;
+ if (code_point <= 0x7f) {
+ tmp = code_point;
+ len = 1;
+ } else if (code_point <= 0x07ff) {
+ tmp = 0x0000c080 |
+ ((code_point & 0x07c0) << 2) |
+ (code_point & 0x003f);
+ len = 2;
+ } else if (code_point <= 0xffff) {
+ tmp = 0x00e08080 |
+ ((code_point & 0xf000) << 4) |
+ ((code_point & 0x0fc0) << 2) |
+ (code_point & 0x003f);
+ len = 3;
+ } else if (code_point <= 0x1fffff) {
+ tmp = 0xf0808080 |
+ ((code_point & 0x1c0000) << 6) |
+ ((code_point & 0x03f000) << 4) |
+ ((code_point & 0x000fc0) << 2) |
+ (code_point & 0x003f);
+ len = 4;
+ } else {
+ // UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
+ // normally only defined up to there as well.
+ StringAppendF(output, "\\U%08x", code_point);
+ return;
+ }
+ tmp = ghtonl(tmp);
+ output->append(reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
+}
+
+// Try to read <len> hex digits from ptr, and stuff the numeric result into
+// *result. Returns true if that many digits were successfully consumed.
+static bool ReadHexDigits(const char* ptr, int len, uint32* result) {
+ *result = 0;
+ if (len == 0) return false;
+ for (const char* end = ptr + len; ptr < end; ++ptr) {
+ if (*ptr == '\0') return false;
+ *result = (*result << 4) + DigitValue(*ptr);
+ }
+ return true;
+}
+
+// Handling UTF-16 surrogate pairs. UTF-16 encodes code points in the range
+// 0x10000...0x10ffff as a pair of numbers, a head surrogate followed by a trail
+// surrogate. These numbers are in a reserved range of Unicode code points, so
+// if we encounter such a pair we know how to parse it and convert it into a
+// single code point.
+static const uint32 kMinHeadSurrogate = 0xd800;
+static const uint32 kMaxHeadSurrogate = 0xdc00;
+static const uint32 kMinTrailSurrogate = 0xdc00;
+static const uint32 kMaxTrailSurrogate = 0xe000;
+
+static inline bool IsHeadSurrogate(uint32 code_point) {
+ return (code_point >= kMinHeadSurrogate) && (code_point < kMaxHeadSurrogate);
+}
+
+static inline bool IsTrailSurrogate(uint32 code_point) {
+ return (code_point >= kMinTrailSurrogate) &&
+ (code_point < kMaxTrailSurrogate);
+}
+
+// Combine a head and trail surrogate into a single Unicode code point.
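+// For example, the pair 0xD83D / 0xDE00 decodes to
+// 0x10000 + (((0xD83D - 0xD800) << 10) | (0xDE00 - 0xDC00)) == 0x1F600.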
+static uint32 AssembleUTF16(uint32 head_surrogate, uint32 trail_surrogate) {
+ GOOGLE_DCHECK(IsHeadSurrogate(head_surrogate));
+ GOOGLE_DCHECK(IsTrailSurrogate(trail_surrogate));
+ return 0x10000 + (((head_surrogate - kMinHeadSurrogate) << 10) |
+ (trail_surrogate - kMinTrailSurrogate));
+}
+
+// Convert the escape sequence parameter to a number of expected hex digits.
+static inline int UnicodeLength(char key) {
+ if (key == 'u') return 4;
+ if (key == 'U') return 8;
+ return 0;
+}
+
+// Given a pointer to the 'u' or 'U' starting a Unicode escape sequence, attempt
+// to parse that sequence. On success, returns a pointer to the first char
+// beyond that sequence, and fills in *code_point. On failure, returns ptr
+// itself.
+static const char* FetchUnicodePoint(const char* ptr, uint32* code_point) {
+ const char* p = ptr;
+ // Fetch the code point.
+ const int len = UnicodeLength(*p++);
+ if (!ReadHexDigits(p, len, code_point))
+ return ptr;
+ p += len;
+
+ // Check if the code point we read is a "head surrogate." If so, then we
+ // expect it to be immediately followed by another code point which is a valid
+ // "trail surrogate," and together they form a UTF-16 pair which decodes into
+ // a single Unicode point. Trail surrogates may only use \u, not \U.
+ if (IsHeadSurrogate(*code_point) && *p == '\\' && *(p + 1) == 'u') {
+ uint32 trail_surrogate;
+ if (ReadHexDigits(p + 2, 4, &trail_surrogate) &&
+ IsTrailSurrogate(trail_surrogate)) {
+ *code_point = AssembleUTF16(*code_point, trail_surrogate);
+ p += 6;
+ }
+ // If this failed, then we just emit the head surrogate as a code point.
+ // It's bogus, but so is the string.
+ }
+
+ return p;
+}
+
+// The text string must begin and end with single or double quote
+// characters.
+void Tokenizer::ParseStringAppend(const string& text, string* output) {
+ // Reminder: text[0] is always a quote character. (If text is
+ // empty, it's invalid, so we'll just return).
+ const size_t text_size = text.size();
+ if (text_size == 0) {
+ GOOGLE_LOG(DFATAL)
+ << " Tokenizer::ParseStringAppend() passed text that could not"
+ " have been tokenized as a string: " << CEscape(text);
+ return;
+ }
+
+ // Reserve room for new string. The branch is necessary because if
+ // there is already space available the reserve() call might
+ // downsize the output.
+ const size_t new_len = text_size + output->size();
+ if (new_len > output->capacity()) {
+ output->reserve(new_len);
+ }
+
+ // Loop through the string copying characters to "output" and
+ // interpreting escape sequences. Note that any invalid escape
+ // sequences or other errors were already reported while tokenizing.
+ // In this case we do not need to produce valid results.
+ for (const char* ptr = text.c_str() + 1; *ptr != '\0'; ptr++) {
+ if (*ptr == '\\' && ptr[1] != '\0') {
+ // An escape sequence.
+ ++ptr;
+
+ if (OctalDigit::InClass(*ptr)) {
+ // An octal escape. May be one, two, or three digits.
+ int code = DigitValue(*ptr);
+ if (OctalDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ if (OctalDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 8 + DigitValue(*ptr);
+ }
+ output->push_back(static_cast<char>(code));
+
+ } else if (*ptr == 'x') {
+ // A hex escape. May have zero, one, or two digits. (The zero case
+ // will have been caught as an error earlier.)
+ int code = 0;
+ if (HexDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = DigitValue(*ptr);
+ }
+ if (HexDigit::InClass(ptr[1])) {
+ ++ptr;
+ code = code * 16 + DigitValue(*ptr);
+ }
+ output->push_back(static_cast<char>(code));
+
+ } else if (*ptr == 'u' || *ptr == 'U') {
+ uint32 unicode;
+ const char* end = FetchUnicodePoint(ptr, &unicode);
+ if (end == ptr) {
+ // Failure: Just dump out what we saw, don't try to parse it.
+ output->push_back(*ptr);
+ } else {
+ AppendUTF8(unicode, output);
+ ptr = end - 1; // Because we're about to ++ptr.
+ }
+ } else {
+ // Some other escape code.
+ output->push_back(TranslateEscape(*ptr));
+ }
+
+ } else if (*ptr == text[0] && ptr[1] == '\0') {
+ // Ignore final quote matching the starting quote.
+ } else {
+ output->push_back(*ptr);
+ }
+ }
+}
+
+template<typename CharacterClass>
+static bool AllInClass(const string& s) {
+ for (int i = 0; i < s.size(); ++i) {
+ if (!CharacterClass::InClass(s[i]))
+ return false;
+ }
+ return true;
+}
+
+bool Tokenizer::IsIdentifier(const string& text) {
+ // Mirrors IDENTIFIER definition in Tokenizer::Next() above.
+ if (text.size() == 0)
+ return false;
+ if (!Letter::InClass(text.at(0)))
+ return false;
+ if (!AllInClass<Alphanumeric>(text.substr(1)))
+ return false;
+ return true;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
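Read together with the header that follows, the tokenizer is typically driven in a small loop like the sketch below. The StderrErrorCollector class and the sample text are made up for illustration; ArrayInputStream comes from zero_copy_stream_impl_lite.h, and Next()/current() are the methods defined above.

#include <iostream>
#include <string>
#include <google/protobuf/io/tokenizer.h>
#include <google/protobuf/io/zero_copy_stream_impl_lite.h>

// Reports every tokenization problem on stderr (line/column are zero-based,
// hence the +1 for display).
class StderrErrorCollector : public google::protobuf::io::ErrorCollector {
 public:
  virtual void AddError(int line, int column, const std::string& message) {
    std::cerr << (line + 1) << ":" << (column + 1) << ": " << message << "\n";
  }
};

int main() {
  const char text[] = "optional int32 foo = 1;  // trailing comment";
  google::protobuf::io::ArrayInputStream input(text, sizeof(text) - 1);
  StderrErrorCollector errors;
  google::protobuf::io::Tokenizer tokenizer(&input, &errors);
  while (tokenizer.Next()) {
    // Prints "optional", "int32", "foo", "=", "1", ";"; the comment is skipped.
    std::cout << tokenizer.current().text << "\n";
  }
  return 0;
}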
diff --git a/src/google/protobuf/io/tokenizer.h b/src/google/protobuf/io/tokenizer.h
new file mode 100644
index 0000000..49885ed
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer.h
@@ -0,0 +1,403 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Class for parsing tokenized text from a ZeroCopyInputStream.
+
+#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__
+#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__
+
+#include <string>
+#include <vector>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+class ZeroCopyInputStream; // zero_copy_stream.h
+
+// Defined in this file.
+class ErrorCollector;
+class Tokenizer;
+
+// Abstract interface for an object which collects the errors that occur
+// during parsing. A typical implementation might simply print the errors
+// to stdout.
+class LIBPROTOBUF_EXPORT ErrorCollector {
+ public:
+ inline ErrorCollector() {}
+ virtual ~ErrorCollector();
+
+ // Indicates that there was an error in the input at the given line and
+ // column numbers. The numbers are zero-based, so you may want to add
+ // 1 to each before printing them.
+ virtual void AddError(int line, int column, const string& message) = 0;
+
+ // Indicates that there was a warning in the input at the given line and
+ // column numbers. The numbers are zero-based, so you may want to add
+ // 1 to each before printing them.
+ virtual void AddWarning(int /* line */, int /* column */,
+ const string& /* message */) { }
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector);
+};
+
+// This class converts a stream of raw text into a stream of tokens for
+// the protocol definition parser to parse. The tokens recognized are
+// similar to those that make up the C language; see the TokenType enum for
+// precise descriptions. Whitespace and comments are skipped. By default,
+// C- and C++-style comments are recognized, but other styles can be used by
+// calling set_comment_style().
+class LIBPROTOBUF_EXPORT Tokenizer {
+ public:
+ // Construct a Tokenizer that reads and tokenizes text from the given
+ // input stream and writes errors to the given error_collector.
+ // The caller keeps ownership of input and error_collector.
+ Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector);
+ ~Tokenizer();
+
+ enum TokenType {
+ TYPE_START, // Next() has not yet been called.
+ TYPE_END, // End of input reached. "text" is empty.
+
+ TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not
+ // starting with a digit. It is an error for a number
+ // to be followed by an identifier with no space in
+ // between.
+ TYPE_INTEGER, // A sequence of digits representing an integer. Normally
+ // the digits are decimal, but a prefix of "0x" indicates
+ // a hex number and a leading zero indicates octal, just
+ // like with C numeric literals. A leading negative sign
+ // is NOT included in the token; it's up to the parser to
+ // interpret the unary minus operator on its own.
+ TYPE_FLOAT, // A floating point literal, with a fractional part and/or
+ // an exponent. Always in decimal. Again, never
+ // negative.
+ TYPE_STRING, // A quoted sequence of escaped characters. Either single
+ // or double quotes can be used, but they must match.
+ // A string literal cannot cross a line break.
+ TYPE_SYMBOL, // Any other printable character, like '!' or '+'.
+ // Symbols are always a single character, so "!+$%" is
+ // four tokens.
+ };
+
+ // Structure representing a token read from the token stream.
+ struct Token {
+ TokenType type;
+ string text; // The exact text of the token as it appeared in
+ // the input. e.g. tokens of TYPE_STRING will still
+ // be escaped and in quotes.
+
+ // "line" and "column" specify the position of the first character of
+ // the token within the input stream. They are zero-based.
+ int line;
+ int column;
+ int end_column;
+ };
+
+ // Get the current token. This is updated when Next() is called. Before
+ // the first call to Next(), current() has type TYPE_START and no contents.
+ const Token& current();
+
+ // Return the previous token -- i.e. what current() returned before the
+ // previous call to Next().
+ const Token& previous();
+
+ // Advance to the next token. Returns false if the end of the input is
+ // reached.
+ bool Next();
+
+ // Like Next(), but also collects comments which appear between the previous
+ // and next tokens.
+ //
+ // Comments which appear to be attached to the previous token are stored
+ // in *prev_trailing_comments. Comments which appear to be attached to the
+ // next token are stored in *next_leading_comments. Comments appearing in
+ // between which do not appear to be attached to either will be added to
+ // detached_comments. Any of these parameters can be NULL to simply discard
+ // the comments.
+ //
+ // A series of line comments appearing on consecutive lines, with no other
+ // tokens appearing on those lines, will be treated as a single comment.
+ //
+ // Only the comment content is returned; comment markers (e.g. //) are
+ // stripped out. For block comments, leading whitespace and an asterisk will
+ // be stripped from the beginning of each line other than the first. Newlines
+ // are included in the output.
+ //
+ // Examples:
+ //
+ // optional int32 foo = 1; // Comment attached to foo.
+ // // Comment attached to bar.
+ // optional int32 bar = 2;
+ //
+ // optional string baz = 3;
+ // // Comment attached to baz.
+ // // Another line attached to baz.
+ //
+ // // Comment attached to qux.
+ // //
+ // // Another line attached to qux.
+ // optional double qux = 4;
+ //
+ // // Detached comment. This is not attached to qux or corge
+ // // because there are blank lines separating it from both.
+ //
+ // optional string corge = 5;
+ // /* Block comment attached
+ // * to corge. Leading asterisks
+ // * will be removed. */
+ // /* Block comment attached to
+ // * grault. */
+ // optional int32 grault = 6;
+ bool NextWithComments(string* prev_trailing_comments,
+ vector<string>* detached_comments,
+ string* next_leading_comments);
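+
+ // A minimal calling sketch, assuming a Tokenizer named "tokenizer" (any of
+ // the three outputs may instead be NULL to discard those comments):
+ //   string trailing, leading;
+ //   vector<string> detached;
+ //   tokenizer.NextWithComments(&trailing, &detached, &leading);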
+
+ // Parse helpers ---------------------------------------------------
+
+ // Parses a TYPE_FLOAT token. This never fails, so long as the text actually
+ // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the
+ // result is undefined (possibly an assert failure).
+ static double ParseFloat(const string& text);
+
+ // Parses a TYPE_STRING token. This never fails, so long as the text actually
+ // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the
+ // result is undefined (possibly an assert failure).
+ static void ParseString(const string& text, string* output);
+
+ // Identical to ParseString, but appends to output.
+ static void ParseStringAppend(const string& text, string* output);
+
+ // Parses a TYPE_INTEGER token. Returns false if the result would be
+ // greater than max_value. Otherwise, returns true and sets *output to the
+ // result. If the text is not from a Token of type TYPE_INTEGER originally
+ // parsed by a Tokenizer, the result is undefined (possibly an assert
+ // failure).
+ static bool ParseInteger(const string& text, uint64 max_value,
+ uint64* output);
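+
+ // A short sketch of applying these helpers once a token's type is known
+ // (the Tokenizer named "tokenizer" is an assumed placeholder):
+ //   const Tokenizer::Token& t = tokenizer.current();
+ //   if (t.type == Tokenizer::TYPE_FLOAT) {
+ //     double value = Tokenizer::ParseFloat(t.text);
+ //   } else if (t.type == Tokenizer::TYPE_STRING) {
+ //     string value;
+ //     Tokenizer::ParseString(t.text, &value);
+ //   } else if (t.type == Tokenizer::TYPE_INTEGER) {
+ //     uint64 value;
+ //     bool fits = Tokenizer::ParseInteger(t.text, kuint64max, &value);
+ //   }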
+
+ // Options ---------------------------------------------------------
+
+ // Set true to allow floats to be suffixed with the letter 'f'. Tokens
+ // which would otherwise be integers but which have the 'f' suffix will be
+ // forced to be interpreted as floats. For all other purposes, the 'f' is
+ // ignored.
+ void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; }
+
+ // Valid values for set_comment_style().
+ enum CommentStyle {
+ // Line comments begin with "//", block comments are delimited by "/*" and
+ // "*/".
+ CPP_COMMENT_STYLE,
+ // Line comments begin with "#". No way to write block comments.
+ SH_COMMENT_STYLE
+ };
+
+ // Sets the comment style.
+ void set_comment_style(CommentStyle style) { comment_style_ = style; }
+
+ // Whether to require whitespace between a number and a field name.
+ // Default is true. Do not use this; for Google-internal cleanup only.
+ void set_require_space_after_number(bool require) {
+ require_space_after_number_ = require;
+ }
+
+ // Whether to allow string literals to span multiple lines. Default is false.
+ // Do not use this; for Google-internal cleanup only.
+ void set_allow_multiline_strings(bool allow) {
+ allow_multiline_strings_ = allow;
+ }
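+
+ // Options are typically set right after construction, before the first call
+ // to Next(); a sketch ("input" and "collector" are any ZeroCopyInputStream
+ // and ErrorCollector owned by the caller):
+ //   Tokenizer tokenizer(&input, &collector);
+ //   tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE);
+ //   tokenizer.set_allow_f_after_float(true);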
+
+ // External helper: validate an identifier.
+ static bool IsIdentifier(const string& text);
+
+ // -----------------------------------------------------------------
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer);
+
+ Token current_; // Returned by current().
+ Token previous_; // Returned by previous().
+
+ ZeroCopyInputStream* input_;
+ ErrorCollector* error_collector_;
+
+ char current_char_; // == buffer_[buffer_pos_], updated by NextChar().
+ const char* buffer_; // Current buffer returned from input_.
+ int buffer_size_; // Size of buffer_.
+ int buffer_pos_; // Current position within the buffer.
+ bool read_error_; // Did we previously encounter a read error?
+
+ // Line and column number of current_char_ within the whole input stream.
+ int line_;
+ int column_;
+
+ // String to which text should be appended as we advance through it.
+ // Call RecordTo(&str) to start recording and StopRecording() to stop.
+ // E.g. StartToken() calls RecordTo(&current_.text). record_start_ is the
+ // position within the current buffer where recording started.
+ string* record_target_;
+ int record_start_;
+
+ // Options.
+ bool allow_f_after_float_;
+ CommentStyle comment_style_;
+ bool require_space_after_number_;
+ bool allow_multiline_strings_;
+
+ // Since we count columns we need to interpret tabs somehow. We'll take
+ // the standard 8-character definition for lack of any way to do better.
+ static const int kTabWidth = 8;
+
+ // -----------------------------------------------------------------
+ // Helper methods.
+
+ // Consume this character and advance to the next one.
+ void NextChar();
+
+ // Read a new buffer from the input.
+ void Refresh();
+
+ inline void RecordTo(string* target);
+ inline void StopRecording();
+
+ // Called when the current character is the first character of a new
+ // token (not including whitespace or comments).
+ inline void StartToken();
+ // Called when the current character is the first character after the
+ // end of the last token. After this returns, current_.text will
+ // contain all text consumed since StartToken() was called.
+ inline void EndToken();
+
+ // Convenience method to add an error at the current line and column.
+ void AddError(const string& message) {
+ error_collector_->AddError(line_, column_, message);
+ }
+
+ // -----------------------------------------------------------------
+ // The following four methods are used to consume tokens of specific
+ // types. They are actually used to consume all characters *after*
+ // the first, since the calling function consumes the first character
+ // in order to decide what kind of token is being read.
+
+ // Read and consume a string, ending when the given delimiter is
+ // consumed.
+ void ConsumeString(char delimiter);
+
+ // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER
+ // depending on what was read. This needs to know if the first
+ // character was a zero in order to correctly recognize hex and octal
+ // numbers.
+ // It also needs to know whether the first character was a '.', in order to
+ // parse floating point numbers correctly.
+ TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot);
+
+ // Consume the rest of a line.
+ void ConsumeLineComment(string* content);
+ // Consume until "*/".
+ void ConsumeBlockComment(string* content);
+
+ enum NextCommentStatus {
+ // Started a line comment.
+ LINE_COMMENT,
+
+ // Started a block comment.
+ BLOCK_COMMENT,
+
+ // Consumed a slash, then realized it wasn't a comment. current_ has
+ // been filled in with a slash token. The caller should return it.
+ SLASH_NOT_COMMENT,
+
+ // We do not appear to be starting a comment here.
+ NO_COMMENT
+ };
+
+ // If we're at the start of a new comment, consume it and return what kind
+ // of comment it is.
+ NextCommentStatus TryConsumeCommentStart();
+
+ // -----------------------------------------------------------------
+ // These helper methods make the parsing code more readable. The
+ // "character classes" referred to are defined at the top of the .cc file.
+ // Basically it is a C++ class with one method:
+ // static bool InClass(char c);
+ // The method returns true if c is a member of this "class", like "Letter"
+ // or "Digit".
+
+ // Returns true if the current character is of the given character
+ // class, but does not consume anything.
+ template<typename CharacterClass>
+ inline bool LookingAt();
+
+ // If the current character is in the given class, consume it and return
+ // true. Otherwise return false.
+ // e.g. TryConsumeOne<Letter>()
+ template<typename CharacterClass>
+ inline bool TryConsumeOne();
+
+ // Like above, but try to consume the specific character indicated.
+ inline bool TryConsume(char c);
+
+ // Consume zero or more of the given character class.
+ template<typename CharacterClass>
+ inline void ConsumeZeroOrMore();
+
+ // Consume one or more of the given character class or log the given
+ // error message.
+ // e.g. ConsumeOneOrMore<Digit>("Expected digits.");
+ template<typename CharacterClass>
+ inline void ConsumeOneOrMore(const char* error);
+};
+
+// inline methods ====================================================
+inline const Tokenizer::Token& Tokenizer::current() {
+ return current_;
+}
+
+inline const Tokenizer::Token& Tokenizer::previous() {
+ return previous_;
+}
+
+inline void Tokenizer::ParseString(const string& text, string* output) {
+ output->clear();
+ ParseStringAppend(text, output);
+}
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__
diff --git a/src/google/protobuf/io/tokenizer_unittest.cc b/src/google/protobuf/io/tokenizer_unittest.cc
new file mode 100644
index 0000000..20d50a2
--- /dev/null
+++ b/src/google/protobuf/io/tokenizer_unittest.cc
@@ -0,0 +1,1002 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <limits.h>
+#include <math.h>
+
+#include <vector>
+
+#include <google/protobuf/io/tokenizer.h>
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/strutil.h>
+#include <google/protobuf/stubs/substitute.h>
+#include <google/protobuf/testing/googletest.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+// ===================================================================
+// Data-Driven Test Infrastructure
+
+// TODO(kenton): This is copied from coded_stream_unittest. This is
+// temporary until these features are integrated into gTest itself.
+
+// TEST_1D and TEST_2D are macros I'd eventually like to see added to
+// gTest. These macros can be used to declare tests which should be
+// run multiple times, once for each item in some input array. TEST_1D
+// tests all cases in a single input array. TEST_2D tests all
+// combinations of cases from two arrays. The arrays must be statically
+// defined such that the GOOGLE_ARRAYSIZE() macro works on them. Example:
+//
+// int kCases[] = {1, 2, 3, 4};
+// TEST_1D(MyFixture, MyTest, kCases) {
+// EXPECT_GT(kCases_case, 0);
+// }
+//
+// This test iterates through the numbers 1, 2, 3, and 4 and tests that
+// they are all greater than zero. In case of failure, the exact case
+// which failed will be printed. The case type must be printable using
+// ostream::operator<<. (A corresponding TEST_2D sketch appears after the
+// macro definitions below.)
+
+#define TEST_1D(FIXTURE, NAME, CASES) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType> \
+ void DoSingleCase(const CaseType& CASES##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES); i++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES " case #" << i << ": " << CASES[i]); \
+ DoSingleCase(CASES[i]); \
+ } \
+ } \
+ \
+ template <typename CaseType> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType& CASES##_case)
+
+#define TEST_2D(FIXTURE, NAME, CASES1, CASES2) \
+ class FIXTURE##_##NAME##_DD : public FIXTURE { \
+ protected: \
+ template <typename CaseType1, typename CaseType2> \
+ void DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case); \
+ }; \
+ \
+ TEST_F(FIXTURE##_##NAME##_DD, NAME) { \
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(CASES1); i++) { \
+ for (int j = 0; j < GOOGLE_ARRAYSIZE(CASES2); j++) { \
+ SCOPED_TRACE(testing::Message() \
+ << #CASES1 " case #" << i << ": " << CASES1[i] << ", " \
+ << #CASES2 " case #" << j << ": " << CASES2[j]); \
+ DoSingleCase(CASES1[i], CASES2[j]); \
+ } \
+ } \
+ } \
+ \
+ template <typename CaseType1, typename CaseType2> \
+ void FIXTURE##_##NAME##_DD::DoSingleCase(const CaseType1& CASES1##_case, \
+ const CaseType2& CASES2##_case)
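+
+// A corresponding TEST_2D sketch with hypothetical case arrays ("MyFixture",
+// "kSizes" and "kNames" are illustrative, not defined in this file). The body
+// runs once per (kSizes, kNames) pair, and each case value is available under
+// the array name with a "_case" suffix:
+//
+//   int kSizes[] = {1, 2, 4};
+//   const char* kNames[] = {"foo", "bar"};
+//   TEST_2D(MyFixture, SizesAndNames, kSizes, kNames) {
+//     EXPECT_GT(kSizes_case, 0);
+//     EXPECT_FALSE(string(kNames_case).empty());
+//   }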
+
+// -------------------------------------------------------------------
+
+// An input stream that is basically like an ArrayInputStream but sometimes
+// returns empty buffers, just to throw us off.
+class TestInputStream : public ZeroCopyInputStream {
+ public:
+ TestInputStream(const void* data, int size, int block_size)
+ : array_stream_(data, size, block_size), counter_(0) {}
+ ~TestInputStream() {}
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size) {
+ // We'll return an empty buffer on the first call and on every later call
+ // whose zero-based index is a multiple of 3 or 5.
+ if (counter_ % 3 == 0 || counter_ % 5 == 0) {
+ *data = NULL;
+ *size = 0;
+ ++counter_;
+ return true;
+ } else {
+ ++counter_;
+ return array_stream_.Next(data, size);
+ }
+ }
+
+ void BackUp(int count) { return array_stream_.BackUp(count); }
+ bool Skip(int count) { return array_stream_.Skip(count); }
+ int64 ByteCount() const { return array_stream_.ByteCount(); }
+
+ private:
+ ArrayInputStream array_stream_;
+ int counter_;
+};
+
+// -------------------------------------------------------------------
+
+// An error collector which simply concatenates all its errors into a big
+// block of text which can be checked.
+class TestErrorCollector : public ErrorCollector {
+ public:
+ TestErrorCollector() {}
+ ~TestErrorCollector() {}
+
+ string text_;
+
+ // implements ErrorCollector ---------------------------------------
+ void AddError(int line, int column, const string& message) {
+ strings::SubstituteAndAppend(&text_, "$0:$1: $2\n",
+ line, column, message);
+ }
+};
+
+// -------------------------------------------------------------------
+
+// We test each operation over a variety of block sizes to ensure that
+// we test cases where reads cross buffer boundaries as well as cases
+// where they don't. This is a brute-force approach, but it's easy to
+// write and easy to understand.
+const int kBlockSizes[] = {1, 2, 3, 5, 7, 13, 32, 1024};
+
+class TokenizerTest : public testing::Test {
+ protected:
+ // For easy testing.
+ uint64 ParseInteger(const string& text) {
+ uint64 result;
+ EXPECT_TRUE(Tokenizer::ParseInteger(text, kuint64max, &result));
+ return result;
+ }
+};
+
+// ===================================================================
+
+// These tests cause gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+// In each test case, the entire input text should parse as a single token
+// of the given type.
+struct SimpleTokenCase {
+ string input;
+ Tokenizer::TokenType type;
+};
+
+inline ostream& operator<<(ostream& out,
+ const SimpleTokenCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+SimpleTokenCase kSimpleTokenCases[] = {
+ // Test identifiers.
+ { "hello", Tokenizer::TYPE_IDENTIFIER },
+
+ // Test integers.
+ { "123", Tokenizer::TYPE_INTEGER },
+ { "0xab6", Tokenizer::TYPE_INTEGER },
+ { "0XAB6", Tokenizer::TYPE_INTEGER },
+ { "0X1234567", Tokenizer::TYPE_INTEGER },
+ { "0x89abcdef", Tokenizer::TYPE_INTEGER },
+ { "0x89ABCDEF", Tokenizer::TYPE_INTEGER },
+ { "01234567", Tokenizer::TYPE_INTEGER },
+
+ // Test floats.
+ { "123.45", Tokenizer::TYPE_FLOAT },
+ { "1.", Tokenizer::TYPE_FLOAT },
+ { "1e3", Tokenizer::TYPE_FLOAT },
+ { "1E3", Tokenizer::TYPE_FLOAT },
+ { "1e-3", Tokenizer::TYPE_FLOAT },
+ { "1e+3", Tokenizer::TYPE_FLOAT },
+ { "1.e3", Tokenizer::TYPE_FLOAT },
+ { "1.2e3", Tokenizer::TYPE_FLOAT },
+ { ".1", Tokenizer::TYPE_FLOAT },
+ { ".1e3", Tokenizer::TYPE_FLOAT },
+ { ".1e-3", Tokenizer::TYPE_FLOAT },
+ { ".1e+3", Tokenizer::TYPE_FLOAT },
+
+ // Test strings.
+ { "'hello'", Tokenizer::TYPE_STRING },
+ { "\"foo\"", Tokenizer::TYPE_STRING },
+ { "'a\"b'", Tokenizer::TYPE_STRING },
+ { "\"a'b\"", Tokenizer::TYPE_STRING },
+ { "'a\\'b'", Tokenizer::TYPE_STRING },
+ { "\"a\\\"b\"", Tokenizer::TYPE_STRING },
+ { "'\\xf'", Tokenizer::TYPE_STRING },
+ { "'\\0'", Tokenizer::TYPE_STRING },
+
+ // Test symbols.
+ { "+", Tokenizer::TYPE_SYMBOL },
+ { ".", Tokenizer::TYPE_SYMBOL },
+};
+
+TEST_2D(TokenizerTest, SimpleTokens, kSimpleTokenCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kSimpleTokenCases_case.input.data(),
+ kSimpleTokenCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+ EXPECT_EQ(0, tokenizer.current().end_column);
+
+ // Parse the token.
+ ASSERT_TRUE(tokenizer.Next());
+
+ // Check that it has the right type.
+ EXPECT_EQ(kSimpleTokenCases_case.type, tokenizer.current().type);
+ // Check that it contains the complete input text.
+ EXPECT_EQ(kSimpleTokenCases_case.input, tokenizer.current().text);
+ // Check that it is located at the beginning of the input
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+ EXPECT_EQ(kSimpleTokenCases_case.input.size(),
+ tokenizer.current().end_column);
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+
+ // After Next() returns false, the token should have type TYPE_END.
+ EXPECT_EQ(Tokenizer::TYPE_END, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(kSimpleTokenCases_case.input.size(), tokenizer.current().column);
+ EXPECT_EQ(kSimpleTokenCases_case.input.size(),
+ tokenizer.current().end_column);
+
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+TEST_1D(TokenizerTest, FloatSuffix, kBlockSizes) {
+ // Test the "allow_f_after_float" option.
+
+ // Set up the tokenizer.
+ const char* text = "1f 2.5f 6e3f 7F";
+ TestInputStream input(text, strlen(text), kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+ tokenizer.set_allow_f_after_float(true);
+
+ // Advance through tokens and check that they are parsed as expected.
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "1f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "2.5f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "6e3f");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+ ASSERT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, "7F");
+ EXPECT_EQ(tokenizer.current().type, Tokenizer::TYPE_FLOAT);
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+// In each case, the input is parsed to produce a list of tokens. The
+// last token in "output" must have type TYPE_END.
+struct MultiTokenCase {
+ string input;
+ Tokenizer::Token output[10]; // The compiler wants a constant array
+ // size for initialization to work. There
+ // is no reason this can't be increased if
+ // needed.
+};
+
+inline ostream& operator<<(ostream& out,
+ const MultiTokenCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+MultiTokenCase kMultiTokenCases[] = {
+ // Test empty input.
+ { "", {
+ { Tokenizer::TYPE_END , "" , 0, 0 },
+ }},
+
+ // Test all token types at the same time.
+ { "foo 1 1.2 + 'bar'", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo" , 0, 0, 3 },
+ { Tokenizer::TYPE_INTEGER , "1" , 0, 4, 5 },
+ { Tokenizer::TYPE_FLOAT , "1.2" , 0, 6, 9 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 10, 11 },
+ { Tokenizer::TYPE_STRING , "'bar'", 0, 12, 17 },
+ { Tokenizer::TYPE_END , "" , 0, 17, 17 },
+ }},
+
+ // Test that consecutive symbols are parsed as separate tokens.
+ { "!@+%", {
+ { Tokenizer::TYPE_SYMBOL , "!" , 0, 0, 1 },
+ { Tokenizer::TYPE_SYMBOL , "@" , 0, 1, 2 },
+ { Tokenizer::TYPE_SYMBOL , "+" , 0, 2, 3 },
+ { Tokenizer::TYPE_SYMBOL , "%" , 0, 3, 4 },
+ { Tokenizer::TYPE_END , "" , 0, 4, 4 },
+ }},
+
+ // Test that newlines affect line numbers correctly.
+ { "foo bar\nrab oof", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 4, 7 },
+ { Tokenizer::TYPE_IDENTIFIER, "rab", 1, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "oof", 1, 4, 7 },
+ { Tokenizer::TYPE_END , "" , 1, 7, 7 },
+ }},
+
+ // Test that tabs affect column numbers correctly.
+ { "foo\tbar \tbaz", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 8, 11 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 0, 16, 19 },
+ { Tokenizer::TYPE_END , "" , 0, 19, 19 },
+ }},
+
+ // Test that tabs in string literals affect column numbers correctly.
+ { "\"foo\tbar\" baz", {
+ { Tokenizer::TYPE_STRING , "\"foo\tbar\"", 0, 0, 12 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz" , 0, 13, 16 },
+ { Tokenizer::TYPE_END , "" , 0, 16, 16 },
+ }},
+
+ // Test that line comments are ignored.
+ { "foo // This is a comment\n"
+ "bar // This is another comment", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 0, 3 },
+ { Tokenizer::TYPE_END , "" , 1, 30, 30 },
+ }},
+
+ // Test that block comments are ignored.
+ { "foo /* This is a block comment */ bar", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 34, 37 },
+ { Tokenizer::TYPE_END , "" , 0, 37, 37 },
+ }},
+
+ // Test that sh-style comments are not ignored by default.
+ { "foo # bar\n"
+ "baz", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_SYMBOL , "#" , 0, 4, 5 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 0, 6, 9 },
+ { Tokenizer::TYPE_IDENTIFIER, "baz", 1, 0, 3 },
+ { Tokenizer::TYPE_END , "" , 1, 3, 3 },
+ }},
+
+ // Test all whitespace chars
+ { "foo\n\t\r\v\fbar", {
+ { Tokenizer::TYPE_IDENTIFIER, "foo", 0, 0, 3 },
+ { Tokenizer::TYPE_IDENTIFIER, "bar", 1, 11, 14 },
+ { Tokenizer::TYPE_END , "" , 1, 14, 14 },
+ }},
+};
+
+TEST_2D(TokenizerTest, MultipleTokens, kMultiTokenCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kMultiTokenCases_case.input.data(),
+ kMultiTokenCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Before Next() is called, the initial token should always be TYPE_START.
+ EXPECT_EQ(Tokenizer::TYPE_START, tokenizer.current().type);
+ EXPECT_EQ("", tokenizer.current().text);
+ EXPECT_EQ(0, tokenizer.current().line);
+ EXPECT_EQ(0, tokenizer.current().column);
+ EXPECT_EQ(0, tokenizer.current().end_column);
+
+ // Loop through all expected tokens.
+ int i = 0;
+ Tokenizer::Token token;
+ do {
+ token = kMultiTokenCases_case.output[i++];
+
+ SCOPED_TRACE(testing::Message() << "Token #" << i << ": " << token.text);
+
+ Tokenizer::Token previous = tokenizer.current();
+
+ // Next() should only return false when it hits the end token.
+ if (token.type != Tokenizer::TYPE_END) {
+ ASSERT_TRUE(tokenizer.Next());
+ } else {
+ ASSERT_FALSE(tokenizer.Next());
+ }
+
+ // Check that the previous token is set correctly.
+ EXPECT_EQ(previous.type, tokenizer.previous().type);
+ EXPECT_EQ(previous.text, tokenizer.previous().text);
+ EXPECT_EQ(previous.line, tokenizer.previous().line);
+ EXPECT_EQ(previous.column, tokenizer.previous().column);
+ EXPECT_EQ(previous.end_column, tokenizer.previous().end_column);
+
+ // Check that the token matches the expected one.
+ EXPECT_EQ(token.type, tokenizer.current().type);
+ EXPECT_EQ(token.text, tokenizer.current().text);
+ EXPECT_EQ(token.line, tokenizer.current().line);
+ EXPECT_EQ(token.column, tokenizer.current().column);
+ EXPECT_EQ(token.end_column, tokenizer.current().end_column);
+
+ } while (token.type != Tokenizer::TYPE_END);
+
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+// This test causes gcc 3.3.5 (and earlier?) to give the cryptic error:
+// "sorry, unimplemented: `method_call_expr' not supported by dump_expr"
+#if !defined(__GNUC__) || __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 3)
+
+TEST_1D(TokenizerTest, ShCommentStyle, kBlockSizes) {
+ // Test the "comment_style" option.
+
+ const char* text = "foo # bar\n"
+ "baz // qux\n"
+ "corge /* grault */\n"
+ "garply";
+ const char* const kTokens[] = {"foo", // "# bar" is ignored
+ "baz", "/", "/", "qux",
+ "corge", "/", "*", "grault", "*", "/",
+ "garply"};
+
+ // Set up the tokenizer.
+ TestInputStream input(text, strlen(text), kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+ tokenizer.set_comment_style(Tokenizer::SH_COMMENT_STYLE);
+
+ // Advance through tokens and check that they are parsed as expected.
+ for (int i = 0; i < GOOGLE_ARRAYSIZE(kTokens); i++) {
+ EXPECT_TRUE(tokenizer.Next());
+ EXPECT_EQ(tokenizer.current().text, kTokens[i]);
+ }
+
+ // There should be no more input.
+ EXPECT_FALSE(tokenizer.Next());
+ // There should be no errors.
+ EXPECT_TRUE(error_collector.text_.empty());
+}
+
+#endif
+
+// -------------------------------------------------------------------
+
+// In each case, the input is expected to have two tokens named "prev" and
+// "next" with comments in between.
+struct DocCommentCase {
+ string input;
+
+ const char* prev_trailing_comments;
+ const char* detached_comments[10];
+ const char* next_leading_comments;
+};
+
+inline ostream& operator<<(ostream& out,
+ const DocCommentCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+DocCommentCase kDocCommentCases[] = {
+ {
+ "prev next",
+
+ "",
+ {},
+ ""
+ },
+
+ {
+ "prev /* ignored */ next",
+
+ "",
+ {},
+ ""
+ },
+
+ {
+ "prev // trailing comment\n"
+ "next",
+
+ " trailing comment\n",
+ {},
+ ""
+ },
+
+ {
+ "prev\n"
+ "// leading comment\n"
+ "// line 2\n"
+ "next",
+
+ "",
+ {},
+ " leading comment\n"
+ " line 2\n"
+ },
+
+ {
+ "prev\n"
+ "// trailing comment\n"
+ "// line 2\n"
+ "\n"
+ "next",
+
+ " trailing comment\n"
+ " line 2\n",
+ {},
+ ""
+ },
+
+ {
+ "prev // trailing comment\n"
+ "// leading comment\n"
+ "// line 2\n"
+ "next",
+
+ " trailing comment\n",
+ {},
+ " leading comment\n"
+ " line 2\n"
+ },
+
+ {
+ "prev /* trailing block comment */\n"
+ "/* leading block comment\n"
+ " * line 2\n"
+ " * line 3 */"
+ "next",
+
+ " trailing block comment ",
+ {},
+ " leading block comment\n"
+ " line 2\n"
+ " line 3 "
+ },
+
+ {
+ "prev\n"
+ "/* trailing block comment\n"
+ " * line 2\n"
+ " * line 3\n"
+ " */\n"
+ "/* leading block comment\n"
+ " * line 2\n"
+ " * line 3 */"
+ "next",
+
+ " trailing block comment\n"
+ " line 2\n"
+ " line 3\n",
+ {},
+ " leading block comment\n"
+ " line 2\n"
+ " line 3 "
+ },
+
+ {
+ "prev\n"
+ "// trailing comment\n"
+ "\n"
+ "// detached comment\n"
+ "// line 2\n"
+ "\n"
+ "// second detached comment\n"
+ "/* third detached comment\n"
+ " * line 2 */\n"
+ "// leading comment\n"
+ "next",
+
+ " trailing comment\n",
+ {
+ " detached comment\n"
+ " line 2\n",
+ " second detached comment\n",
+ " third detached comment\n"
+ " line 2 "
+ },
+ " leading comment\n"
+ },
+
+ {
+ "prev /**/\n"
+ "\n"
+ "// detached comment\n"
+ "\n"
+ "// leading comment\n"
+ "next",
+
+ "",
+ {
+ " detached comment\n"
+ },
+ " leading comment\n"
+ },
+
+ {
+ "prev /**/\n"
+ "// leading comment\n"
+ "next",
+
+ "",
+ {},
+ " leading comment\n"
+ },
+ };
+
+TEST_2D(TokenizerTest, DocComments, kDocCommentCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kDocCommentCases_case.input.data(),
+ kDocCommentCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Set up a second tokenizer where we'll pass all NULLs to NextWithComments().
+ TestInputStream input2(kDocCommentCases_case.input.data(),
+ kDocCommentCases_case.input.size(),
+ kBlockSizes_case);
+ Tokenizer tokenizer2(&input2, &error_collector);
+
+ tokenizer.Next();
+ tokenizer2.Next();
+
+ EXPECT_EQ("prev", tokenizer.current().text);
+ EXPECT_EQ("prev", tokenizer2.current().text);
+
+ string prev_trailing_comments;
+ vector<string> detached_comments;
+ string next_leading_comments;
+ tokenizer.NextWithComments(&prev_trailing_comments, &detached_comments,
+ &next_leading_comments);
+ tokenizer2.NextWithComments(NULL, NULL, NULL);
+ EXPECT_EQ("next", tokenizer.current().text);
+ EXPECT_EQ("next", tokenizer2.current().text);
+
+ EXPECT_EQ(kDocCommentCases_case.prev_trailing_comments,
+ prev_trailing_comments);
+
+ for (int i = 0; i < detached_comments.size(); i++) {
+ ASSERT_LT(i, GOOGLE_ARRAYSIZE(kDocCommentCases_case.detached_comments));
+ ASSERT_TRUE(kDocCommentCases_case.detached_comments[i] != NULL);
+ EXPECT_EQ(kDocCommentCases_case.detached_comments[i],
+ detached_comments[i]);
+ }
+
+ // Verify that we matched all the detached comments.
+ EXPECT_EQ(NULL,
+ kDocCommentCases_case.detached_comments[detached_comments.size()]);
+
+ EXPECT_EQ(kDocCommentCases_case.next_leading_comments,
+ next_leading_comments);
+}
+
+// -------------------------------------------------------------------
+
+// Test parse helpers. It's not really worth setting up a full data-driven
+// test here.
+TEST_F(TokenizerTest, ParseInteger) {
+ EXPECT_EQ(0, ParseInteger("0"));
+ EXPECT_EQ(123, ParseInteger("123"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xabcdef12"));
+ EXPECT_EQ(0xabcdef12u, ParseInteger("0xABCDEF12"));
+ EXPECT_EQ(kuint64max, ParseInteger("0xFFFFFFFFFFFFFFFF"));
+ EXPECT_EQ(01234567, ParseInteger("01234567"));
+ EXPECT_EQ(0X123, ParseInteger("0X123"));
+
+ // Test invalid integers that may still be tokenized as integers.
+ EXPECT_EQ(0, ParseInteger("0x"));
+
+ uint64 i;
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+ // Test invalid integers that will never be tokenized as integers.
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("zxy", kuint64max, &i),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("1.2", kuint64max, &i),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("08", kuint64max, &i),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("0xg", kuint64max, &i),
+ "passed text that could not have been tokenized as an integer");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseInteger("-1", kuint64max, &i),
+ "passed text that could not have been tokenized as an integer");
+#endif // PROTOBUF_HAS_DEATH_TEST
+
+ // Test overflows.
+ EXPECT_TRUE (Tokenizer::ParseInteger("0", 0, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("1", 0, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("1", 1, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("12345", 12345, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("12346", 12345, &i));
+ EXPECT_TRUE (Tokenizer::ParseInteger("0xFFFFFFFFFFFFFFFF" , kuint64max, &i));
+ EXPECT_FALSE(Tokenizer::ParseInteger("0x10000000000000000", kuint64max, &i));
+}
+
+TEST_F(TokenizerTest, ParseFloat) {
+ EXPECT_DOUBLE_EQ(1 , Tokenizer::ParseFloat("1."));
+ EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1e3"));
+ EXPECT_DOUBLE_EQ(1e3 , Tokenizer::ParseFloat("1E3"));
+ EXPECT_DOUBLE_EQ(1.5e3, Tokenizer::ParseFloat("1.5e3"));
+ EXPECT_DOUBLE_EQ(.1 , Tokenizer::ParseFloat(".1"));
+ EXPECT_DOUBLE_EQ(.25 , Tokenizer::ParseFloat(".25"));
+ EXPECT_DOUBLE_EQ(.1e3 , Tokenizer::ParseFloat(".1e3"));
+ EXPECT_DOUBLE_EQ(.25e3, Tokenizer::ParseFloat(".25e3"));
+ EXPECT_DOUBLE_EQ(.1e+3, Tokenizer::ParseFloat(".1e+3"));
+ EXPECT_DOUBLE_EQ(.1e-3, Tokenizer::ParseFloat(".1e-3"));
+ EXPECT_DOUBLE_EQ(5 , Tokenizer::ParseFloat("5"));
+ EXPECT_DOUBLE_EQ(6e-12, Tokenizer::ParseFloat("6e-12"));
+ EXPECT_DOUBLE_EQ(1.2 , Tokenizer::ParseFloat("1.2"));
+ EXPECT_DOUBLE_EQ(1.e2 , Tokenizer::ParseFloat("1.e2"));
+
+ // Test invalid floats that may still be tokenized as floats.
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1e-"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.e"));
+
+ // Test 'f' suffix.
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1f"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1.0f"));
+ EXPECT_DOUBLE_EQ(1, Tokenizer::ParseFloat("1F"));
+
+ // These should parse successfully even though they are out of range.
+ // Overflows become infinity and underflows become zero.
+ EXPECT_EQ( 0.0, Tokenizer::ParseFloat("1e-9999999999999999999999999999"));
+ EXPECT_EQ(HUGE_VAL, Tokenizer::ParseFloat("1e+9999999999999999999999999999"));
+
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+ // Test invalid floats that will never be tokenized as floats.
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("zxy"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("1-e0"),
+ "passed text that could not have been tokenized as a float");
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseFloat("-1.0"),
+ "passed text that could not have been tokenized as a float");
+#endif // PROTOBUF_HAS_DEATH_TEST
+}
+
+TEST_F(TokenizerTest, ParseString) {
+ string output;
+ Tokenizer::ParseString("'hello'", &output);
+ EXPECT_EQ("hello", output);
+ Tokenizer::ParseString("\"blah\\nblah2\"", &output);
+ EXPECT_EQ("blah\nblah2", output);
+ Tokenizer::ParseString("'\\1x\\1\\123\\739\\52\\334n\\3'", &output);
+ EXPECT_EQ("\1x\1\123\739\52\334n\3", output);
+ Tokenizer::ParseString("'\\x20\\x4'", &output);
+ EXPECT_EQ("\x20\x4", output);
+
+ // Test invalid strings that may still be tokenized as strings.
+ Tokenizer::ParseString("\"\\a\\l\\v\\t", &output); // \l is invalid
+ EXPECT_EQ("\a?\v\t", output);
+ Tokenizer::ParseString("'", &output);
+ EXPECT_EQ("", output);
+ Tokenizer::ParseString("'\\", &output);
+ EXPECT_EQ("\\", output);
+
+ // Experiment with Unicode escapes. Here are one-, two-, three- and
+ // four-byte Unicode characters.
+ Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\U00024b62XX'", &output);
+ EXPECT_EQ("$¢€ð¤¢XX", output);
+ // Same thing encoded using UTF16.
+ Tokenizer::ParseString("'\\u0024\\u00a2\\u20ac\\ud852\\udf62XX'", &output);
+ EXPECT_EQ("$¢€ð¤¢XX", output);
+ // Here's some broken UTF16; there's a head surrogate with no tail surrogate.
+ // We just output this as if it were UTF8; it's not a defined code point, but
+ // it has a defined encoding.
+ Tokenizer::ParseString("'\\ud852XX'", &output);
+ EXPECT_EQ("\xed\xa1\x92XX", output);
+ // Malformed escape: Demons may fly out of the nose.
+ Tokenizer::ParseString("\\u0", &output);
+ EXPECT_EQ("u0", output);
+
+ // Test invalid strings that will never be tokenized as strings.
+#ifdef PROTOBUF_HAS_DEATH_TEST // death tests do not work on Windows yet
+ EXPECT_DEBUG_DEATH(Tokenizer::ParseString("", &output),
+ "passed text that could not have been tokenized as a string");
+#endif // PROTOBUF_HAS_DEATH_TEST
+}
+
+TEST_F(TokenizerTest, ParseStringAppend) {
+ // Check that ParseString and ParseStringAppend differ.
+ string output("stuff+");
+ Tokenizer::ParseStringAppend("'hello'", &output);
+ EXPECT_EQ("stuff+hello", output);
+ Tokenizer::ParseString("'hello'", &output);
+ EXPECT_EQ("hello", output);
+}
+
+// -------------------------------------------------------------------
+
+// Each case parses some input text, ignoring the tokens produced, and
+// checks that the error output matches what is expected.
+struct ErrorCase {
+ string input;
+ bool recoverable; // True if the tokenizer should be able to recover and
+ // parse more tokens after seeing this error. Cases
+ // for which this is true must end with "foo" as
+ // the last token, which the test will check for.
+ const char* errors;
+};
+
+inline ostream& operator<<(ostream& out,
+ const ErrorCase& test_case) {
+ return out << CEscape(test_case.input);
+}
+
+ErrorCase kErrorCases[] = {
+ // String errors.
+ { "'\\l' foo", true,
+ "0:2: Invalid escape sequence in string literal.\n" },
+ { "'\\X' foo", true,
+ "0:2: Invalid escape sequence in string literal.\n" },
+ { "'\\x' foo", true,
+ "0:3: Expected hex digits for escape sequence.\n" },
+ { "'foo", false,
+ "0:4: Unexpected end of string.\n" },
+ { "'bar\nfoo", true,
+ "0:4: String literals cannot cross line boundaries.\n" },
+ { "'\\u01' foo", true,
+ "0:5: Expected four hex digits for \\u escape sequence.\n" },
+ { "'\\u01' foo", true,
+ "0:5: Expected four hex digits for \\u escape sequence.\n" },
+ { "'\\uXYZ' foo", true,
+ "0:3: Expected four hex digits for \\u escape sequence.\n" },
+
+ // Integer errors.
+ { "123foo", true,
+ "0:3: Need space between number and identifier.\n" },
+
+ // Hex/octal errors.
+ { "0x foo", true,
+ "0:2: \"0x\" must be followed by hex digits.\n" },
+ { "0541823 foo", true,
+ "0:4: Numbers starting with leading zero must be in octal.\n" },
+ { "0x123z foo", true,
+ "0:5: Need space between number and identifier.\n" },
+ { "0x123.4 foo", true,
+ "0:5: Hex and octal numbers must be integers.\n" },
+ { "0123.4 foo", true,
+ "0:4: Hex and octal numbers must be integers.\n" },
+
+ // Float errors.
+ { "1e foo", true,
+ "0:2: \"e\" must be followed by exponent.\n" },
+ { "1e- foo", true,
+ "0:3: \"e\" must be followed by exponent.\n" },
+ { "1.2.3 foo", true,
+ "0:3: Already saw decimal point or exponent; can't have another one.\n" },
+ { "1e2.3 foo", true,
+ "0:3: Already saw decimal point or exponent; can't have another one.\n" },
+ { "a.1 foo", true,
+ "0:1: Need space between identifier and decimal point.\n" },
+ // allow_f_after_float not enabled, so this should be an error.
+ { "1.0f foo", true,
+ "0:3: Need space between number and identifier.\n" },
+
+ // Block comment errors.
+ { "/*", false,
+ "0:2: End-of-file inside block comment.\n"
+ "0:0: Comment started here.\n"},
+ { "/*/*/ foo", true,
+ "0:3: \"/*\" inside block comment. Block comments cannot be nested.\n"},
+
+ // Control characters. Multiple consecutive control characters should only
+ // produce one error.
+ { "\b foo", true,
+ "0:0: Invalid control characters encountered in text.\n" },
+ { "\b\b foo", true,
+ "0:0: Invalid control characters encountered in text.\n" },
+
+ // Check that control characters at end of input don't result in an
+ // infinite loop.
+ { "\b", false,
+ "0:0: Invalid control characters encountered in text.\n" },
+
+ // Check recovery from '\0'. We have to explicitly specify the length of
+ // these strings because otherwise the string constructor will just call
+ // strlen() which will see the first '\0' and think that is the end of the
+ // string.
+ { string("\0foo", 4), true,
+ "0:0: Invalid control characters encountered in text.\n" },
+ { string("\0\0foo", 5), true,
+ "0:0: Invalid control characters encountered in text.\n" },
+
+ // Check the error produced by characters with the high-order bit set.
+ { "\300foo", true,
+ "0:0: Interpreting non ascii codepoint 192.\n" },
+};
+
+TEST_2D(TokenizerTest, Errors, kErrorCases, kBlockSizes) {
+ // Set up the tokenizer.
+ TestInputStream input(kErrorCases_case.input.data(),
+ kErrorCases_case.input.size(),
+ kBlockSizes_case);
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ // Ignore all input, except remember if the last token was "foo".
+ bool last_was_foo = false;
+ while (tokenizer.Next()) {
+ last_was_foo = tokenizer.current().text == "foo";
+ }
+
+ // Check that the errors match what was expected.
+ EXPECT_EQ(kErrorCases_case.errors, error_collector.text_);
+
+ // If the error was recoverable, make sure we saw "foo" after it.
+ if (kErrorCases_case.recoverable) {
+ EXPECT_TRUE(last_was_foo);
+ }
+}
+
+// -------------------------------------------------------------------
+
+TEST_1D(TokenizerTest, BackUpOnDestruction, kBlockSizes) {
+ string text = "foo bar";
+ TestInputStream input(text.data(), text.size(), kBlockSizes_case);
+
+ // Create a tokenizer, read one token, then destroy it.
+ {
+ TestErrorCollector error_collector;
+ Tokenizer tokenizer(&input, &error_collector);
+
+ tokenizer.Next();
+ }
+
+ // Only "foo" should have been read.
+ EXPECT_EQ(strlen("foo"), input.ByteCount());
+}
+
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.cc b/src/google/protobuf/io/zero_copy_stream.cc
new file mode 100644
index 0000000..186de00
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream.cc
@@ -0,0 +1,58 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <google/protobuf/io/zero_copy_stream.h>
+
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+ZeroCopyInputStream::~ZeroCopyInputStream() {}
+ZeroCopyOutputStream::~ZeroCopyOutputStream() {}
+
+
+bool ZeroCopyOutputStream::WriteAliasedRaw(const void* /* data */,
+ int /* size */) {
+ GOOGLE_LOG(FATAL) << "This ZeroCopyOutputStream doesn't support aliasing. "
+ "Reaching here usually means a ZeroCopyOutputStream "
+ "implementation bug.";
+ return false;
+}
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream.h b/src/google/protobuf/io/zero_copy_stream.h
new file mode 100644
index 0000000..52650fc
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream.h
@@ -0,0 +1,248 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains the ZeroCopyInputStream and ZeroCopyOutputStream
+// interfaces, which represent abstract I/O streams to and from which
+// protocol buffers can be read and written. For a few simple
+// implementations of these interfaces, see zero_copy_stream_impl.h.
+//
+// These interfaces are different from classic I/O streams in that they
+// try to minimize the amount of data copying that needs to be done.
+// To accomplish this, responsibility for allocating buffers is moved to
+// the stream object, rather than being the responsibility of the caller.
+// So, the stream can return a buffer which actually points directly into
+// the final data structure where the bytes are to be stored, and the caller
+// can interact directly with that buffer, eliminating an intermediate copy
+// operation.
+//
+// As an example, consider the common case in which you are reading bytes
+// from an array that is already in memory (or perhaps an mmap()ed file).
+// With classic I/O streams, you would do something like:
+// char buffer[BUFFER_SIZE];
+// input->Read(buffer, BUFFER_SIZE);
+// DoSomething(buffer, BUFFER_SIZE);
+// Then, the stream basically just calls memcpy() to copy the data from
+// the array into your buffer. With a ZeroCopyInputStream, you would do
+// this instead:
+// const void* buffer;
+// int size;
+// input->Next(&buffer, &size);
+// DoSomething(buffer, size);
+// Here, no copy is performed. The input stream returns a pointer directly
+// into the backing array, and the caller ends up reading directly from it.
+//
+// If you want to be able to read the old-fashioned way, you can create
+// a CodedInputStream or CodedOutputStream wrapping these objects and use
+// their ReadRaw()/WriteRaw() methods. These will, of course, add a copy
+// step, but Coded*Stream will handle buffering so at least it will be
+// reasonably efficient.
+//
+// ZeroCopyInputStream example:
+// // Read in a file and print its contents to stdout.
+// int fd = open("myfile", O_RDONLY);
+// ZeroCopyInputStream* input = new FileInputStream(fd);
+//
+// const void* buffer;
+// int size;
+// while (input->Next(&buffer, &size)) {
+// cout.write(buffer, size);
+// }
+//
+// delete input;
+// close(fd);
+//
+// ZeroCopyOutputStream example:
+// // Copy the contents of "infile" to "outfile", using plain read() for
+// // "infile" but a ZeroCopyOutputStream for "outfile".
+// int infd = open("infile", O_RDONLY);
+// int outfd = open("outfile", O_WRONLY);
+// ZeroCopyOutputStream* output = new FileOutputStream(outfd);
+//
+// void* buffer;
+// int size;
+// while (output->Next(&buffer, &size)) {
+// int bytes = read(infd, buffer, size);
+// if (bytes < size) {
+// // Reached EOF.
+// output->BackUp(size - bytes);
+// break;
+// }
+// }
+//
+// delete output;
+// close(infd);
+// close(outfd);
+
+#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
+#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
+
+#include <string>
+#include <google/protobuf/stubs/common.h>
+
+namespace google {
+
+namespace protobuf {
+namespace io {
+
+// Defined in this file.
+class ZeroCopyInputStream;
+class ZeroCopyOutputStream;
+
+// Abstract interface similar to an input stream but designed to minimize
+// copying.
+class LIBPROTOBUF_EXPORT ZeroCopyInputStream {
+ public:
+ inline ZeroCopyInputStream() {}
+ virtual ~ZeroCopyInputStream();
+
+ // Obtains a chunk of data from the stream.
+ //
+ // Preconditions:
+ // * "size" and "data" are not NULL.
+ //
+ // Postconditions:
+ // * If the returned value is false, there is no more data to return or
+ // an error occurred. All errors are permanent.
+ // * Otherwise, "size" points to the actual number of bytes read and "data"
+ // points to a pointer to a buffer containing these bytes.
+ // * Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ // * It is legal for the returned buffer to have zero size, as long
+ // as repeatedly calling Next() eventually yields a buffer with non-zero
+ // size.
+ virtual bool Next(const void** data, int* size) = 0;
+
+ // Backs up a number of bytes, so that the next call to Next() returns
+ // data again that was already returned by the last call to Next(). This
+ // is useful when writing procedures that are only supposed to read up
+ // to a certain point in the input, then return. If Next() returns a
+ // buffer that goes beyond what you wanted to read, you can use BackUp()
+ // to return to the point where you intended to finish.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // pushed back into the stream. Subsequent calls to Next() will return
+ // the same data again before producing new data.
+ virtual void BackUp(int count) = 0;
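+ //
+ // A minimal sketch of the usual pattern -- reading exactly "needed" bytes
+ // and pushing any excess back for the next consumer ("input" and "needed"
+ // are placeholders, not part of this interface):
+ //   const void* data;
+ //   int size;
+ //   while (needed > 0 && input->Next(&data, &size)) {
+ //     if (size > needed) {
+ //       input->BackUp(size - needed);
+ //       size = needed;
+ //     }
+ //     // ... consume "size" bytes starting at "data" ...
+ //     needed -= size;
+ //   }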
+
+ // Skips a number of bytes. Returns false if the end of the stream is
+ // reached or some input error occurred. In the end-of-stream case, the
+ // stream is advanced to the end of the stream (so ByteCount() will return
+ // the total size of the stream).
+ virtual bool Skip(int count) = 0;
+
+ // Returns the total number of bytes read since this object was created.
+ virtual int64 ByteCount() const = 0;
+
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyInputStream);
+};
+
+// Abstract interface similar to an output stream but designed to minimize
+// copying.
+class LIBPROTOBUF_EXPORT ZeroCopyOutputStream {
+ public:
+ inline ZeroCopyOutputStream() {}
+ virtual ~ZeroCopyOutputStream();
+
+ // Obtains a buffer into which data can be written. Any data written
+ // into this buffer will eventually (maybe instantly, maybe later on)
+ // be written to the output.
+ //
+ // Preconditions:
+ // * "size" and "data" are not NULL.
+ //
+ // Postconditions:
+ // * If the returned value is false, an error occurred. All errors are
+ // permanent.
+ // * Otherwise, "size" points to the actual number of bytes in the buffer
+ // and "data" points to the buffer.
+ // * Ownership of this buffer remains with the stream, and the buffer
+ // remains valid only until some other method of the stream is called
+ // or the stream is destroyed.
+ // * Any data which the caller stores in this buffer will eventually be
+ // written to the output (unless BackUp() is called).
+ // * It is legal for the returned buffer to have zero size, as long
+ // as repeatedly calling Next() eventually yields a buffer with non-zero
+ // size.
+ virtual bool Next(void** data, int* size) = 0;
+
+ // Backs up a number of bytes, so that the end of the last buffer returned
+ // by Next() is not actually written. This is needed when you finish
+ // writing all the data you want to write, but the last buffer was bigger
+ // than you needed. You don't want to write a bunch of garbage after the
+ // end of your data, so you use BackUp() to back up.
+ //
+ // Preconditions:
+ // * The last method called must have been Next().
+ // * count must be less than or equal to the size of the last buffer
+ // returned by Next().
+ // * The caller must not have written anything to the last "count" bytes
+ // of that buffer.
+ //
+ // Postconditions:
+ // * The last "count" bytes of the last buffer returned by Next() will be
+ // ignored.
+ virtual void BackUp(int count) = 0;
+
+ // Returns the total number of bytes written since this object was created.
+ virtual int64 ByteCount() const = 0;
+
+ // Write a given chunk of data to the output. Some output streams may
+ // implement this in a way that avoids copying. Check AllowsAliasing() before
+ // calling WriteAliasedRaw(). It will GOOGLE_CHECK fail if WriteAliasedRaw() is
+ // called on a stream that does not allow aliasing.
+ //
+ // NOTE: It is caller's responsibility to ensure that the chunk of memory
+ // remains live until all of the data has been consumed from the stream.
+ virtual bool WriteAliasedRaw(const void* data, int size);
+ virtual bool AllowsAliasing() const { return false; }
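+ //
+ // A sketch of how a caller might use these two methods together ("output",
+ // "data" and "size" are placeholders; the fallback is whatever copying path
+ // the caller would otherwise use, e.g. CodedOutputStream::WriteRaw()):
+ //   if (output->AllowsAliasing()) {
+ //     // "data" must stay live until the stream has consumed it.
+ //     output->WriteAliasedRaw(data, size);
+ //   } else {
+ //     // Copy "data" into buffers obtained from Next()/BackUp() instead.
+ //   }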
+
+
+ private:
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ZeroCopyOutputStream);
+};
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_H__
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.cc b/src/google/protobuf/io/zero_copy_stream_impl.cc
new file mode 100644
index 0000000..7ec2b5d
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl.cc
@@ -0,0 +1,474 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#ifdef _MSC_VER
+#include <io.h>
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+#include <errno.h>
+#include <iostream>
+#include <algorithm>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/stl_util.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+#ifdef _WIN32
+// Win32 lseek is broken: If invoked on a non-seekable file descriptor, its
+// return value is undefined. We re-define it to always produce an error.
+#define lseek(fd, offset, origin) ((off_t)-1)
+#endif
+
+namespace {
+
+// EINTR sucks.
+int close_no_eintr(int fd) {
+ int result;
+ do {
+ result = close(fd);
+ } while (result < 0 && errno == EINTR);
+ return result;
+}
+
+} // namespace
+
+
+// ===================================================================
+
+FileInputStream::FileInputStream(int file_descriptor, int block_size)
+    : copying_input_(file_descriptor),
+      impl_(&copying_input_, block_size) {
+}
+
+FileInputStream::~FileInputStream() {}
+
+bool FileInputStream::Close() {
+ return copying_input_.Close();
+}
+
+bool FileInputStream::Next(const void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void FileInputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+bool FileInputStream::Skip(int count) {
+ return impl_.Skip(count);
+}
+
+int64 FileInputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+FileInputStream::CopyingFileInputStream::CopyingFileInputStream(
+ int file_descriptor)
+ : file_(file_descriptor),
+ close_on_delete_(false),
+ is_closed_(false),
+ errno_(0),
+ previous_seek_failed_(false) {
+}
+
+FileInputStream::CopyingFileInputStream::~CopyingFileInputStream() {
+ if (close_on_delete_) {
+ if (!Close()) {
+ GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_);
+ }
+ }
+}
+
+bool FileInputStream::CopyingFileInputStream::Close() {
+ GOOGLE_CHECK(!is_closed_);
+
+ is_closed_ = true;
+ if (close_no_eintr(file_) != 0) {
+ // The docs on close() do not specify whether a file descriptor is still
+ // open after close() fails with EIO. However, the glibc source code
+ // seems to indicate that it is not.
+ errno_ = errno;
+ return false;
+ }
+
+ return true;
+}
+
+int FileInputStream::CopyingFileInputStream::Read(void* buffer, int size) {
+ GOOGLE_CHECK(!is_closed_);
+
+ int result;
+ do {
+ result = read(file_, buffer, size);
+ } while (result < 0 && errno == EINTR);
+
+ if (result < 0) {
+ // Read error (not EOF).
+ errno_ = errno;
+ }
+
+ return result;
+}
+
+int FileInputStream::CopyingFileInputStream::Skip(int count) {
+ GOOGLE_CHECK(!is_closed_);
+
+ if (!previous_seek_failed_ &&
+ lseek(file_, count, SEEK_CUR) != (off_t)-1) {
+ // Seek succeeded.
+ return count;
+ } else {
+ // Failed to seek.
+
+ // Note to self: Don't seek again. This file descriptor doesn't
+ // support it.
+ previous_seek_failed_ = true;
+
+ // Use the default implementation.
+ return CopyingInputStream::Skip(count);
+ }
+}
+
+// ===================================================================
+
+FileOutputStream::FileOutputStream(int file_descriptor, int block_size)
+    : copying_output_(file_descriptor),
+      impl_(&copying_output_, block_size) {
+}
+
+FileOutputStream::~FileOutputStream() {
+ impl_.Flush();
+}
+
+bool FileOutputStream::Close() {
+ bool flush_succeeded = impl_.Flush();
+ return copying_output_.Close() && flush_succeeded;
+}
+
+bool FileOutputStream::Flush() {
+ return impl_.Flush();
+}
+
+bool FileOutputStream::Next(void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void FileOutputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+int64 FileOutputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+FileOutputStream::CopyingFileOutputStream::CopyingFileOutputStream(
+ int file_descriptor)
+ : file_(file_descriptor),
+ close_on_delete_(false),
+ is_closed_(false),
+ errno_(0) {
+}
+
+FileOutputStream::CopyingFileOutputStream::~CopyingFileOutputStream() {
+ if (close_on_delete_) {
+ if (!Close()) {
+ GOOGLE_LOG(ERROR) << "close() failed: " << strerror(errno_);
+ }
+ }
+}
+
+bool FileOutputStream::CopyingFileOutputStream::Close() {
+ GOOGLE_CHECK(!is_closed_);
+
+ is_closed_ = true;
+ if (close_no_eintr(file_) != 0) {
+ // The docs on close() do not specify whether a file descriptor is still
+ // open after close() fails with EIO. However, the glibc source code
+ // seems to indicate that it is not.
+ errno_ = errno;
+ return false;
+ }
+
+ return true;
+}
+
+bool FileOutputStream::CopyingFileOutputStream::Write(
+ const void* buffer, int size) {
+ GOOGLE_CHECK(!is_closed_);
+ int total_written = 0;
+
+ const uint8* buffer_base = reinterpret_cast<const uint8*>(buffer);
+
+ while (total_written < size) {
+ int bytes;
+ do {
+ bytes = write(file_, buffer_base + total_written, size - total_written);
+ } while (bytes < 0 && errno == EINTR);
+
+ if (bytes <= 0) {
+ // Write error.
+
+ // FIXME(kenton): According to the man page, if write() returns zero,
+ // there was no error; write() simply did not write anything. It's
+ // unclear under what circumstances this might happen, but presumably
+ // errno won't be set in this case. I am confused as to how such an
+ // event should be handled. For now I'm treating it as an error, since
+ // retrying seems like it could lead to an infinite loop. I suspect
+ // this never actually happens anyway.
+
+ if (bytes < 0) {
+ errno_ = errno;
+ }
+ return false;
+ }
+ total_written += bytes;
+ }
+
+ return true;
+}
+
+// ===================================================================
+
+IstreamInputStream::IstreamInputStream(istream* input, int block_size)
+    : copying_input_(input),
+      impl_(&copying_input_, block_size) {
+}
+
+IstreamInputStream::~IstreamInputStream() {}
+
+bool IstreamInputStream::Next(const void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void IstreamInputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+bool IstreamInputStream::Skip(int count) {
+ return impl_.Skip(count);
+}
+
+int64 IstreamInputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+IstreamInputStream::CopyingIstreamInputStream::CopyingIstreamInputStream(
+ istream* input)
+ : input_(input) {
+}
+
+IstreamInputStream::CopyingIstreamInputStream::~CopyingIstreamInputStream() {}
+
+int IstreamInputStream::CopyingIstreamInputStream::Read(
+ void* buffer, int size) {
+ input_->read(reinterpret_cast<char*>(buffer), size);
+ int result = input_->gcount();
+ if (result == 0 && input_->fail() && !input_->eof()) {
+ return -1;
+ }
+ return result;
+}
+
+// ===================================================================
+
+OstreamOutputStream::OstreamOutputStream(ostream* output, int block_size)
+    : copying_output_(output),
+      impl_(&copying_output_, block_size) {
+}
+
+OstreamOutputStream::~OstreamOutputStream() {
+ impl_.Flush();
+}
+
+bool OstreamOutputStream::Next(void** data, int* size) {
+ return impl_.Next(data, size);
+}
+
+void OstreamOutputStream::BackUp(int count) {
+ impl_.BackUp(count);
+}
+
+int64 OstreamOutputStream::ByteCount() const {
+ return impl_.ByteCount();
+}
+
+OstreamOutputStream::CopyingOstreamOutputStream::CopyingOstreamOutputStream(
+ ostream* output)
+ : output_(output) {
+}
+
+OstreamOutputStream::CopyingOstreamOutputStream::~CopyingOstreamOutputStream() {
+}
+
+bool OstreamOutputStream::CopyingOstreamOutputStream::Write(
+ const void* buffer, int size) {
+ output_->write(reinterpret_cast<const char*>(buffer), size);
+ return output_->good();
+}
+
+// ===================================================================
+
+ConcatenatingInputStream::ConcatenatingInputStream(
+ ZeroCopyInputStream* const streams[], int count)
+ : streams_(streams), stream_count_(count), bytes_retired_(0) {
+}
+
+ConcatenatingInputStream::~ConcatenatingInputStream() {
+}
+
+bool ConcatenatingInputStream::Next(const void** data, int* size) {
+ while (stream_count_ > 0) {
+ if (streams_[0]->Next(data, size)) return true;
+
+ // That stream is done. Advance to the next one.
+ bytes_retired_ += streams_[0]->ByteCount();
+ ++streams_;
+ --stream_count_;
+ }
+
+ // No more streams.
+ return false;
+}
+
+void ConcatenatingInputStream::BackUp(int count) {
+ if (stream_count_ > 0) {
+ streams_[0]->BackUp(count);
+ } else {
+ GOOGLE_LOG(DFATAL) << "Can't BackUp() after failed Next().";
+ }
+}
+
+bool ConcatenatingInputStream::Skip(int count) {
+ while (stream_count_ > 0) {
+ // Assume that ByteCount() can be used to find out how much we actually
+ // skipped when Skip() fails.
+ int64 target_byte_count = streams_[0]->ByteCount() + count;
+ if (streams_[0]->Skip(count)) return true;
+
+ // Hit the end of the stream. Figure out how many more bytes we still have
+ // to skip.
+ int64 final_byte_count = streams_[0]->ByteCount();
+ GOOGLE_DCHECK_LT(final_byte_count, target_byte_count);
+ count = target_byte_count - final_byte_count;
+
+ // That stream is done. Advance to the next one.
+ bytes_retired_ += final_byte_count;
+ ++streams_;
+ --stream_count_;
+ }
+
+ return false;
+}
+
+int64 ConcatenatingInputStream::ByteCount() const {
+ if (stream_count_ == 0) {
+ return bytes_retired_;
+ } else {
+ return bytes_retired_ + streams_[0]->ByteCount();
+ }
+}
+
+
+// ===================================================================
+
+LimitingInputStream::LimitingInputStream(ZeroCopyInputStream* input,
+ int64 limit)
+ : input_(input), limit_(limit) {
+ prior_bytes_read_ = input_->ByteCount();
+}
+
+LimitingInputStream::~LimitingInputStream() {
+ // If we overshot the limit, back up.
+ if (limit_ < 0) input_->BackUp(-limit_);
+}
+
+bool LimitingInputStream::Next(const void** data, int* size) {
+ if (limit_ <= 0) return false;
+ if (!input_->Next(data, size)) return false;
+
+ limit_ -= *size;
+ if (limit_ < 0) {
+ // We overshot the limit. Reduce *size to hide the rest of the buffer.
+ *size += limit_;
+ }
+ return true;
+}
+
+void LimitingInputStream::BackUp(int count) {
+ if (limit_ < 0) {
+ input_->BackUp(count - limit_);
+ limit_ = count;
+ } else {
+ input_->BackUp(count);
+ limit_ += count;
+ }
+}
+
+bool LimitingInputStream::Skip(int count) {
+ if (count > limit_) {
+ if (limit_ < 0) return false;
+ input_->Skip(limit_);
+ limit_ = 0;
+ return false;
+ } else {
+ if (!input_->Skip(count)) return false;
+ limit_ -= count;
+ return true;
+ }
+}
+
+int64 LimitingInputStream::ByteCount() const {
+ if (limit_ < 0) {
+ return input_->ByteCount() + limit_ - prior_bytes_read_;
+ } else {
+ return input_->ByteCount() - prior_bytes_read_;
+ }
+}
+
+
+// ===================================================================
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream_impl.h b/src/google/protobuf/io/zero_copy_stream_impl.h
new file mode 100644
index 0000000..0746fa6
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl.h
@@ -0,0 +1,358 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains common implementations of the interfaces defined in
+// zero_copy_stream.h which are only included in the full (non-lite)
+// protobuf library. These implementations include Unix file descriptors
+// and C++ iostreams. See also: zero_copy_stream_impl_lite.h
+
+#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
+#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
+
+#include <string>
+#include <iosfwd>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+#include <google/protobuf/stubs/common.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from a file descriptor.
+//
+// FileInputStream is preferred over using an ifstream with IstreamInputStream.
+// The latter will introduce an extra layer of buffering, harming performance.
+// Also, it's conceivable that FileInputStream could someday be enhanced
+// to use zero-copy file descriptors on OSs which support them.
+class LIBPROTOBUF_EXPORT FileInputStream : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given Unix file descriptor.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used.
+ explicit FileInputStream(int file_descriptor, int block_size = -1);
+ ~FileInputStream();
+
+ // Flushes any buffers and closes the underlying file. Returns false if
+ // an error occurs during the process; use GetErrno() to examine the error.
+ // Even if an error occurs, the file descriptor is closed when this returns.
+ bool Close();
+
+ // By default, the file descriptor is not closed when the stream is
+ // destroyed. Call SetCloseOnDelete(true) to change that. WARNING:
+ // This leaves no way for the caller to detect if close() fails. If
+ // detecting close() errors is important to you, you should arrange
+ // to close the descriptor yourself.
+ void SetCloseOnDelete(bool value) { copying_input_.SetCloseOnDelete(value); }
+
+ // If an I/O error has occurred on this file descriptor, this is the
+ // errno from that error. Otherwise, this is zero. Once an error
+ // occurs, the stream is broken and all subsequent operations will
+ // fail.
+ int GetErrno() { return copying_input_.GetErrno(); }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingFileInputStream : public CopyingInputStream {
+ public:
+ CopyingFileInputStream(int file_descriptor);
+ ~CopyingFileInputStream();
+
+ bool Close();
+ void SetCloseOnDelete(bool value) { close_on_delete_ = value; }
+ int GetErrno() { return errno_; }
+
+ // implements CopyingInputStream ---------------------------------
+ int Read(void* buffer, int size);
+ int Skip(int count);
+
+ private:
+ // The file descriptor.
+ const int file_;
+ bool close_on_delete_;
+ bool is_closed_;
+
+ // The errno of the I/O error, if one has occurred. Otherwise, zero.
+ int errno_;
+
+ // Did we try to seek once and fail? If so, we assume this file descriptor
+ // doesn't support seeking and won't try again.
+ bool previous_seek_failed_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileInputStream);
+ };
+
+ CopyingFileInputStream copying_input_;
+ CopyingInputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileInputStream);
+};
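+
+// Illustrative sketch (not part of the library): reading a file through
+// FileInputStream. The path and the direct use of open() are assumptions made
+// only for this example.
+//
+//   int fd = open("/tmp/example.bin", O_RDONLY);
+//   FileInputStream input(fd);
+//   input.SetCloseOnDelete(true);  // Close fd when the stream is destroyed.
+//   const void* data;
+//   int size;
+//   while (input.Next(&data, &size)) {
+//     // Consume "size" bytes starting at "data".
+//   }
+//   if (input.GetErrno() != 0) {
+//     // A read error occurred; GetErrno() holds the errno value.
+//   }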
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which writes to a file descriptor.
+//
+// FileOutputStream is preferred over using an ofstream with
+// OstreamOutputStream. The latter will introduce an extra layer of buffering,
+// harming performance. Also, it's conceivable that FileOutputStream could
+// someday be enhanced to use zero-copy file descriptors on OSs which
+// support them.
+class LIBPROTOBUF_EXPORT FileOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Creates a stream that writes to the given Unix file descriptor.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit FileOutputStream(int file_descriptor, int block_size = -1);
+ ~FileOutputStream();
+
+ // Flushes any buffers and closes the underlying file. Returns false if
+ // an error occurs during the process; use GetErrno() to examine the error.
+ // Even if an error occurs, the file descriptor is closed when this returns.
+ bool Close();
+
+ // Flushes FileOutputStream's buffers but does not close the
+  // underlying file. No special measures are taken to ensure that the
+  // underlying operating system file object is synchronized to disk.
+ bool Flush();
+
+ // By default, the file descriptor is not closed when the stream is
+ // destroyed. Call SetCloseOnDelete(true) to change that. WARNING:
+ // This leaves no way for the caller to detect if close() fails. If
+ // detecting close() errors is important to you, you should arrange
+ // to close the descriptor yourself.
+ void SetCloseOnDelete(bool value) { copying_output_.SetCloseOnDelete(value); }
+
+ // If an I/O error has occurred on this file descriptor, this is the
+ // errno from that error. Otherwise, this is zero. Once an error
+ // occurs, the stream is broken and all subsequent operations will
+ // fail.
+ int GetErrno() { return copying_output_.GetErrno(); }
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingFileOutputStream : public CopyingOutputStream {
+ public:
+ CopyingFileOutputStream(int file_descriptor);
+ ~CopyingFileOutputStream();
+
+ bool Close();
+ void SetCloseOnDelete(bool value) { close_on_delete_ = value; }
+ int GetErrno() { return errno_; }
+
+ // implements CopyingOutputStream --------------------------------
+ bool Write(const void* buffer, int size);
+
+ private:
+ // The file descriptor.
+ const int file_;
+ bool close_on_delete_;
+ bool is_closed_;
+
+ // The errno of the I/O error, if one has occurred. Otherwise, zero.
+ int errno_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingFileOutputStream);
+ };
+
+ CopyingFileOutputStream copying_output_;
+ CopyingOutputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(FileOutputStream);
+};
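+
+// Illustrative sketch (payload, payload_size, and the direct use of open() are
+// assumptions made only for this example): writing through FileOutputStream
+// and closing it, which flushes, before checking for errors.
+//
+//   int fd = open("/tmp/example.bin", O_WRONLY | O_CREAT | O_TRUNC, 0644);
+//   FileOutputStream output(fd);
+//   void* buffer;
+//   int size;
+//   if (output.Next(&buffer, &size)) {
+//     int used = std::min(size, payload_size);
+//     memcpy(buffer, payload, used);
+//     output.BackUp(size - used);  // Don't write the unused tail.
+//   }
+//   if (!output.Close()) {  // Flushes buffers, then closes the descriptor.
+//     // A write or close error occurred; see output.GetErrno().
+//   }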
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from a C++ istream.
+//
+// Note that for reading files (or anything represented by a file descriptor),
+// FileInputStream is more efficient.
+class LIBPROTOBUF_EXPORT IstreamInputStream : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given C++ istream.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used.
+ explicit IstreamInputStream(istream* stream, int block_size = -1);
+ ~IstreamInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingIstreamInputStream : public CopyingInputStream {
+ public:
+ CopyingIstreamInputStream(istream* input);
+ ~CopyingIstreamInputStream();
+
+ // implements CopyingInputStream ---------------------------------
+ int Read(void* buffer, int size);
+ // (We use the default implementation of Skip().)
+
+ private:
+ // The stream.
+ istream* input_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingIstreamInputStream);
+ };
+
+ CopyingIstreamInputStream copying_input_;
+ CopyingInputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(IstreamInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which writes to a C++ ostream.
+//
+// Note that for writing files (or anything represented by a file descriptor),
+// FileOutputStream is more efficient.
+class LIBPROTOBUF_EXPORT OstreamOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Creates a stream that writes to the given C++ ostream.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit OstreamOutputStream(ostream* stream, int block_size = -1);
+ ~OstreamOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ class LIBPROTOBUF_EXPORT CopyingOstreamOutputStream : public CopyingOutputStream {
+ public:
+ CopyingOstreamOutputStream(ostream* output);
+ ~CopyingOstreamOutputStream();
+
+ // implements CopyingOutputStream --------------------------------
+ bool Write(const void* buffer, int size);
+
+ private:
+ // The stream.
+ ostream* output_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOstreamOutputStream);
+ };
+
+ CopyingOstreamOutputStream copying_output_;
+ CopyingOutputStreamAdaptor impl_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(OstreamOutputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyInputStream which reads from several other streams in sequence.
+// ConcatenatingInputStream is unable to distinguish between end-of-stream
+// and read errors in the underlying streams, so it assumes any errors mean
+// end-of-stream. So, if the underlying streams fail for any other reason,
+// ConcatenatingInputStream may do odd things. It is suggested that you do
+// not use ConcatenatingInputStream on streams that might produce read errors
+// other than end-of-stream.
+class LIBPROTOBUF_EXPORT ConcatenatingInputStream : public ZeroCopyInputStream {
+ public:
+ // All streams passed in as well as the array itself must remain valid
+ // until the ConcatenatingInputStream is destroyed.
+ ConcatenatingInputStream(ZeroCopyInputStream* const streams[], int count);
+ ~ConcatenatingInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+  // As streams are retired, streams_ is incremented and stream_count_ is
+  // decremented.
+ ZeroCopyInputStream* const* streams_;
+ int stream_count_;
+ int64 bytes_retired_; // Bytes read from previous streams.
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ConcatenatingInputStream);
+};
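+
+// Illustrative sketch (the two ArrayInputStreams and their byte buffers are
+// assumptions made only for this example): presenting two streams as one.
+//
+//   ArrayInputStream part1(header_bytes, header_size);
+//   ArrayInputStream part2(body_bytes, body_size);
+//   ZeroCopyInputStream* streams[] = { &part1, &part2 };
+//   ConcatenatingInputStream input(streams, 2);
+//   // "input" now yields the bytes of part1 followed by the bytes of part2.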
+
+// ===================================================================
+
+// A ZeroCopyInputStream which wraps some other stream and limits it to
+// a particular byte count.
+class LIBPROTOBUF_EXPORT LimitingInputStream : public ZeroCopyInputStream {
+ public:
+ LimitingInputStream(ZeroCopyInputStream* input, int64 limit);
+ ~LimitingInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+ ZeroCopyInputStream* input_;
+ int64 limit_; // Decreases as we go, becomes negative if we overshoot.
+ int64 prior_bytes_read_; // Bytes read on underlying stream at construction
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LimitingInputStream);
+};
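+
+// Illustrative sketch (raw_input and message_size are assumptions made only
+// for this example): restricting a reader to a fixed number of bytes.
+//
+//   LimitingInputStream limited(raw_input, message_size);
+//   // Code reading from "limited" sees end-of-stream after message_size
+//   // bytes, even though raw_input may contain more data.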
+
+// ===================================================================
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_H__
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.cc b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
new file mode 100644
index 0000000..083beca
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.cc
@@ -0,0 +1,438 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+
+#include <google/protobuf/io/zero_copy_stream_impl_lite.h>
+
+#include <algorithm>
+#include <limits>
+
+#include <google/protobuf/stubs/casts.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/stl_util.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+namespace {
+
+// Default block size for Copying{In,Out}putStreamAdaptor.
+static const int kDefaultBlockSize = 8192;
+
+} // namespace
+
+// ===================================================================
+
+ArrayInputStream::ArrayInputStream(const void* data, int size,
+ int block_size)
+ : data_(reinterpret_cast<const uint8*>(data)),
+ size_(size),
+ block_size_(block_size > 0 ? block_size : size),
+ position_(0),
+ last_returned_size_(0) {
+}
+
+ArrayInputStream::~ArrayInputStream() {
+}
+
+bool ArrayInputStream::Next(const void** data, int* size) {
+ if (position_ < size_) {
+ last_returned_size_ = min(block_size_, size_ - position_);
+ *data = data_ + position_;
+ *size = last_returned_size_;
+ position_ += last_returned_size_;
+ return true;
+ } else {
+ // We're at the end of the array.
+ last_returned_size_ = 0; // Don't let caller back up.
+ return false;
+ }
+}
+
+void ArrayInputStream::BackUp(int count) {
+ GOOGLE_CHECK_GT(last_returned_size_, 0)
+ << "BackUp() can only be called after a successful Next().";
+ GOOGLE_CHECK_LE(count, last_returned_size_);
+ GOOGLE_CHECK_GE(count, 0);
+ position_ -= count;
+ last_returned_size_ = 0; // Don't let caller back up further.
+}
+
+bool ArrayInputStream::Skip(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ last_returned_size_ = 0; // Don't let caller back up.
+ if (count > size_ - position_) {
+ position_ = size_;
+ return false;
+ } else {
+ position_ += count;
+ return true;
+ }
+}
+
+int64 ArrayInputStream::ByteCount() const {
+ return position_;
+}
+
+
+// ===================================================================
+
+ArrayOutputStream::ArrayOutputStream(void* data, int size, int block_size)
+ : data_(reinterpret_cast<uint8*>(data)),
+ size_(size),
+ block_size_(block_size > 0 ? block_size : size),
+ position_(0),
+ last_returned_size_(0) {
+}
+
+ArrayOutputStream::~ArrayOutputStream() {
+}
+
+bool ArrayOutputStream::Next(void** data, int* size) {
+ if (position_ < size_) {
+ last_returned_size_ = min(block_size_, size_ - position_);
+ *data = data_ + position_;
+ *size = last_returned_size_;
+ position_ += last_returned_size_;
+ return true;
+ } else {
+ // We're at the end of the array.
+ last_returned_size_ = 0; // Don't let caller back up.
+ return false;
+ }
+}
+
+void ArrayOutputStream::BackUp(int count) {
+ GOOGLE_CHECK_GT(last_returned_size_, 0)
+ << "BackUp() can only be called after a successful Next().";
+ GOOGLE_CHECK_LE(count, last_returned_size_);
+ GOOGLE_CHECK_GE(count, 0);
+ position_ -= count;
+ last_returned_size_ = 0; // Don't let caller back up further.
+}
+
+int64 ArrayOutputStream::ByteCount() const {
+ return position_;
+}
+
+// ===================================================================
+
+StringOutputStream::StringOutputStream(string* target)
+ : target_(target) {
+}
+
+StringOutputStream::~StringOutputStream() {
+}
+
+bool StringOutputStream::Next(void** data, int* size) {
+ GOOGLE_CHECK_NE(NULL, target_);
+ int old_size = target_->size();
+
+ // Grow the string.
+ if (old_size < target_->capacity()) {
+ // Resize the string to match its capacity, since we can get away
+ // without a memory allocation this way.
+ STLStringResizeUninitialized(target_, target_->capacity());
+ } else {
+ // Size has reached capacity, try to double the size.
+ if (old_size > std::numeric_limits<int>::max() / 2) {
+      // Cannot double the size, as that would cause integer overflow in the
+      // expression below: old_size * 2.
+ GOOGLE_LOG(ERROR) << "Cannot allocate buffer larger than kint32max for "
+ << "StringOutputStream.";
+ return false;
+ }
+ // Double the size, also make sure that the new size is at least
+ // kMinimumSize.
+ STLStringResizeUninitialized(
+ target_,
+ max(old_size * 2,
+ kMinimumSize + 0)); // "+ 0" works around GCC4 weirdness.
+ }
+
+ *data = mutable_string_data(target_) + old_size;
+ *size = target_->size() - old_size;
+ return true;
+}
+
+void StringOutputStream::BackUp(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ GOOGLE_CHECK_NE(NULL, target_);
+ GOOGLE_CHECK_LE(count, target_->size());
+ target_->resize(target_->size() - count);
+}
+
+int64 StringOutputStream::ByteCount() const {
+ GOOGLE_CHECK_NE(NULL, target_);
+ return target_->size();
+}
+
+void StringOutputStream::SetString(string* target) {
+ target_ = target;
+}
+
+// ===================================================================
+
+LazyStringOutputStream::LazyStringOutputStream(
+ ResultCallback<string*>* callback)
+ : StringOutputStream(NULL),
+ callback_(GOOGLE_CHECK_NOTNULL(callback)),
+ string_is_set_(false) {
+}
+
+LazyStringOutputStream::~LazyStringOutputStream() {
+}
+
+bool LazyStringOutputStream::Next(void** data, int* size) {
+ if (!string_is_set_) {
+ SetString(callback_->Run());
+ string_is_set_ = true;
+ }
+ return StringOutputStream::Next(data, size);
+}
+
+int64 LazyStringOutputStream::ByteCount() const {
+ return string_is_set_ ? StringOutputStream::ByteCount() : 0;
+}
+
+// ===================================================================
+
+CopyingInputStream::~CopyingInputStream() {}
+
+int CopyingInputStream::Skip(int count) {
+ char junk[4096];
+ int skipped = 0;
+ while (skipped < count) {
+ int bytes = Read(junk, min(count - skipped,
+ implicit_cast<int>(sizeof(junk))));
+ if (bytes <= 0) {
+ // EOF or read error.
+ return skipped;
+ }
+ skipped += bytes;
+ }
+ return skipped;
+}
+
+CopyingInputStreamAdaptor::CopyingInputStreamAdaptor(
+ CopyingInputStream* copying_stream, int block_size)
+ : copying_stream_(copying_stream),
+ owns_copying_stream_(false),
+ failed_(false),
+ position_(0),
+ buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize),
+ buffer_used_(0),
+ backup_bytes_(0) {
+}
+
+CopyingInputStreamAdaptor::~CopyingInputStreamAdaptor() {
+ if (owns_copying_stream_) {
+ delete copying_stream_;
+ }
+}
+
+bool CopyingInputStreamAdaptor::Next(const void** data, int* size) {
+ if (failed_) {
+ // Already failed on a previous read.
+ return false;
+ }
+
+ AllocateBufferIfNeeded();
+
+ if (backup_bytes_ > 0) {
+ // We have data left over from a previous BackUp(), so just return that.
+ *data = buffer_.get() + buffer_used_ - backup_bytes_;
+ *size = backup_bytes_;
+ backup_bytes_ = 0;
+ return true;
+ }
+
+ // Read new data into the buffer.
+ buffer_used_ = copying_stream_->Read(buffer_.get(), buffer_size_);
+ if (buffer_used_ <= 0) {
+ // EOF or read error. We don't need the buffer anymore.
+ if (buffer_used_ < 0) {
+ // Read error (not EOF).
+ failed_ = true;
+ }
+ FreeBuffer();
+ return false;
+ }
+ position_ += buffer_used_;
+
+ *size = buffer_used_;
+ *data = buffer_.get();
+ return true;
+}
+
+void CopyingInputStreamAdaptor::BackUp(int count) {
+ GOOGLE_CHECK(backup_bytes_ == 0 && buffer_.get() != NULL)
+ << " BackUp() can only be called after Next().";
+ GOOGLE_CHECK_LE(count, buffer_used_)
+ << " Can't back up over more bytes than were returned by the last call"
+ " to Next().";
+ GOOGLE_CHECK_GE(count, 0)
+ << " Parameter to BackUp() can't be negative.";
+
+ backup_bytes_ = count;
+}
+
+bool CopyingInputStreamAdaptor::Skip(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+
+ if (failed_) {
+ // Already failed on a previous read.
+ return false;
+ }
+
+ // First skip any bytes left over from a previous BackUp().
+ if (backup_bytes_ >= count) {
+ // We have more data left over than we're trying to skip. Just chop it.
+ backup_bytes_ -= count;
+ return true;
+ }
+
+ count -= backup_bytes_;
+ backup_bytes_ = 0;
+
+ int skipped = copying_stream_->Skip(count);
+ position_ += skipped;
+ return skipped == count;
+}
+
+int64 CopyingInputStreamAdaptor::ByteCount() const {
+ return position_ - backup_bytes_;
+}
+
+void CopyingInputStreamAdaptor::AllocateBufferIfNeeded() {
+ if (buffer_.get() == NULL) {
+ buffer_.reset(new uint8[buffer_size_]);
+ }
+}
+
+void CopyingInputStreamAdaptor::FreeBuffer() {
+ GOOGLE_CHECK_EQ(backup_bytes_, 0);
+ buffer_used_ = 0;
+ buffer_.reset();
+}
+
+// ===================================================================
+
+CopyingOutputStream::~CopyingOutputStream() {}
+
+CopyingOutputStreamAdaptor::CopyingOutputStreamAdaptor(
+ CopyingOutputStream* copying_stream, int block_size)
+ : copying_stream_(copying_stream),
+ owns_copying_stream_(false),
+ failed_(false),
+ position_(0),
+ buffer_size_(block_size > 0 ? block_size : kDefaultBlockSize),
+ buffer_used_(0) {
+}
+
+CopyingOutputStreamAdaptor::~CopyingOutputStreamAdaptor() {
+ WriteBuffer();
+ if (owns_copying_stream_) {
+ delete copying_stream_;
+ }
+}
+
+bool CopyingOutputStreamAdaptor::Flush() {
+ return WriteBuffer();
+}
+
+bool CopyingOutputStreamAdaptor::Next(void** data, int* size) {
+ if (buffer_used_ == buffer_size_) {
+ if (!WriteBuffer()) return false;
+ }
+
+ AllocateBufferIfNeeded();
+
+ *data = buffer_.get() + buffer_used_;
+ *size = buffer_size_ - buffer_used_;
+ buffer_used_ = buffer_size_;
+ return true;
+}
+
+void CopyingOutputStreamAdaptor::BackUp(int count) {
+ GOOGLE_CHECK_GE(count, 0);
+ GOOGLE_CHECK_EQ(buffer_used_, buffer_size_)
+ << " BackUp() can only be called after Next().";
+ GOOGLE_CHECK_LE(count, buffer_used_)
+ << " Can't back up over more bytes than were returned by the last call"
+ " to Next().";
+
+ buffer_used_ -= count;
+}
+
+int64 CopyingOutputStreamAdaptor::ByteCount() const {
+ return position_ + buffer_used_;
+}
+
+bool CopyingOutputStreamAdaptor::WriteBuffer() {
+ if (failed_) {
+ // Already failed on a previous write.
+ return false;
+ }
+
+ if (buffer_used_ == 0) return true;
+
+ if (copying_stream_->Write(buffer_.get(), buffer_used_)) {
+ position_ += buffer_used_;
+ buffer_used_ = 0;
+ return true;
+ } else {
+ failed_ = true;
+ FreeBuffer();
+ return false;
+ }
+}
+
+void CopyingOutputStreamAdaptor::AllocateBufferIfNeeded() {
+ if (buffer_ == NULL) {
+ buffer_.reset(new uint8[buffer_size_]);
+ }
+}
+
+void CopyingOutputStreamAdaptor::FreeBuffer() {
+ buffer_used_ = 0;
+ buffer_.reset();
+}
+
+// ===================================================================
+
+} // namespace io
+} // namespace protobuf
+} // namespace google
diff --git a/src/google/protobuf/io/zero_copy_stream_impl_lite.h b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
new file mode 100644
index 0000000..1c397de
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_impl_lite.h
@@ -0,0 +1,410 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// This file contains common implementations of the interfaces defined in
+// zero_copy_stream.h which are included in the "lite" protobuf library.
+// These implementations cover I/O on raw arrays and strings, as well as
+// adaptors which make it easy to implement streams based on traditional
+// streams. Of course, many users will probably want to write their own
+// implementations of these interfaces specific to the particular I/O
+// abstractions they prefer to use, but these should cover the most common
+// cases.
+
+#ifndef GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__
+#define GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__
+
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
+#include <string>
+#include <iosfwd>
+#include <google/protobuf/io/zero_copy_stream.h>
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
+#include <google/protobuf/stubs/stl_util.h>
+
+
+namespace google {
+namespace protobuf {
+namespace io {
+
+// ===================================================================
+
+// A ZeroCopyInputStream backed by an in-memory array of bytes.
+class LIBPROTOBUF_EXPORT ArrayInputStream : public ZeroCopyInputStream {
+ public:
+ // Create an InputStream that returns the bytes pointed to by "data".
+ // "data" remains the property of the caller but must remain valid until
+ // the stream is destroyed. If a block_size is given, calls to Next()
+ // will return data blocks no larger than the given size. Otherwise, the
+ // first call to Next() returns the entire array. block_size is mainly
+ // useful for testing; in production you would probably never want to set
+ // it.
+ ArrayInputStream(const void* data, int size, int block_size = -1);
+ ~ArrayInputStream();
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+
+ private:
+ const uint8* const data_; // The byte array.
+ const int size_; // Total size of the array.
+ const int block_size_; // How many bytes to return at a time.
+
+ int position_;
+ int last_returned_size_; // How many bytes we returned last time Next()
+ // was called (used for error checking only).
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayInputStream);
+};
+
+// ===================================================================
+
+// A ZeroCopyOutputStream backed by an in-memory array of bytes.
+class LIBPROTOBUF_EXPORT ArrayOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Create an OutputStream that writes to the bytes pointed to by "data".
+ // "data" remains the property of the caller but must remain valid until
+ // the stream is destroyed. If a block_size is given, calls to Next()
+ // will return data blocks no larger than the given size. Otherwise, the
+ // first call to Next() returns the entire array. block_size is mainly
+ // useful for testing; in production you would probably never want to set
+ // it.
+ ArrayOutputStream(void* data, int size, int block_size = -1);
+ ~ArrayOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ uint8* const data_; // The byte array.
+ const int size_; // Total size of the array.
+ const int block_size_; // How many bytes to return at a time.
+
+ int position_;
+ int last_returned_size_; // How many bytes we returned last time Next()
+ // was called (used for error checking only).
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ArrayOutputStream);
+};
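+
+// Illustrative sketch (kBufferSize is an assumption made only for this
+// example): writing into a caller-owned fixed buffer and using ByteCount()
+// to learn how many bytes were produced.
+//
+//   uint8 buffer[kBufferSize];
+//   ArrayOutputStream output(buffer, sizeof(buffer));
+//   // ... hand "output" to a writer (e.g. a CodedOutputStream) ...
+//   int64 bytes_written = output.ByteCount();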
+
+// ===================================================================
+
+// A ZeroCopyOutputStream which appends bytes to a string.
+class LIBPROTOBUF_EXPORT StringOutputStream : public ZeroCopyOutputStream {
+ public:
+ // Create a StringOutputStream which appends bytes to the given string.
+ // The string remains property of the caller, but it is mutated in arbitrary
+ // ways and MUST NOT be accessed in any way until you're done with the
+ // stream. Either be sure there's no further usage, or (safest) destroy the
+ // stream before using the contents.
+ //
+ // Hint: If you call target->reserve(n) before creating the stream,
+ // the first call to Next() will return at least n bytes of buffer
+ // space.
+ explicit StringOutputStream(string* target);
+ ~StringOutputStream();
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ protected:
+ void SetString(string* target);
+
+ private:
+ static const int kMinimumSize = 16;
+
+ string* target_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(StringOutputStream);
+};
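+
+// Illustrative sketch (n is an assumption made only for this example):
+// appending to a string through StringOutputStream, following the reserve()
+// hint above.
+//
+//   string target;
+//   target.reserve(n);  // First Next() will return at least n bytes.
+//   {
+//     StringOutputStream output(&target);
+//     void* data;
+//     int size;
+//     if (output.Next(&data, &size)) {
+//       memset(data, 'x', size);
+//       output.BackUp(size - 1);  // Keep only the first byte written.
+//     }
+//   }
+//   // The stream has been destroyed, so it is safe to use "target" again;
+//   // it now holds a single 'x'.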
+
+// LazyStringOutputStream is a StringOutputStream with lazy acquisition of
+// the output string from a callback. The string is owned externally, and not
+// deleted in the stream destructor.
+class LIBPROTOBUF_EXPORT LazyStringOutputStream : public StringOutputStream {
+ public:
+ // Callback should be permanent (non-self-deleting). Ownership is transferred
+ // to the LazyStringOutputStream.
+ explicit LazyStringOutputStream(ResultCallback<string*>* callback);
+ ~LazyStringOutputStream();
+
+ // implements ZeroCopyOutputStream, overriding StringOutputStream -----------
+ bool Next(void** data, int* size);
+ int64 ByteCount() const;
+
+ private:
+ const scoped_ptr<ResultCallback<string*> > callback_;
+ bool string_is_set_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(LazyStringOutputStream);
+};
+
+// Note: There is no StringInputStream. Instead, just create an
+// ArrayInputStream as follows:
+// ArrayInputStream input(str.data(), str.size());
+
+// ===================================================================
+
+// A generic traditional input stream interface.
+//
+// Lots of traditional input streams (e.g. file descriptors, C stdio
+// streams, and C++ iostreams) expose an interface where every read
+// involves copying bytes into a buffer. If you want to take such an
+// interface and make a ZeroCopyInputStream based on it, simply implement
+// CopyingInputStream and then use CopyingInputStreamAdaptor.
+//
+// CopyingInputStream implementations should avoid buffering if possible.
+// CopyingInputStreamAdaptor does its own buffering and will read data
+// in large blocks.
+class LIBPROTOBUF_EXPORT CopyingInputStream {
+ public:
+ virtual ~CopyingInputStream();
+
+ // Reads up to "size" bytes into the given buffer. Returns the number of
+ // bytes read. Read() waits until at least one byte is available, or
+ // returns zero if no bytes will ever become available (EOF), or -1 if a
+ // permanent read error occurred.
+ virtual int Read(void* buffer, int size) = 0;
+
+ // Skips the next "count" bytes of input. Returns the number of bytes
+ // actually skipped. This will always be exactly equal to "count" unless
+ // EOF was reached or a permanent read error occurred.
+ //
+ // The default implementation just repeatedly calls Read() into a scratch
+ // buffer.
+ virtual int Skip(int count);
+};
+
+// A ZeroCopyInputStream which reads from a CopyingInputStream. This is
+// useful for implementing ZeroCopyInputStreams that read from traditional
+// streams. Note that this class is not really zero-copy.
+//
+// If you want to read from file descriptors or C++ istreams, this is
+// already implemented for you: use FileInputStream or IstreamInputStream
+// respectively.
+class LIBPROTOBUF_EXPORT CopyingInputStreamAdaptor : public ZeroCopyInputStream {
+ public:
+ // Creates a stream that reads from the given CopyingInputStream.
+ // If a block_size is given, it specifies the number of bytes that
+ // should be read and returned with each call to Next(). Otherwise,
+ // a reasonable default is used. The caller retains ownership of
+ // copying_stream unless SetOwnsCopyingStream(true) is called.
+ explicit CopyingInputStreamAdaptor(CopyingInputStream* copying_stream,
+ int block_size = -1);
+ ~CopyingInputStreamAdaptor();
+
+ // Call SetOwnsCopyingStream(true) to tell the CopyingInputStreamAdaptor to
+ // delete the underlying CopyingInputStream when it is destroyed.
+ void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; }
+
+ // implements ZeroCopyInputStream ----------------------------------
+ bool Next(const void** data, int* size);
+ void BackUp(int count);
+ bool Skip(int count);
+ int64 ByteCount() const;
+
+ private:
+  // Ensures that buffer_ is not NULL.
+ void AllocateBufferIfNeeded();
+ // Frees the buffer and resets buffer_used_.
+ void FreeBuffer();
+
+ // The underlying copying stream.
+ CopyingInputStream* copying_stream_;
+ bool owns_copying_stream_;
+
+  // True if we have seen a permanent error from the underlying stream.
+ bool failed_;
+
+ // The current position of copying_stream_, relative to the point where
+ // we started reading.
+ int64 position_;
+
+ // Data is read into this buffer. It may be NULL if no buffer is currently
+ // in use. Otherwise, it points to an array of size buffer_size_.
+ google::protobuf::scoped_array<uint8> buffer_;
+ const int buffer_size_;
+
+ // Number of valid bytes currently in the buffer (i.e. the size last
+ // returned by Next()). 0 <= buffer_used_ <= buffer_size_.
+ int buffer_used_;
+
+ // Number of bytes in the buffer which were backed up over by a call to
+ // BackUp(). These need to be returned again.
+ // 0 <= backup_bytes_ <= buffer_used_
+ int backup_bytes_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingInputStreamAdaptor);
+};
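+
+// Illustrative sketch (not part of the library): adapting a C stdio FILE* by
+// implementing CopyingInputStream and wrapping it in the adaptor above. The
+// class name is an assumption made only for this example.
+//
+//   class StdioInputStream : public CopyingInputStream {
+//    public:
+//     explicit StdioInputStream(FILE* file) : file_(file) {}
+//     int Read(void* buffer, int size) {
+//       size_t n = fread(buffer, 1, size, file_);
+//       if (n == 0 && ferror(file_)) return -1;  // Permanent read error.
+//       return static_cast<int>(n);              // 0 means EOF.
+//     }
+//    private:
+//     FILE* file_;
+//   };
+//
+//   StdioInputStream copying(stdin);
+//   CopyingInputStreamAdaptor input(&copying);  // Usable as a ZeroCopyInputStream.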
+
+// ===================================================================
+
+// A generic traditional output stream interface.
+//
+// Lots of traditional output streams (e.g. file descriptors, C stdio
+// streams, and C++ iostreams) expose an interface where every write
+// involves copying bytes from a buffer. If you want to take such an
+// interface and make a ZeroCopyOutputStream based on it, simply implement
+// CopyingOutputStream and then use CopyingOutputStreamAdaptor.
+//
+// CopyingOutputStream implementations should avoid buffering if possible.
+// CopyingOutputStreamAdaptor does its own buffering and will write data
+// in large blocks.
+class LIBPROTOBUF_EXPORT CopyingOutputStream {
+ public:
+ virtual ~CopyingOutputStream();
+
+ // Writes "size" bytes from the given buffer to the output. Returns true
+ // if successful, false on a write error.
+ virtual bool Write(const void* buffer, int size) = 0;
+};
+
+// A ZeroCopyOutputStream which writes to a CopyingOutputStream. This is
+// useful for implementing ZeroCopyOutputStreams that write to traditional
+// streams. Note that this class is not really zero-copy.
+//
+// If you want to write to file descriptors or C++ ostreams, this is
+// already implemented for you: use FileOutputStream or OstreamOutputStream
+// respectively.
+class LIBPROTOBUF_EXPORT CopyingOutputStreamAdaptor : public ZeroCopyOutputStream {
+ public:
+  // Creates a stream that writes to the given CopyingOutputStream.
+ // If a block_size is given, it specifies the size of the buffers
+ // that should be returned by Next(). Otherwise, a reasonable default
+ // is used.
+ explicit CopyingOutputStreamAdaptor(CopyingOutputStream* copying_stream,
+ int block_size = -1);
+ ~CopyingOutputStreamAdaptor();
+
+ // Writes all pending data to the underlying stream. Returns false if a
+ // write error occurred on the underlying stream. (The underlying
+ // stream itself is not necessarily flushed.)
+ bool Flush();
+
+ // Call SetOwnsCopyingStream(true) to tell the CopyingOutputStreamAdaptor to
+ // delete the underlying CopyingOutputStream when it is destroyed.
+ void SetOwnsCopyingStream(bool value) { owns_copying_stream_ = value; }
+
+ // implements ZeroCopyOutputStream ---------------------------------
+ bool Next(void** data, int* size);
+ void BackUp(int count);
+ int64 ByteCount() const;
+
+ private:
+ // Write the current buffer, if it is present.
+ bool WriteBuffer();
+  // Ensures that buffer_ is not NULL.
+ void AllocateBufferIfNeeded();
+ // Frees the buffer.
+ void FreeBuffer();
+
+ // The underlying copying stream.
+ CopyingOutputStream* copying_stream_;
+ bool owns_copying_stream_;
+
+  // True if we have seen a permanent error from the underlying stream.
+ bool failed_;
+
+ // The current position of copying_stream_, relative to the point where
+ // we started writing.
+ int64 position_;
+
+ // Data is written from this buffer. It may be NULL if no buffer is
+ // currently in use. Otherwise, it points to an array of size buffer_size_.
+ google::protobuf::scoped_array<uint8> buffer_;
+ const int buffer_size_;
+
+ // Number of valid bytes currently in the buffer (i.e. the size last
+ // returned by Next()). When BackUp() is called, we just reduce this.
+ // 0 <= buffer_used_ <= buffer_size_.
+ int buffer_used_;
+
+ GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(CopyingOutputStreamAdaptor);
+};
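+
+// Illustrative sketch (not part of the library): the matching output-side
+// adaptor for a C stdio FILE*. The class name is an assumption made only for
+// this example.
+//
+//   class StdioOutputStream : public CopyingOutputStream {
+//    public:
+//     explicit StdioOutputStream(FILE* file) : file_(file) {}
+//     bool Write(const void* buffer, int size) {
+//       return fwrite(buffer, 1, size, file_) == static_cast<size_t>(size);
+//     }
+//    private:
+//     FILE* file_;
+//   };
+//
+//   StdioOutputStream copying(stdout);
+//   CopyingOutputStreamAdaptor output(&copying);  // Usable as a ZeroCopyOutputStream.
+//   // ... write via Next()/BackUp() ...
+//   output.Flush();  // Push buffered bytes through to the CopyingOutputStream.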
+
+// ===================================================================
+
+// mutable_string_data() and as_string_data() are workarounds to improve
+// the performance of writing new data to an existing string. Unfortunately
+// the methods provided by the string class are suboptimal, and using memcpy()
+// is mildly annoying because it requires its pointer args to be non-NULL even
+// if we ask it to copy 0 bytes. Furthermore, string_as_array() has the
+// property that it always returns NULL if its arg is the empty string, exactly
+// what we want to avoid if we're using it in conjunction with memcpy()!
+// With C++11, the desired memcpy() boils down to memcpy(..., &(*s)[0], size),
+// where s is a string*. Without C++11, &(*s)[0] is not guaranteed to be safe,
+// so we use string_as_array(), and live with the extra logic that tests whether
+// *s is empty.
+
+// Return a pointer to mutable characters underlying the given string. The
+// return value is valid until the next time the string is resized. We
+// trust the caller to treat the return value as an array of length s->size().
+inline char* mutable_string_data(string* s) {
+#ifdef LANG_CXX11
+ // This should be simpler & faster than string_as_array() because the latter
+ // is guaranteed to return NULL when *s is empty, so it has to check for that.
+ return &(*s)[0];
+#else
+ return string_as_array(s);
+#endif
+}
+
+// as_string_data(s) is equivalent to
+// ({ char* p = mutable_string_data(s); make_pair(p, p != NULL); })
+// Sometimes it's faster: in some scenarios p cannot be NULL, and then the
+// code can avoid that check.
+inline std::pair<char*, bool> as_string_data(string* s) {
+ char *p = mutable_string_data(s);
+#ifdef LANG_CXX11
+ return std::make_pair(p, true);
+#else
+ return make_pair(p, p != NULL);
+#endif
+}
+
+} // namespace io
+} // namespace protobuf
+
+} // namespace google
+#endif // GOOGLE_PROTOBUF_IO_ZERO_COPY_STREAM_IMPL_LITE_H__
diff --git a/src/google/protobuf/io/zero_copy_stream_unittest.cc b/src/google/protobuf/io/zero_copy_stream_unittest.cc
new file mode 100644
index 0000000..8c7358c
--- /dev/null
+++ b/src/google/protobuf/io/zero_copy_stream_unittest.cc
@@ -0,0 +1,1007 @@
+// Protocol Buffers - Google's data interchange format
+// Copyright 2008 Google Inc. All rights reserved.
+// https://developers.google.com/protocol-buffers/
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+// Author: kenton@google.com (Kenton Varda)
+// Based on original Protocol Buffers design by
+// Sanjay Ghemawat, Jeff Dean, and others.
+//
+// Testing strategy: For each type of I/O (array, string, file, etc.) we
+// create an output stream and write some data to it, then create a
+// corresponding input stream to read the same data back and expect it to
+// match. When the data is written, it is written in several small chunks
+// of varying sizes, with a BackUp() after each chunk. It is read back
+// similarly, but with chunks separated at different points. The whole
+// process is run with a variety of block sizes for both the input and
+// the output.
+//
+// TODO(kenton): Rewrite this test to bring it up to the standards of all
+// the other proto2 tests. May want to wait for gTest to implement
+// "parameterized tests" so that one set of tests can be used on all the
+// implementations.
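+//
+// A rough sketch of that chunked write pattern (illustrative only; the
+// buffer sizes and the literal "abc" below are hypothetical, not data used
+// by the actual tests):
+//
+//   uint8 buf[64];
+//   ArrayOutputStream out(buf, sizeof(buf), /*block_size=*/16);
+//   void* data;
+//   int size;
+//   out.Next(&data, &size);     // size == 16: one block of the array
+//   memcpy(data, "abc", 3);
+//   out.BackUp(size - 3);       // return the unused 13 bytes of the block
+//   // ...write more chunks the same way, then read everything back through
+//   // an ArrayInputStream constructed with a different block size.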
+
+
+#ifdef _MSC_VER
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <memory>
+#ifndef _SHARED_PTR_H
+#include <google/protobuf/stubs/shared_ptr.h>
+#endif
+#include <sstream>
+
+#include <google/protobuf/io/zero_copy_stream_impl.h>
+#include <google/protobuf/io/coded_stream.h>
+
+#if HAVE_ZLIB
+#include <google/protobuf/io/gzip_stream.h>
+#endif
+
+#include <google/protobuf/stubs/common.h>
+#include <google/protobuf/stubs/logging.h>
+#include <google/protobuf/stubs/scoped_ptr.h>
+#include <google/protobuf/testing/googletest.h>
+#include <google/protobuf/testing/file.h>
+#include <gtest/gtest.h>
+
+namespace google {
+namespace protobuf {
+namespace io {
+namespace {
+
+#ifdef _WIN32
+#define pipe(fds) _pipe(fds, 4096, O_BINARY)
+#endif
+
+#ifndef O_BINARY
+#ifdef _O_BINARY
+#define O_BINARY _O_BINARY
+#else
+#define O_BINARY 0 // If this isn't defined, the platform doesn't need it.
+#endif
+#endif
+
+class IoTest : public testing::Test {
+ protected:
+ // Test helpers.
+
+ // Helper to write an array of data to an output stream.
+ bool WriteToOutput(ZeroCopyOutputStream* output, const void* data, int size);
+ // Helper to read a fixed-length array of data from an input stream.
+ int ReadFromInput(ZeroCopyInputStream* input, void* data, int size);
+ // Write a string to the output stream.
+ void WriteString(ZeroCopyOutputStream* output, const string& str);
+  // Reads a number of bytes equal to the size of the given string and checks
+  // that they match the string.
+ void ReadString(ZeroCopyInputStream* input, const string& str);
+  // Writes some text to the output stream in a particular order. Returns
+  // the number of bytes written, in case the caller needs that to set up an
+  // input stream.
+ int WriteStuff(ZeroCopyOutputStream* output);
+ // Reads text from an input stream and expects it to match what
+ // WriteStuff() writes.
+ void ReadStuff(ZeroCopyInputStream* input);
+
+ // Similar to WriteStuff, but performs more sophisticated testing.
+ int WriteStuffLarge(ZeroCopyOutputStream* output);
+ // Reads and tests a stream that should have been written to
+ // via WriteStuffLarge().
+ void ReadStuffLarge(ZeroCopyInputStream* input);
+
+#if HAVE_ZLIB
+ string Compress(const string& data, const GzipOutputStream::Options& options);
+ string Uncompress(const string& data);
+#endif
+
+ static const int kBlockSizes[];
+ static const int kBlockSizeCount;
+};
+
+const int IoTest::kBlockSizes[] = {-1, 1, 2, 5, 7, 10, 23, 64};
+const int IoTest::kBlockSizeCount = GOOGLE_ARRAYSIZE(IoTest::kBlockSizes);
+
+bool IoTest::WriteToOutput(ZeroCopyOutputStream* output,
+ const void* data, int size) {
+ const uint8* in = reinterpret_cast<const uint8*>(data);
+ int in_size = size;
+
+ void* out;
+ int out_size;
+
+ while (true) {
+ if (!output->Next(&out, &out_size)) {
+ return false;
+ }
+ EXPECT_GT(out_size, 0);
+
+ if (in_size <= out_size) {
+ memcpy(out, in, in_size);
+ output->BackUp(out_size - in_size);
+ return true;
+ }
+
+ memcpy(out, in, out_size);
+ in += out_size;
+ in_size -= out_size;
+ }
+}
+
+#define MAX_REPEATED_ZEROS 100
+
+int IoTest::ReadFromInput(ZeroCopyInputStream* input, void* data, int size) {
+ uint8* out = reinterpret_cast<uint8*>(data);
+ int out_size = size;
+
+ const void* in;
+ int in_size = 0;
+
+ int repeated_zeros = 0;
+
+ while (true) {
+ if (!input->Next(&in, &in_size)) {
+ return size - out_size;
+ }
+ EXPECT_GT(in_size, -1);
+ if (in_size == 0) {
+ repeated_zeros++;
+ } else {
+ repeated_zeros = 0;
+ }
+ EXPECT_LT(repeated_zeros, MAX_REPEATED_ZEROS);
+
+ if (out_size <= in_size) {
+ memcpy(out, in, out_size);
+ if (in_size > out_size) {
+ input->BackUp(in_size - out_size);
+ }
+ return size; // Copied all of it.
+ }
+
+ memcpy(out, in, in_size);
+ out += in_size;
+ out_size -= in_size;
+ }
+}
+
+void IoTest::WriteString(ZeroCopyOutputStream* output, const string& str) {
+ EXPECT_TRUE(WriteToOutput(output, str.c_str(), str.size()));
+}
+
+void IoTest::ReadString(ZeroCopyInputStream* input, const string& str) {
+ google::protobuf::scoped_array<char> buffer(new char[str.size() + 1]);
+ buffer[str.size()] = '\0';
+ EXPECT_EQ(ReadFromInput(input, buffer.get(), str.size()), str.size());
+ EXPECT_STREQ(str.c_str(), buffer.get());
+}
+
+int IoTest::WriteStuff(ZeroCopyOutputStream* output) {
+ WriteString(output, "Hello world!\n");
+ WriteString(output, "Some te");
+  WriteString(output, "xt.  Blah blah.");
+ WriteString(output, "abcdefg");
+ WriteString(output, "01234567890123456789");
+ WriteString(output, "foobar");
+
+ EXPECT_EQ(output->ByteCount(), 68);
+
+ int result = output->ByteCount();
+ return result;
+}
+
+// Reads text from an input stream and expects it to match what WriteStuff()
+// writes.
+void IoTest::ReadStuff(ZeroCopyInputStream* input) {
+ ReadString(input, "Hello world!\n");
+  ReadString(input, "Some text.  ");
+ ReadString(input, "Blah ");
+ ReadString(input, "blah.");
+ ReadString(input, "abcdefg");
+ EXPECT_TRUE(input->Skip(20));
+ ReadString(input, "foo");
+ ReadString(input, "bar");
+
+ EXPECT_EQ(input->ByteCount(), 68);
+
+ uint8 byte;
+ EXPECT_EQ(ReadFromInput(input, &byte, 1), 0);
+}
+
+int IoTest::WriteStuffLarge(ZeroCopyOutputStream* output) {
+ WriteString(output, "Hello world!\n");
+ WriteString(output, "Some te");
+  WriteString(output, "xt.  Blah blah.");
+ WriteString(output, string(100000, 'x')); // A very long string
+ WriteString(output, string(100000, 'y')); // A very long string
+ WriteString(output, "01234567890123456789");
+
+ EXPECT_EQ(output->ByteCount(), 200055);
+
+ int result = output->ByteCount();
+ return result;
+}
+
+// Reads text from an input stream and expects it to match what
+// WriteStuffLarge() writes.
+void IoTest::ReadStuffLarge(ZeroCopyInputStream* input) {
+  ReadString(input, "Hello world!\nSome text.  ");
+ EXPECT_TRUE(input->Skip(5));
+ ReadString(input, "blah.");
+ EXPECT_TRUE(input->Skip(100000 - 10));
+ ReadString(input, string(10, 'x') + string(100000 - 20000, 'y'));
+ EXPECT_TRUE(input->Skip(20000 - 10));
+ ReadString(input, "yyyyyyyyyy01234567890123456789");
+
+ EXPECT_EQ(input->ByteCount(), 200055);
+
+ uint8 byte;
+ EXPECT_EQ(ReadFromInput(input, &byte, 1), 0);
+}
+
+// ===================================================================
+
+TEST_F(IoTest, ArrayIo) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+ size = WriteStuff(&output);
+ }
+ {
+ ArrayInputStream input(buffer, size, kBlockSizes[j]);
+ ReadStuff(&input);
+ }
+ }
+ }
+}
+
+TEST_F(IoTest, TwoSessionWrite) {
+  // Test that two concatenated write sessions are read back correctly
+
+ static const char* strA = "0123456789";
+ static const char* strB = "WhirledPeas";
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+ char* temp_buffer = new char[40];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ ArrayOutputStream* output =
+ new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
+ CodedOutputStream* coded_output = new CodedOutputStream(output);
+ coded_output->WriteVarint32(strlen(strA));
+ coded_output->WriteRaw(strA, strlen(strA));
+ delete coded_output; // flush
+ int64 pos = output->ByteCount();
+ delete output;
+ output = new ArrayOutputStream(
+ buffer + pos, kBufferSize - pos, kBlockSizes[i]);
+ coded_output = new CodedOutputStream(output);
+ coded_output->WriteVarint32(strlen(strB));
+ coded_output->WriteRaw(strB, strlen(strB));
+ delete coded_output; // flush
+ int64 size = pos + output->ByteCount();
+ delete output;
+
+ ArrayInputStream* input =
+ new ArrayInputStream(buffer, size, kBlockSizes[j]);
+ CodedInputStream* coded_input = new CodedInputStream(input);
+ uint32 insize;
+ EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+ EXPECT_EQ(strlen(strA), insize);
+ EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+ EXPECT_EQ(0, memcmp(temp_buffer, strA, insize));
+
+ EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+ EXPECT_EQ(strlen(strB), insize);
+ EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+ EXPECT_EQ(0, memcmp(temp_buffer, strB, insize));
+
+ delete coded_input;
+ delete input;
+ }
+ }
+
+ delete [] temp_buffer;
+ delete [] buffer;
+}
+
+#if HAVE_ZLIB
+TEST_F(IoTest, GzipIo) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ for (int z = 0; z < kBlockSizeCount; z++) {
+ int gzip_buffer_size = kBlockSizes[z];
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::GZIP;
+ if (gzip_buffer_size != -1) {
+ options.buffer_size = gzip_buffer_size;
+ }
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ gzout.Close();
+ size = output.ByteCount();
+ }
+ {
+ ArrayInputStream input(buffer, size, kBlockSizes[j]);
+ GzipInputStream gzin(
+ &input, GzipInputStream::GZIP, gzip_buffer_size);
+ ReadStuff(&gzin);
+ }
+ }
+ }
+ }
+ delete [] buffer;
+}
+
+TEST_F(IoTest, GzipIoWithFlush) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+  // We start with i = 4 because we want a block size > 6. With a block size
+  // <= 6, Flush() fills up the entire 2K buffer with flush markers and the
+  // test fails. See the documentation for Flush() for more detail.
+ for (int i = 4; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ for (int z = 0; z < kBlockSizeCount; z++) {
+ int gzip_buffer_size = kBlockSizes[z];
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::GZIP;
+ if (gzip_buffer_size != -1) {
+ options.buffer_size = gzip_buffer_size;
+ }
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ EXPECT_TRUE(gzout.Flush());
+ gzout.Close();
+ size = output.ByteCount();
+ }
+ {
+ ArrayInputStream input(buffer, size, kBlockSizes[j]);
+ GzipInputStream gzin(
+ &input, GzipInputStream::GZIP, gzip_buffer_size);
+ ReadStuff(&gzin);
+ }
+ }
+ }
+ }
+ delete [] buffer;
+}
+
+TEST_F(IoTest, GzipIoContiguousFlushes) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+
+ int block_size = kBlockSizes[4];
+ int gzip_buffer_size = block_size;
+ int size;
+
+ ArrayOutputStream output(buffer, kBufferSize, block_size);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::GZIP;
+ if (gzip_buffer_size != -1) {
+ options.buffer_size = gzip_buffer_size;
+ }
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ EXPECT_TRUE(gzout.Flush());
+ EXPECT_TRUE(gzout.Flush());
+ gzout.Close();
+ size = output.ByteCount();
+
+ ArrayInputStream input(buffer, size, block_size);
+ GzipInputStream gzin(
+ &input, GzipInputStream::GZIP, gzip_buffer_size);
+ ReadStuff(&gzin);
+
+ delete [] buffer;
+}
+
+TEST_F(IoTest, GzipIoReadAfterFlush) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+
+ int block_size = kBlockSizes[4];
+ int gzip_buffer_size = block_size;
+ int size;
+ ArrayOutputStream output(buffer, kBufferSize, block_size);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::GZIP;
+ if (gzip_buffer_size != -1) {
+ options.buffer_size = gzip_buffer_size;
+ }
+
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ EXPECT_TRUE(gzout.Flush());
+ size = output.ByteCount();
+
+ ArrayInputStream input(buffer, size, block_size);
+ GzipInputStream gzin(
+ &input, GzipInputStream::GZIP, gzip_buffer_size);
+ ReadStuff(&gzin);
+
+ gzout.Close();
+
+ delete [] buffer;
+}
+
+TEST_F(IoTest, ZlibIo) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ for (int z = 0; z < kBlockSizeCount; z++) {
+ int gzip_buffer_size = kBlockSizes[z];
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize, kBlockSizes[i]);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::ZLIB;
+ if (gzip_buffer_size != -1) {
+ options.buffer_size = gzip_buffer_size;
+ }
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ gzout.Close();
+ size = output.ByteCount();
+ }
+ {
+ ArrayInputStream input(buffer, size, kBlockSizes[j]);
+ GzipInputStream gzin(
+ &input, GzipInputStream::ZLIB, gzip_buffer_size);
+ ReadStuff(&gzin);
+ }
+ }
+ }
+ }
+ delete [] buffer;
+}
+
+TEST_F(IoTest, ZlibIoInputAutodetect) {
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+ int size;
+ {
+ ArrayOutputStream output(buffer, kBufferSize);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::ZLIB;
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ gzout.Close();
+ size = output.ByteCount();
+ }
+ {
+ ArrayInputStream input(buffer, size);
+ GzipInputStream gzin(&input, GzipInputStream::AUTO);
+ ReadStuff(&gzin);
+ }
+ {
+ ArrayOutputStream output(buffer, kBufferSize);
+ GzipOutputStream::Options options;
+ options.format = GzipOutputStream::GZIP;
+ GzipOutputStream gzout(&output, options);
+ WriteStuff(&gzout);
+ gzout.Close();
+ size = output.ByteCount();
+ }
+ {
+ ArrayInputStream input(buffer, size);
+ GzipInputStream gzin(&input, GzipInputStream::AUTO);
+ ReadStuff(&gzin);
+ }
+ delete [] buffer;
+}
+
+string IoTest::Compress(const string& data,
+ const GzipOutputStream::Options& options) {
+ string result;
+ {
+ StringOutputStream output(&result);
+ GzipOutputStream gzout(&output, options);
+ WriteToOutput(&gzout, data.data(), data.size());
+ }
+ return result;
+}
+
+string IoTest::Uncompress(const string& data) {
+ string result;
+ {
+ ArrayInputStream input(data.data(), data.size());
+ GzipInputStream gzin(&input);
+ const void* buffer;
+ int size;
+ while (gzin.Next(&buffer, &size)) {
+ result.append(reinterpret_cast<const char*>(buffer), size);
+ }
+ }
+ return result;
+}
+
+TEST_F(IoTest, CompressionOptions) {
+ // Some ad-hoc testing of compression options.
+
+ string golden;
+ GOOGLE_CHECK_OK(File::GetContents(
+ TestSourceDir() +
+ "/google/protobuf/testdata/golden_message",
+ &golden, true));
+
+ GzipOutputStream::Options options;
+ string gzip_compressed = Compress(golden, options);
+
+ options.compression_level = 0;
+ string not_compressed = Compress(golden, options);
+
+ // Try zlib compression for fun.
+ options = GzipOutputStream::Options();
+ options.format = GzipOutputStream::ZLIB;
+ string zlib_compressed = Compress(golden, options);
+
+  // The "not compressed" (level 0) output should still be bigger than the
+  // original since it carries the gzip header and framing.
+ EXPECT_GT(not_compressed.size(), golden.size());
+
+ // Higher compression levels should result in smaller sizes.
+ EXPECT_LT(zlib_compressed.size(), not_compressed.size());
+
+ // ZLIB format should differ from GZIP format.
+ EXPECT_TRUE(zlib_compressed != gzip_compressed);
+
+ // Everything should decompress correctly.
+ EXPECT_TRUE(Uncompress(not_compressed) == golden);
+ EXPECT_TRUE(Uncompress(gzip_compressed) == golden);
+ EXPECT_TRUE(Uncompress(zlib_compressed) == golden);
+}
+
+TEST_F(IoTest, TwoSessionWriteGzip) {
+ // Test that two concatenated gzip streams can be read correctly
+
+ static const char* strA = "0123456789";
+ static const char* strB = "QuickBrownFox";
+ const int kBufferSize = 2*1024;
+ uint8* buffer = new uint8[kBufferSize];
+ char* temp_buffer = new char[40];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ ArrayOutputStream* output =
+ new ArrayOutputStream(buffer, kBufferSize, kBlockSizes[i]);
+ GzipOutputStream* gzout = new GzipOutputStream(output);
+ CodedOutputStream* coded_output = new CodedOutputStream(gzout);
+ int32 outlen = strlen(strA) + 1;
+ coded_output->WriteVarint32(outlen);
+ coded_output->WriteRaw(strA, outlen);
+ delete coded_output; // flush
+ delete gzout; // flush
+ int64 pos = output->ByteCount();
+ delete output;
+ output = new ArrayOutputStream(
+ buffer + pos, kBufferSize - pos, kBlockSizes[i]);
+ gzout = new GzipOutputStream(output);
+ coded_output = new CodedOutputStream(gzout);
+ outlen = strlen(strB) + 1;
+ coded_output->WriteVarint32(outlen);
+ coded_output->WriteRaw(strB, outlen);
+ delete coded_output; // flush
+ delete gzout; // flush
+ int64 size = pos + output->ByteCount();
+ delete output;
+
+ ArrayInputStream* input =
+ new ArrayInputStream(buffer, size, kBlockSizes[j]);
+ GzipInputStream* gzin = new GzipInputStream(input);
+ CodedInputStream* coded_input = new CodedInputStream(gzin);
+ uint32 insize;
+ EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+ EXPECT_EQ(strlen(strA) + 1, insize);
+ EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+ EXPECT_EQ(0, memcmp(temp_buffer, strA, insize))
+ << "strA=" << strA << " in=" << temp_buffer;
+
+ EXPECT_TRUE(coded_input->ReadVarint32(&insize));
+ EXPECT_EQ(strlen(strB) + 1, insize);
+ EXPECT_TRUE(coded_input->ReadRaw(temp_buffer, insize));
+ EXPECT_EQ(0, memcmp(temp_buffer, strB, insize))
+ << " out_block_size=" << kBlockSizes[i]
+ << " in_block_size=" << kBlockSizes[j]
+ << " pos=" << pos
+ << " size=" << size
+ << " strB=" << strB << " in=" << temp_buffer;
+
+ delete coded_input;
+ delete gzin;
+ delete input;
+ }
+ }
+
+ delete [] temp_buffer;
+ delete [] buffer;
+}
+
+TEST_F(IoTest, GzipInputByteCountAfterClosed) {
+ string golden = "abcdefghijklmnopqrstuvwxyz";
+ string compressed = Compress(golden, GzipOutputStream::Options());
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ ArrayInputStream arr_input(compressed.data(), compressed.size(),
+ kBlockSizes[i]);
+ GzipInputStream gz_input(&arr_input);
+ const void* buffer;
+ int size;
+ while (gz_input.Next(&buffer, &size)) {
+ EXPECT_LE(gz_input.ByteCount(), golden.size());
+ }
+ EXPECT_EQ(golden.size(), gz_input.ByteCount());
+ }
+}
+
+TEST_F(IoTest, GzipInputByteCountAfterClosedConcatenatedStreams) {
+ string golden1 = "abcdefghijklmnopqrstuvwxyz";
+ string golden2 = "the quick brown fox jumps over the lazy dog";
+ const size_t total_size = golden1.size() + golden2.size();
+ string compressed = Compress(golden1, GzipOutputStream::Options()) +
+ Compress(golden2, GzipOutputStream::Options());
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ ArrayInputStream arr_input(compressed.data(), compressed.size(),
+ kBlockSizes[i]);
+ GzipInputStream gz_input(&arr_input);
+ const void* buffer;
+ int size;
+ while (gz_input.Next(&buffer, &size)) {
+ EXPECT_LE(gz_input.ByteCount(), total_size);
+ }
+ EXPECT_EQ(total_size, gz_input.ByteCount());
+ }
+}
+#endif
+
+// There is a string output stream but no string input stream. Also, it
+// doesn't support explicit block sizes. So, we'll only run one test and
+// we'll use an ArrayInputStream to read back the results.
+TEST_F(IoTest, StringIo) {
+ string str;
+ {
+ StringOutputStream output(&str);
+ WriteStuff(&output);
+ }
+ {
+ ArrayInputStream input(str.data(), str.size());
+ ReadStuff(&input);
+ }
+}
+
+
+// To test files, we create a temporary file, write, read, truncate, repeat.
+TEST_F(IoTest, FileIo) {
+ string filename = TestTempDir() + "/zero_copy_stream_test_file";
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ // Make a temporary file.
+ int file =
+ open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777);
+ ASSERT_GE(file, 0);
+
+ {
+ FileOutputStream output(file, kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_EQ(0, output.GetErrno());
+ }
+
+ // Rewind.
+ ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1);
+
+ {
+ FileInputStream input(file, kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_EQ(0, input.GetErrno());
+ }
+
+ close(file);
+ }
+ }
+}
+
+#if HAVE_ZLIB
+TEST_F(IoTest, GzipFileIo) {
+ string filename = TestTempDir() + "/zero_copy_stream_test_file";
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ // Make a temporary file.
+ int file =
+ open(filename.c_str(), O_RDWR | O_CREAT | O_TRUNC | O_BINARY, 0777);
+ ASSERT_GE(file, 0);
+ {
+ FileOutputStream output(file, kBlockSizes[i]);
+ GzipOutputStream gzout(&output);
+ WriteStuffLarge(&gzout);
+ gzout.Close();
+ output.Flush();
+ EXPECT_EQ(0, output.GetErrno());
+ }
+
+ // Rewind.
+ ASSERT_NE(lseek(file, 0, SEEK_SET), (off_t)-1);
+
+ {
+ FileInputStream input(file, kBlockSizes[j]);
+ GzipInputStream gzin(&input);
+ ReadStuffLarge(&gzin);
+ EXPECT_EQ(0, input.GetErrno());
+ }
+
+ close(file);
+ }
+ }
+}
+#endif
+
+// MSVC raises various debugging exceptions if we try to use a file
+// descriptor of -1, defeating our tests below. This class will disable
+// these debug assertions while in scope.
+class MsvcDebugDisabler {
+ public:
+#if defined(_MSC_VER) && _MSC_VER >= 1400
+ MsvcDebugDisabler() {
+ old_handler_ = _set_invalid_parameter_handler(MyHandler);
+ old_mode_ = _CrtSetReportMode(_CRT_ASSERT, 0);
+ }
+ ~MsvcDebugDisabler() {
+ old_handler_ = _set_invalid_parameter_handler(old_handler_);
+ old_mode_ = _CrtSetReportMode(_CRT_ASSERT, old_mode_);
+ }
+
+ static void MyHandler(const wchar_t *expr,
+ const wchar_t *func,
+ const wchar_t *file,
+ unsigned int line,
+ uintptr_t pReserved) {
+ // do nothing
+ }
+
+ _invalid_parameter_handler old_handler_;
+ int old_mode_;
+#else
+ // Dummy constructor and destructor to ensure that GCC doesn't complain
+ // that debug_disabler is an unused variable.
+ MsvcDebugDisabler() {}
+ ~MsvcDebugDisabler() {}
+#endif
+};
+
+// Test that FileInputStreams report errors correctly.
+TEST_F(IoTest, FileReadError) {
+ MsvcDebugDisabler debug_disabler;
+
+ // -1 = invalid file descriptor.
+ FileInputStream input(-1);
+
+ const void* buffer;
+ int size;
+ EXPECT_FALSE(input.Next(&buffer, &size));
+ EXPECT_EQ(EBADF, input.GetErrno());
+}
+
+// Test that FileOutputStreams report errors correctly.
+TEST_F(IoTest, FileWriteError) {
+ MsvcDebugDisabler debug_disabler;
+
+ // -1 = invalid file descriptor.
+ FileOutputStream input(-1);
+
+ void* buffer;
+ int size;
+
+ // The first call to Next() succeeds because it doesn't have anything to
+ // write yet.
+ EXPECT_TRUE(input.Next(&buffer, &size));
+
+ // Second call fails.
+ EXPECT_FALSE(input.Next(&buffer, &size));
+
+ EXPECT_EQ(EBADF, input.GetErrno());
+}
+
+// Pipes are not seekable, so FileInputStream and FileOutputStream end up
+// doing some different things to handle them. We'll test by writing to a
+// pipe and reading back from it.
+TEST_F(IoTest, PipeIo) {
+ int files[2];
+
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ // Need to create a new pipe each time because ReadStuff() expects
+ // to see EOF at the end.
+ ASSERT_EQ(pipe(files), 0);
+
+ {
+ FileOutputStream output(files[1], kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_EQ(0, output.GetErrno());
+ }
+ close(files[1]); // Send EOF.
+
+ {
+ FileInputStream input(files[0], kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_EQ(0, input.GetErrno());
+ }
+ close(files[0]);
+ }
+ }
+}
+
+// Test using C++ iostreams.
+TEST_F(IoTest, IostreamIo) {
+ for (int i = 0; i < kBlockSizeCount; i++) {
+ for (int j = 0; j < kBlockSizeCount; j++) {
+ {
+ stringstream stream;
+
+ {
+ OstreamOutputStream output(&stream, kBlockSizes[i]);
+ WriteStuff(&output);
+ EXPECT_FALSE(stream.fail());
+ }
+
+ {
+ IstreamInputStream input(&stream, kBlockSizes[j]);
+ ReadStuff(&input);
+ EXPECT_TRUE(stream.eof());
+ }
+ }
+
+ {
+ stringstream stream;
+
+ {
+ OstreamOutputStream output(&stream, kBlockSizes[i]);
+ WriteStuffLarge(&output);
+ EXPECT_FALSE(stream.fail());
+ }
+
+ {
+ IstreamInputStream input(&stream, kBlockSizes[j]);
+ ReadStuffLarge(&input);
+ EXPECT_TRUE(stream.eof());
+ }
+ }
+ }
+ }
+}
+
+// To test ConcatenatingInputStream, we create several ArrayInputStreams
+// covering a buffer and then concatenate them.
+TEST_F(IoTest, ConcatenatingInputStream) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ // Fill the buffer.
+ ArrayOutputStream output(buffer, kBufferSize);
+ WriteStuff(&output);
+
+ // Now split it up into multiple streams of varying sizes.
+ ASSERT_EQ(68, output.ByteCount()); // Test depends on this.
+ ArrayInputStream input1(buffer , 12);
+ ArrayInputStream input2(buffer + 12, 7);
+ ArrayInputStream input3(buffer + 19, 6);
+ ArrayInputStream input4(buffer + 25, 15);
+ ArrayInputStream input5(buffer + 40, 0);
+  // Note: We want to make sure we have a stream boundary somewhere between
+  // bytes 42 and 62, which is the range that is Skip()ed by ReadStuff(). This
+  // tests that a bug that existed in the original code for Skip() is fixed.
+ ArrayInputStream input6(buffer + 40, 10);
+ ArrayInputStream input7(buffer + 50, 18); // Total = 68 bytes.
+
+ ZeroCopyInputStream* streams[] =
+ {&input1, &input2, &input3, &input4, &input5, &input6, &input7};
+
+ // Create the concatenating stream and read.
+ ConcatenatingInputStream input(streams, GOOGLE_ARRAYSIZE(streams));
+ ReadStuff(&input);
+}
+
+// To test LimitingInputStream, we write our golden text to a buffer, then
+// create an ArrayInputStream that contains the whole buffer (not just the
+// bytes written), then use a LimitingInputStream to limit it just to the
+// bytes written.
+TEST_F(IoTest, LimitingInputStream) {
+ const int kBufferSize = 256;
+ uint8 buffer[kBufferSize];
+
+ // Fill the buffer.
+ ArrayOutputStream output(buffer, kBufferSize);
+ WriteStuff(&output);
+
+ // Set up input.
+ ArrayInputStream array_input(buffer, kBufferSize);
+ LimitingInputStream input(&array_input, output.ByteCount());
+
+ ReadStuff(&input);
+}
+
+// Checks that ByteCount works correctly for LimitingInputStreams where the
+// underlying stream has already been read.
+TEST_F(IoTest, LimitingInputStreamByteCount) {
+ const int kHalfBufferSize = 128;
+ const int kBufferSize = kHalfBufferSize * 2;
+ uint8 buffer[kBufferSize];
+
+ // Set up input. Only allow half to be read at once.
+ ArrayInputStream array_input(buffer, kBufferSize, kHalfBufferSize);
+ const void* data;
+ int size;
+ EXPECT_TRUE(array_input.Next(&data, &size));
+ EXPECT_EQ(kHalfBufferSize, array_input.ByteCount());
+ // kHalfBufferSize - 1 to test limiting logic as well.
+ LimitingInputStream input(&array_input, kHalfBufferSize - 1);
+ EXPECT_EQ(0, input.ByteCount());
+ EXPECT_TRUE(input.Next(&data, &size));
+ EXPECT_EQ(kHalfBufferSize - 1 , input.ByteCount());
+}
+
+// Check that a zero-size array doesn't confuse the code.
+TEST(ZeroSizeArray, Input) {
+ ArrayInputStream input(NULL, 0);
+ const void* data;
+ int size;
+ EXPECT_FALSE(input.Next(&data, &size));
+}
+
+TEST(ZeroSizeArray, Output) {
+ ArrayOutputStream output(NULL, 0);
+ void* data;
+ int size;
+ EXPECT_FALSE(output.Next(&data, &size));
+}
+
+} // namespace
+} // namespace io
+} // namespace protobuf
+} // namespace google