Blame - src/google/protobuf/stubs/strutil.cc - RealtimeRoboticsGroup/test

blob: 552d416f6342e2ada1c179b521d783dd3f34e789 [file] [log] [blame]

Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1	// Protocol Buffers - Google's data interchange format
				2	// Copyright 2008 Google Inc. All rights reserved.
				3	// https://developers.google.com/protocol-buffers/
				4	//
				5	// Redistribution and use in source and binary forms, with or without
				6	// modification, are permitted provided that the following conditions are
				7	// met:
				8	//
				9	// * Redistributions of source code must retain the above copyright
				10	// notice, this list of conditions and the following disclaimer.
				11	// * Redistributions in binary form must reproduce the above
				12	// copyright notice, this list of conditions and the following disclaimer
				13	// in the documentation and/or other materials provided with the
				14	// distribution.
				15	// * Neither the name of Google Inc. nor the names of its
				16	// contributors may be used to endorse or promote products derived from
				17	// this software without specific prior written permission.
				18	//
				19	// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
				20	// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
				21	// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
				22	// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
				23	// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
				24	// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
				25	// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
				26	// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
				27	// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
				28	// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
				29	// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
				30
				31	// from google3/strings/strutil.cc
				32
				33	#include <google/protobuf/stubs/strutil.h>
				34	#include <google/protobuf/stubs/mathlimits.h>
				35
				36	#include <errno.h>
				37	#include <float.h> // FLT_DIG and DBL_DIG
				38	#include <limits>
				39	#include <limits.h>
				40	#include <stdio.h>
				41	#include <iterator>
				42
				43	#include <google/protobuf/stubs/stl_util.h>
				44
				45	#ifdef _WIN32
				46	// MSVC has only _snprintf, not snprintf.
				47	//
				48	// MinGW has both snprintf and _snprintf, but they appear to be different
				49	// functions. The former is buggy. When invoked like so:
				50	// char buffer[32];
				51	// snprintf(buffer, 32, "%.*g\n", FLT_DIG, 1.23e10f);
				52	// it prints "1.23000e+10". This is plainly wrong: %g should never print
				53	// trailing zeros after the decimal point. For some reason this bug only
				54	// occurs with some input values, not all. In any case, _snprintf does the
				55	// right thing, so we use it.
				56	#define snprintf _snprintf
				57	#endif
				58
				59	namespace google {
				60	namespace protobuf {
				61
				62	// These are defined as macros on some platforms. #undef them so that we can
				63	// redefine them.
				64	#undef isxdigit
				65	#undef isprint
				66
				67	// The definitions of these in ctype.h change based on locale. Since our
				68	// string manipulation is all in relation to the protocol buffer and C++
				69	// languages, we always want to use the C locale. So, we re-define these
				70	// exactly as we want them.
				71	inline bool isxdigit(char c) {
				72	return ('0' <= c && c <= '9') \|\|
				73	('a' <= c && c <= 'f') \|\|
				74	('A' <= c && c <= 'F');
				75	}
				76
				77	inline bool isprint(char c) {
				78	return c >= 0x20 && c <= 0x7E;
				79	}
				80
				81	// ----------------------------------------------------------------------
				82	// StripString
				83	// Replaces any occurrence of the character 'remove' (or the characters
				84	// in 'remove') with the character 'replacewith'.
				85	// ----------------------------------------------------------------------
				86	void StripString(string* s, const char* remove, char replacewith) {
				87	const char * str_start = s->c_str();
				88	const char * str = str_start;
				89	for (str = strpbrk(str, remove);
				90	str != NULL;
				91	str = strpbrk(str + 1, remove)) {
				92	(*s)[str - str_start] = replacewith;
				93	}
				94	}
				95
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	96	// ----------------------------------------------------------------------
				97	// ReplaceCharacters
				98	// Replaces any occurrence of the character 'remove' (or the characters
				99	// in 'remove') with the character 'replacewith'.
				100	// ----------------------------------------------------------------------
				101	void ReplaceCharacters(string s, const char remove, char replacewith) {
				102	const char *str_start = s->c_str();
				103	const char *str = str_start;
				104	for (str = strpbrk(str, remove);
				105	str != NULL;
				106	str = strpbrk(str + 1, remove)) {
				107	(*s)[str - str_start] = replacewith;
				108	}
				109	}
				110
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	111	void StripWhitespace(string* str) {
				112	int str_length = str->length();
				113
				114	// Strip off leading whitespace.
				115	int first = 0;
				116	while (first < str_length && ascii_isspace(str->at(first))) {
				117	++first;
				118	}
				119	// If entire string is white space.
				120	if (first == str_length) {
				121	str->clear();
				122	return;
				123	}
				124	if (first > 0) {
				125	str->erase(0, first);
				126	str_length -= first;
				127	}
				128
				129	// Strip off trailing whitespace.
				130	int last = str_length - 1;
				131	while (last >= 0 && ascii_isspace(str->at(last))) {
				132	--last;
				133	}
				134	if (last != (str_length - 1) && last >= 0) {
				135	str->erase(last + 1, string::npos);
				136	}
				137	}
				138
				139	// ----------------------------------------------------------------------
				140	// StringReplace()
				141	// Replace the "old" pattern with the "new" pattern in a string,
				142	// and append the result to "res". If replace_all is false,
				143	// it only replaces the first instance of "old."
				144	// ----------------------------------------------------------------------
				145
				146	void StringReplace(const string& s, const string& oldsub,
				147	const string& newsub, bool replace_all,
				148	string* res) {
				149	if (oldsub.empty()) {
				150	res->append(s); // if empty, append the given string.
				151	return;
				152	}
				153
				154	string::size_type start_pos = 0;
				155	string::size_type pos;
				156	do {
				157	pos = s.find(oldsub, start_pos);
				158	if (pos == string::npos) {
				159	break;
				160	}
				161	res->append(s, start_pos, pos - start_pos);
				162	res->append(newsub);
				163	start_pos = pos + oldsub.size(); // start searching again after the "old"
				164	} while (replace_all);
				165	res->append(s, start_pos, s.length() - start_pos);
				166	}
				167
				168	// ----------------------------------------------------------------------
				169	// StringReplace()
				170	// Give me a string and two patterns "old" and "new", and I replace
				171	// the first instance of "old" in the string with "new", if it
				172	// exists. If "global" is true; call this repeatedly until it
				173	// fails. RETURN a new string, regardless of whether the replacement
				174	// happened or not.
				175	// ----------------------------------------------------------------------
				176
				177	string StringReplace(const string& s, const string& oldsub,
				178	const string& newsub, bool replace_all) {
				179	string ret;
				180	StringReplace(s, oldsub, newsub, replace_all, &ret);
				181	return ret;
				182	}
				183
				184	// ----------------------------------------------------------------------
				185	// SplitStringUsing()
				186	// Split a string using a character delimiter. Append the components
				187	// to 'result'.
				188	//
				189	// Note: For multi-character delimiters, this routine will split on ANY of
				190	// the characters in the string, not the entire string as a single delimiter.
				191	// ----------------------------------------------------------------------
				192	template <typename ITR>
				193	static inline
				194	void SplitStringToIteratorUsing(const string& full,
				195	const char* delim,
				196	ITR& result) {
				197	// Optimize the common case where delim is a single character.
				198	if (delim[0] != '\0' && delim[1] == '\0') {
				199	char c = delim[0];
				200	const char* p = full.data();
				201	const char* end = p + full.size();
				202	while (p != end) {
				203	if (*p == c) {
				204	++p;
				205	} else {
				206	const char* start = p;
				207	while (++p != end && *p != c);
				208	*result++ = string(start, p - start);
				209	}
				210	}
				211	return;
				212	}
				213
				214	string::size_type begin_index, end_index;
				215	begin_index = full.find_first_not_of(delim);
				216	while (begin_index != string::npos) {
				217	end_index = full.find_first_of(delim, begin_index);
				218	if (end_index == string::npos) {
				219	*result++ = full.substr(begin_index);
				220	return;
				221	}
				222	*result++ = full.substr(begin_index, (end_index - begin_index));
				223	begin_index = full.find_first_not_of(delim, end_index);
				224	}
				225	}
				226
				227	void SplitStringUsing(const string& full,
				228	const char* delim,
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	229	std::vector<string>* result) {
				230	std::back_insert_iterator< std::vector<string> > it(*result);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	231	SplitStringToIteratorUsing(full, delim, it);
				232	}
				233
				234	// Split a string using a character delimiter. Append the components
				235	// to 'result'. If there are consecutive delimiters, this function
				236	// will return corresponding empty strings. The string is split into
				237	// at most the specified number of pieces greedily. This means that the
				238	// last piece may possibly be split further. To split into as many pieces
				239	// as possible, specify 0 as the number of pieces.
				240	//
				241	// If "full" is the empty string, yields an empty string as the only value.
				242	//
				243	// If "pieces" is negative for some reason, it returns the whole string
				244	// ----------------------------------------------------------------------
				245	template <typename StringType, typename ITR>
				246	static inline
				247	void SplitStringToIteratorAllowEmpty(const StringType& full,
				248	const char* delim,
				249	int pieces,
				250	ITR& result) {
				251	string::size_type begin_index, end_index;
				252	begin_index = 0;
				253
				254	for (int i = 0; (i < pieces-1) \|\| (pieces == 0); i++) {
				255	end_index = full.find_first_of(delim, begin_index);
				256	if (end_index == string::npos) {
				257	*result++ = full.substr(begin_index);
				258	return;
				259	}
				260	*result++ = full.substr(begin_index, (end_index - begin_index));
				261	begin_index = end_index + 1;
				262	}
				263	*result++ = full.substr(begin_index);
				264	}
				265
				266	void SplitStringAllowEmpty(const string& full, const char* delim,
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	267	std::vector<string>* result) {
				268	std::back_insert_iterator<std::vector<string> > it(*result);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	269	SplitStringToIteratorAllowEmpty(full, delim, 0, it);
				270	}
				271
				272	// ----------------------------------------------------------------------
				273	// JoinStrings()
				274	// This merges a vector of string components with delim inserted
				275	// as separaters between components.
				276	//
				277	// ----------------------------------------------------------------------
				278	template <class ITERATOR>
				279	static void JoinStringsIterator(const ITERATOR& start,
				280	const ITERATOR& end,
				281	const char* delim,
				282	string* result) {
				283	GOOGLE_CHECK(result != NULL);
				284	result->clear();
				285	int delim_length = strlen(delim);
				286
				287	// Precompute resulting length so we can reserve() memory in one shot.
				288	int length = 0;
				289	for (ITERATOR iter = start; iter != end; ++iter) {
				290	if (iter != start) {
				291	length += delim_length;
				292	}
				293	length += iter->size();
				294	}
				295	result->reserve(length);
				296
				297	// Now combine everything.
				298	for (ITERATOR iter = start; iter != end; ++iter) {
				299	if (iter != start) {
				300	result->append(delim, delim_length);
				301	}
				302	result->append(iter->data(), iter->size());
				303	}
				304	}
				305
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	306	void JoinStrings(const std::vector<string>& components,
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	307	const char* delim,
				308	string * result) {
				309	JoinStringsIterator(components.begin(), components.end(), delim, result);
				310	}
				311
				312	// ----------------------------------------------------------------------
				313	// UnescapeCEscapeSequences()
				314	// This does all the unescaping that C does: \ooo, \r, \n, etc
				315	// Returns length of resulting string.
				316	// The implementation of \x parses any positive number of hex digits,
				317	// but it is an error if the value requires more than 8 bits, and the
				318	// result is truncated to 8 bits.
				319	//
				320	// The second call stores its errors in a supplied string vector.
				321	// If the string vector pointer is NULL, it reports the errors with LOG().
				322	// ----------------------------------------------------------------------
				323
				324	#define IS_OCTAL_DIGIT(c) (((c) >= '0') && ((c) <= '7'))
				325
				326	// Protocol buffers doesn't ever care about errors, but I don't want to remove
				327	// the code.
				328	#define LOG_STRING(LEVEL, VECTOR) GOOGLE_LOG_IF(LEVEL, false)
				329
				330	int UnescapeCEscapeSequences(const char* source, char* dest) {
				331	return UnescapeCEscapeSequences(source, dest, NULL);
				332	}
				333
				334	int UnescapeCEscapeSequences(const char* source, char* dest,
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	335	std::vector<string> *errors) {
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	336	GOOGLE_DCHECK(errors == NULL) << "Error reporting not implemented.";
				337
				338	char* d = dest;
				339	const char* p = source;
				340
				341	// Small optimization for case where source = dest and there's no escaping
				342	while ( p == d && p != '\0' && p != '\\' )
				343	p++, d++;
				344
				345	while (*p != '\0') {
				346	if (*p != '\\') {
				347	d++ = p++;
				348	} else {
				349	switch ( *++p ) { // skip past the '\\'
				350	case '\0':
				351	LOG_STRING(ERROR, errors) << "String cannot end with \\";
				352	*d = '\0';
				353	return d - dest; // we're done with p
				354	case 'a': *d++ = '\a'; break;
				355	case 'b': *d++ = '\b'; break;
				356	case 'f': *d++ = '\f'; break;
				357	case 'n': *d++ = '\n'; break;
				358	case 'r': *d++ = '\r'; break;
				359	case 't': *d++ = '\t'; break;
				360	case 'v': *d++ = '\v'; break;
				361	case '\\': *d++ = '\\'; break;
				362	case '?': *d++ = '\?'; break; // \? Who knew?
				363	case '\'': *d++ = '\''; break;
				364	case '"': *d++ = '\"'; break;
				365	case '0': case '1': case '2': case '3': // octal digit: 1 to 3 digits
				366	case '4': case '5': case '6': case '7': {
				367	char ch = *p - '0';
				368	if ( IS_OCTAL_DIGIT(p[1]) )
				369	ch = ch * 8 + *++p - '0';
				370	if ( IS_OCTAL_DIGIT(p[1]) ) // safe (and easy) to do this twice
				371	ch = ch * 8 + *++p - '0'; // now points at last digit
				372	*d++ = ch;
				373	break;
				374	}
				375	case 'x': case 'X': {
				376	if (!isxdigit(p[1])) {
				377	if (p[1] == '\0') {
				378	LOG_STRING(ERROR, errors) << "String cannot end with \\x";
				379	} else {
				380	LOG_STRING(ERROR, errors) <<
				381	"\\x cannot be followed by non-hex digit: \\" << *p << p[1];
				382	}
				383	break;
				384	}
				385	unsigned int ch = 0;
				386	const char *hex_start = p;
				387	while (isxdigit(p[1])) // arbitrarily many hex digits
				388	ch = (ch << 4) + hex_digit_to_int(*++p);
				389	if (ch > 0xFF)
				390	LOG_STRING(ERROR, errors) << "Value of " <<
				391	"\\" << string(hex_start, p+1-hex_start) << " exceeds 8 bits";
				392	*d++ = ch;
				393	break;
				394	}
				395	#if 0 // TODO(kenton): Support \u and \U? Requires runetochar().
				396	case 'u': {
				397	// \uhhhh => convert 4 hex digits to UTF-8
				398	char32 rune = 0;
				399	const char *hex_start = p;
				400	for (int i = 0; i < 4; ++i) {
				401	if (isxdigit(p[1])) { // Look one char ahead.
				402	rune = (rune << 4) + hex_digit_to_int(*++p); // Advance p.
				403	} else {
				404	LOG_STRING(ERROR, errors)
				405	<< "\\u must be followed by 4 hex digits: \\"
				406	<< string(hex_start, p+1-hex_start);
				407	break;
				408	}
				409	}
				410	d += runetochar(d, &rune);
				411	break;
				412	}
				413	case 'U': {
				414	// \Uhhhhhhhh => convert 8 hex digits to UTF-8
				415	char32 rune = 0;
				416	const char *hex_start = p;
				417	for (int i = 0; i < 8; ++i) {
				418	if (isxdigit(p[1])) { // Look one char ahead.
				419	// Don't change rune until we're sure this
				420	// is within the Unicode limit, but do advance p.
				421	char32 newrune = (rune << 4) + hex_digit_to_int(*++p);
				422	if (newrune > 0x10FFFF) {
				423	LOG_STRING(ERROR, errors)
				424	<< "Value of \\"
				425	<< string(hex_start, p + 1 - hex_start)
				426	<< " exceeds Unicode limit (0x10FFFF)";
				427	break;
				428	} else {
				429	rune = newrune;
				430	}
				431	} else {
				432	LOG_STRING(ERROR, errors)
				433	<< "\\U must be followed by 8 hex digits: \\"
				434	<< string(hex_start, p+1-hex_start);
				435	break;
				436	}
				437	}
				438	d += runetochar(d, &rune);
				439	break;
				440	}
				441	#endif
				442	default:
				443	LOG_STRING(ERROR, errors) << "Unknown escape sequence: \\" << *p;
				444	}
				445	p++; // read past letter we escaped
				446	}
				447	}
				448	*d = '\0';
				449	return d - dest;
				450	}
				451
				452	// ----------------------------------------------------------------------
				453	// UnescapeCEscapeString()
				454	// This does the same thing as UnescapeCEscapeSequences, but creates
				455	// a new string. The caller does not need to worry about allocating
				456	// a dest buffer. This should be used for non performance critical
				457	// tasks such as printing debug messages. It is safe for src and dest
				458	// to be the same.
				459	//
				460	// The second call stores its errors in a supplied string vector.
				461	// If the string vector pointer is NULL, it reports the errors with LOG().
				462	//
				463	// In the first and second calls, the length of dest is returned. In the
				464	// the third call, the new string is returned.
				465	// ----------------------------------------------------------------------
				466	int UnescapeCEscapeString(const string& src, string* dest) {
				467	return UnescapeCEscapeString(src, dest, NULL);
				468	}
				469
				470	int UnescapeCEscapeString(const string& src, string* dest,
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	471	std::vector<string> *errors) {
				472	std::unique_ptr<char[]> unescaped(new char[src.size() + 1]);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	473	int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), errors);
				474	GOOGLE_CHECK(dest);
				475	dest->assign(unescaped.get(), len);
				476	return len;
				477	}
				478
				479	string UnescapeCEscapeString(const string& src) {
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	480	std::unique_ptr<char[]> unescaped(new char[src.size() + 1]);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	481	int len = UnescapeCEscapeSequences(src.c_str(), unescaped.get(), NULL);
				482	return string(unescaped.get(), len);
				483	}
				484
				485	// ----------------------------------------------------------------------
				486	// CEscapeString()
				487	// CHexEscapeString()
				488	// Copies 'src' to 'dest', escaping dangerous characters using
				489	// C-style escape sequences. This is very useful for preparing query
				490	// flags. 'src' and 'dest' should not overlap. The 'Hex' version uses
				491	// hexadecimal rather than octal sequences.
				492	// Returns the number of bytes written to 'dest' (not including the \0)
				493	// or -1 if there was insufficient space.
				494	//
				495	// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
				496	// ----------------------------------------------------------------------
				497	int CEscapeInternal(const char* src, int src_len, char* dest,
				498	int dest_len, bool use_hex, bool utf8_safe) {
				499	const char* src_end = src + src_len;
				500	int used = 0;
				501	bool last_hex_escape = false; // true if last output char was \xNN
				502
				503	for (; src < src_end; src++) {
				504	if (dest_len - used < 2) // Need space for two letter escape
				505	return -1;
				506
				507	bool is_hex_escape = false;
				508	switch (*src) {
				509	case '\n': dest[used++] = '\\'; dest[used++] = 'n'; break;
				510	case '\r': dest[used++] = '\\'; dest[used++] = 'r'; break;
				511	case '\t': dest[used++] = '\\'; dest[used++] = 't'; break;
				512	case '\"': dest[used++] = '\\'; dest[used++] = '\"'; break;
				513	case '\'': dest[used++] = '\\'; dest[used++] = '\''; break;
				514	case '\\': dest[used++] = '\\'; dest[used++] = '\\'; break;
				515	default:
				516	// Note that if we emit \xNN and the src character after that is a hex
				517	// digit then that digit must be escaped too to prevent it being
				518	// interpreted as part of the character code by C.
				519	if ((!utf8_safe \|\| static_cast<uint8>(*src) < 0x80) &&
				520	(!isprint(*src) \|\|
				521	(last_hex_escape && isxdigit(*src)))) {
				522	if (dest_len - used < 4) // need space for 4 letter escape
				523	return -1;
				524	sprintf(dest + used, (use_hex ? "\\x%02x" : "\\%03o"),
				525	static_cast<uint8>(*src));
				526	is_hex_escape = use_hex;
				527	used += 4;
				528	} else {
				529	dest[used++] = *src; break;
				530	}
				531	}
				532	last_hex_escape = is_hex_escape;
				533	}
				534
				535	if (dest_len - used < 1) // make sure that there is room for \0
				536	return -1;
				537
				538	dest[used] = '\0'; // doesn't count towards return value though
				539	return used;
				540	}
				541
				542	// Calculates the length of the C-style escaped version of 'src'.
				543	// Assumes that non-printable characters are escaped using octal sequences, and
				544	// that UTF-8 bytes are not handled specially.
				545	static inline size_t CEscapedLength(StringPiece src) {
				546	static char c_escaped_len[256] = {
				547	4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 4, 4, 2, 4, 4, // \t, \n, \r
				548	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				549	1, 1, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, // ", '
				550	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // '0'..'9'
				551	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'A'..'O'
				552	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, // 'P'..'Z', '\'
				553	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 'a'..'o'
				554	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, // 'p'..'z', DEL
				555	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				556	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				557	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				558	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				559	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				560	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				561	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				562	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
				563	};
				564
				565	size_t escaped_len = 0;
				566	for (int i = 0; i < src.size(); ++i) {
				567	unsigned char c = static_cast<unsigned char>(src[i]);
				568	escaped_len += c_escaped_len[c];
				569	}
				570	return escaped_len;
				571	}
				572
				573	// ----------------------------------------------------------------------
				574	// Escapes 'src' using C-style escape sequences, and appends the escaped string
				575	// to 'dest'. This version is faster than calling CEscapeInternal as it computes
				576	// the required space using a lookup table, and also does not do any special
				577	// handling for Hex or UTF-8 characters.
				578	// ----------------------------------------------------------------------
				579	void CEscapeAndAppend(StringPiece src, string* dest) {
				580	size_t escaped_len = CEscapedLength(src);
				581	if (escaped_len == src.size()) {
				582	dest->append(src.data(), src.size());
				583	return;
				584	}
				585
				586	size_t cur_dest_len = dest->size();
				587	dest->resize(cur_dest_len + escaped_len);
				588	char* append_ptr = &(*dest)[cur_dest_len];
				589
				590	for (int i = 0; i < src.size(); ++i) {
				591	unsigned char c = static_cast<unsigned char>(src[i]);
				592	switch (c) {
				593	case '\n': append_ptr++ = '\\'; append_ptr++ = 'n'; break;
				594	case '\r': append_ptr++ = '\\'; append_ptr++ = 'r'; break;
				595	case '\t': append_ptr++ = '\\'; append_ptr++ = 't'; break;
				596	case '\"': append_ptr++ = '\\'; append_ptr++ = '\"'; break;
				597	case '\'': append_ptr++ = '\\'; append_ptr++ = '\''; break;
				598	case '\\': append_ptr++ = '\\'; append_ptr++ = '\\'; break;
				599	default:
				600	if (!isprint(c)) {
				601	*append_ptr++ = '\\';
				602	*append_ptr++ = '0' + c / 64;
				603	*append_ptr++ = '0' + (c % 64) / 8;
				604	*append_ptr++ = '0' + c % 8;
				605	} else {
				606	*append_ptr++ = c;
				607	}
				608	break;
				609	}
				610	}
				611	}
				612
				613	string CEscape(const string& src) {
				614	string dest;
				615	CEscapeAndAppend(src, &dest);
				616	return dest;
				617	}
				618
				619	namespace strings {
				620
				621	string Utf8SafeCEscape(const string& src) {
				622	const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	623	std::unique_ptr<char[]> dest(new char[dest_length]);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	624	const int len = CEscapeInternal(src.data(), src.size(),
				625	dest.get(), dest_length, false, true);
				626	GOOGLE_DCHECK_GE(len, 0);
				627	return string(dest.get(), len);
				628	}
				629
				630	string CHexEscape(const string& src) {
				631	const int dest_length = src.size() * 4 + 1; // Maximum possible expansion
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	632	std::unique_ptr<char[]> dest(new char[dest_length]);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	633	const int len = CEscapeInternal(src.data(), src.size(),
				634	dest.get(), dest_length, true, false);
				635	GOOGLE_DCHECK_GE(len, 0);
				636	return string(dest.get(), len);
				637	}
				638
				639	} // namespace strings
				640
				641	// ----------------------------------------------------------------------
				642	// strto32_adaptor()
				643	// strtou32_adaptor()
				644	// Implementation of strto[u]l replacements that have identical
				645	// overflow and underflow characteristics for both ILP-32 and LP-64
				646	// platforms, including errno preservation in error-free calls.
				647	// ----------------------------------------------------------------------
				648
				649	int32 strto32_adaptor(const char nptr, char *endptr, int base) {
				650	const int saved_errno = errno;
				651	errno = 0;
				652	const long result = strtol(nptr, endptr, base);
				653	if (errno == ERANGE && result == LONG_MIN) {
				654	return kint32min;
				655	} else if (errno == ERANGE && result == LONG_MAX) {
				656	return kint32max;
				657	} else if (errno == 0 && result < kint32min) {
				658	errno = ERANGE;
				659	return kint32min;
				660	} else if (errno == 0 && result > kint32max) {
				661	errno = ERANGE;
				662	return kint32max;
				663	}
				664	if (errno == 0)
				665	errno = saved_errno;
				666	return static_cast<int32>(result);
				667	}
				668
				669	uint32 strtou32_adaptor(const char nptr, char *endptr, int base) {
				670	const int saved_errno = errno;
				671	errno = 0;
				672	const unsigned long result = strtoul(nptr, endptr, base);
				673	if (errno == ERANGE && result == ULONG_MAX) {
				674	return kuint32max;
				675	} else if (errno == 0 && result > kuint32max) {
				676	errno = ERANGE;
				677	return kuint32max;
				678	}
				679	if (errno == 0)
				680	errno = saved_errno;
				681	return static_cast<uint32>(result);
				682	}
				683
				684	inline bool safe_parse_sign(string* text /inout/,
				685	bool* negative_ptr /output/) {
				686	const char* start = text->data();
				687	const char* end = start + text->size();
				688
				689	// Consume whitespace.
				690	while (start < end && (start[0] == ' ')) {
				691	++start;
				692	}
				693	while (start < end && (end[-1] == ' ')) {
				694	--end;
				695	}
				696	if (start >= end) {
				697	return false;
				698	}
				699
				700	// Consume sign.
				701	*negative_ptr = (start[0] == '-');
				702	if (*negative_ptr \|\| start[0] == '+') {
				703	++start;
				704	if (start >= end) {
				705	return false;
				706	}
				707	}
				708	*text = text->substr(start - text->data(), end - start);
				709	return true;
				710	}
				711
				712	template<typename IntType>
				713	bool safe_parse_positive_int(
				714	string text, IntType* value_p) {
				715	int base = 10;
				716	IntType value = 0;
				717	const IntType vmax = std::numeric_limits<IntType>::max();
				718	assert(vmax > 0);
				719	assert(vmax >= base);
				720	const IntType vmax_over_base = vmax / base;
				721	const char* start = text.data();
				722	const char* end = start + text.size();
				723	// loop over digits
				724	for (; start < end; ++start) {
				725	unsigned char c = static_cast<unsigned char>(start[0]);
				726	int digit = c - '0';
				727	if (digit >= base \|\| digit < 0) {
				728	*value_p = value;
				729	return false;
				730	}
				731	if (value > vmax_over_base) {
				732	*value_p = vmax;
				733	return false;
				734	}
				735	value *= base;
				736	if (value > vmax - digit) {
				737	*value_p = vmax;
				738	return false;
				739	}
				740	value += digit;
				741	}
				742	*value_p = value;
				743	return true;
				744	}
				745
				746	template<typename IntType>
				747	bool safe_parse_negative_int(
				748	const string& text, IntType* value_p) {
				749	int base = 10;
				750	IntType value = 0;
				751	const IntType vmin = std::numeric_limits<IntType>::min();
				752	assert(vmin < 0);
				753	assert(vmin <= 0 - base);
				754	IntType vmin_over_base = vmin / base;
				755	// 2003 c++ standard [expr.mul]
				756	// "... the sign of the remainder is implementation-defined."
				757	// Although (vmin/base)*base + vmin%base is always vmin.
				758	// 2011 c++ standard tightens the spec but we cannot rely on it.
				759	if (vmin % base > 0) {
				760	vmin_over_base += 1;
				761	}
				762	const char* start = text.data();
				763	const char* end = start + text.size();
				764	// loop over digits
				765	for (; start < end; ++start) {
				766	unsigned char c = static_cast<unsigned char>(start[0]);
				767	int digit = c - '0';
				768	if (digit >= base \|\| digit < 0) {
				769	*value_p = value;
				770	return false;
				771	}
				772	if (value < vmin_over_base) {
				773	*value_p = vmin;
				774	return false;
				775	}
				776	value *= base;
				777	if (value < vmin + digit) {
				778	*value_p = vmin;
				779	return false;
				780	}
				781	value -= digit;
				782	}
				783	*value_p = value;
				784	return true;
				785	}
				786
				787	template<typename IntType>
				788	bool safe_int_internal(string text, IntType* value_p) {
				789	*value_p = 0;
				790	bool negative;
				791	if (!safe_parse_sign(&text, &negative)) {
				792	return false;
				793	}
				794	if (!negative) {
				795	return safe_parse_positive_int(text, value_p);
				796	} else {
				797	return safe_parse_negative_int(text, value_p);
				798	}
				799	}
				800
				801	template<typename IntType>
				802	bool safe_uint_internal(string text, IntType* value_p) {
				803	*value_p = 0;
				804	bool negative;
				805	if (!safe_parse_sign(&text, &negative) \|\| negative) {
				806	return false;
				807	}
				808	return safe_parse_positive_int(text, value_p);
				809	}
				810
				811	// ----------------------------------------------------------------------
				812	// FastIntToBuffer()
				813	// FastInt64ToBuffer()
				814	// FastHexToBuffer()
				815	// FastHex64ToBuffer()
				816	// FastHex32ToBuffer()
				817	// ----------------------------------------------------------------------
				818
				819	// Offset into buffer where FastInt64ToBuffer places the end of string
				820	// null character. Also used by FastInt64ToBufferLeft.
				821	static const int kFastInt64ToBufferOffset = 21;
				822
				823	char FastInt64ToBuffer(int64 i, char buffer) {
				824	// We could collapse the positive and negative sections, but that
				825	// would be slightly slower for positive numbers...
				826	// 22 bytes is enough to store -2**64, -18446744073709551616.
				827	char* p = buffer + kFastInt64ToBufferOffset;
				828	*p-- = '\0';
				829	if (i >= 0) {
				830	do {
				831	*p-- = '0' + i % 10;
				832	i /= 10;
				833	} while (i > 0);
				834	return p + 1;
				835	} else {
				836	// On different platforms, % and / have different behaviors for
				837	// negative numbers, so we need to jump through hoops to make sure
				838	// we don't divide negative numbers.
				839	if (i > -10) {
				840	i = -i;
				841	*p-- = '0' + i;
				842	*p = '-';
				843	return p;
				844	} else {
				845	// Make sure we aren't at MIN_INT, in which case we can't say i = -i
				846	i = i + 10;
				847	i = -i;
				848	*p-- = '0' + i % 10;
				849	// Undo what we did a moment ago
				850	i = i / 10 + 1;
				851	do {
				852	*p-- = '0' + i % 10;
				853	i /= 10;
				854	} while (i > 0);
				855	*p = '-';
				856	return p;
				857	}
				858	}
				859	}
				860
				861	// Offset into buffer where FastInt32ToBuffer places the end of string
				862	// null character. Also used by FastInt32ToBufferLeft
				863	static const int kFastInt32ToBufferOffset = 11;
				864
				865	// Yes, this is a duplicate of FastInt64ToBuffer. But, we need this for the
				866	// compiler to generate 32 bit arithmetic instructions. It's much faster, at
				867	// least with 32 bit binaries.
				868	char FastInt32ToBuffer(int32 i, char buffer) {
				869	// We could collapse the positive and negative sections, but that
				870	// would be slightly slower for positive numbers...
				871	// 12 bytes is enough to store -2**32, -4294967296.
				872	char* p = buffer + kFastInt32ToBufferOffset;
				873	*p-- = '\0';
				874	if (i >= 0) {
				875	do {
				876	*p-- = '0' + i % 10;
				877	i /= 10;
				878	} while (i > 0);
				879	return p + 1;
				880	} else {
				881	// On different platforms, % and / have different behaviors for
				882	// negative numbers, so we need to jump through hoops to make sure
				883	// we don't divide negative numbers.
				884	if (i > -10) {
				885	i = -i;
				886	*p-- = '0' + i;
				887	*p = '-';
				888	return p;
				889	} else {
				890	// Make sure we aren't at MIN_INT, in which case we can't say i = -i
				891	i = i + 10;
				892	i = -i;
				893	*p-- = '0' + i % 10;
				894	// Undo what we did a moment ago
				895	i = i / 10 + 1;
				896	do {
				897	*p-- = '0' + i % 10;
				898	i /= 10;
				899	} while (i > 0);
				900	*p = '-';
				901	return p;
				902	}
				903	}
				904	}
				905
				906	char FastHexToBuffer(int i, char buffer) {
				907	GOOGLE_CHECK(i >= 0) << "FastHexToBuffer() wants non-negative integers, not " << i;
				908
				909	static const char *hexdigits = "0123456789abcdef";
				910	char *p = buffer + 21;
				911	*p-- = '\0';
				912	do {
				913	*p-- = hexdigits[i & 15]; // mod by 16
				914	i >>= 4; // divide by 16
				915	} while (i > 0);
				916	return p + 1;
				917	}
				918
				919	char InternalFastHexToBuffer(uint64 value, char buffer, int num_byte) {
				920	static const char *hexdigits = "0123456789abcdef";
				921	buffer[num_byte] = '\0';
				922	for (int i = num_byte - 1; i >= 0; i--) {
				923	#ifdef _M_X64
				924	// MSVC x64 platform has a bug optimizing the uint32(value) in the #else
				925	// block. Given that the uint32 cast was to improve performance on 32-bit
				926	// platforms, we use 64-bit '&' directly.
				927	buffer[i] = hexdigits[value & 0xf];
				928	#else
				929	buffer[i] = hexdigits[uint32(value) & 0xf];
				930	#endif
				931	value >>= 4;
				932	}
				933	return buffer;
				934	}
				935
				936	char FastHex64ToBuffer(uint64 value, char buffer) {
				937	return InternalFastHexToBuffer(value, buffer, 16);
				938	}
				939
				940	char FastHex32ToBuffer(uint32 value, char buffer) {
				941	return InternalFastHexToBuffer(value, buffer, 8);
				942	}
				943
				944	// ----------------------------------------------------------------------
				945	// FastInt32ToBufferLeft()
				946	// FastUInt32ToBufferLeft()
				947	// FastInt64ToBufferLeft()
				948	// FastUInt64ToBufferLeft()
				949	//
				950	// Like the Fast*ToBuffer() functions above, these are intended for speed.
				951	// Unlike the Fast*ToBuffer() functions, however, these functions write
				952	// their output to the beginning of the buffer (hence the name, as the
				953	// output is left-aligned). The caller is responsible for ensuring that
				954	// the buffer has enough space to hold the output.
				955	//
				956	// Returns a pointer to the end of the string (i.e. the null character
				957	// terminating the string).
				958	// ----------------------------------------------------------------------
				959
				960	static const char two_ASCII_digits[100][2] = {
				961	{'0','0'}, {'0','1'}, {'0','2'}, {'0','3'}, {'0','4'},
				962	{'0','5'}, {'0','6'}, {'0','7'}, {'0','8'}, {'0','9'},
				963	{'1','0'}, {'1','1'}, {'1','2'}, {'1','3'}, {'1','4'},
				964	{'1','5'}, {'1','6'}, {'1','7'}, {'1','8'}, {'1','9'},
				965	{'2','0'}, {'2','1'}, {'2','2'}, {'2','3'}, {'2','4'},
				966	{'2','5'}, {'2','6'}, {'2','7'}, {'2','8'}, {'2','9'},
				967	{'3','0'}, {'3','1'}, {'3','2'}, {'3','3'}, {'3','4'},
				968	{'3','5'}, {'3','6'}, {'3','7'}, {'3','8'}, {'3','9'},
				969	{'4','0'}, {'4','1'}, {'4','2'}, {'4','3'}, {'4','4'},
				970	{'4','5'}, {'4','6'}, {'4','7'}, {'4','8'}, {'4','9'},
				971	{'5','0'}, {'5','1'}, {'5','2'}, {'5','3'}, {'5','4'},
				972	{'5','5'}, {'5','6'}, {'5','7'}, {'5','8'}, {'5','9'},
				973	{'6','0'}, {'6','1'}, {'6','2'}, {'6','3'}, {'6','4'},
				974	{'6','5'}, {'6','6'}, {'6','7'}, {'6','8'}, {'6','9'},
				975	{'7','0'}, {'7','1'}, {'7','2'}, {'7','3'}, {'7','4'},
				976	{'7','5'}, {'7','6'}, {'7','7'}, {'7','8'}, {'7','9'},
				977	{'8','0'}, {'8','1'}, {'8','2'}, {'8','3'}, {'8','4'},
				978	{'8','5'}, {'8','6'}, {'8','7'}, {'8','8'}, {'8','9'},
				979	{'9','0'}, {'9','1'}, {'9','2'}, {'9','3'}, {'9','4'},
				980	{'9','5'}, {'9','6'}, {'9','7'}, {'9','8'}, {'9','9'}
				981	};
				982
				983	char* FastUInt32ToBufferLeft(uint32 u, char* buffer) {
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	984	uint32 digits;
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	985	const char *ASCII_digits = NULL;
				986	// The idea of this implementation is to trim the number of divides to as few
				987	// as possible by using multiplication and subtraction rather than mod (%),
				988	// and by outputting two digits at a time rather than one.
				989	// The huge-number case is first, in the hopes that the compiler will output
				990	// that case in one branch-free block of code, and only output conditional
				991	// branches into it from below.
				992	if (u >= 1000000000) { // >= 1,000,000,000
				993	digits = u / 100000000; // 100,000,000
				994	ASCII_digits = two_ASCII_digits[digits];
				995	buffer[0] = ASCII_digits[0];
				996	buffer[1] = ASCII_digits[1];
				997	buffer += 2;
				998	sublt100_000_000:
				999	u -= digits * 100000000; // 100,000,000
				1000	lt100_000_000:
				1001	digits = u / 1000000; // 1,000,000
				1002	ASCII_digits = two_ASCII_digits[digits];
				1003	buffer[0] = ASCII_digits[0];
				1004	buffer[1] = ASCII_digits[1];
				1005	buffer += 2;
				1006	sublt1_000_000:
				1007	u -= digits * 1000000; // 1,000,000
				1008	lt1_000_000:
				1009	digits = u / 10000; // 10,000
				1010	ASCII_digits = two_ASCII_digits[digits];
				1011	buffer[0] = ASCII_digits[0];
				1012	buffer[1] = ASCII_digits[1];
				1013	buffer += 2;
				1014	sublt10_000:
				1015	u -= digits * 10000; // 10,000
				1016	lt10_000:
				1017	digits = u / 100;
				1018	ASCII_digits = two_ASCII_digits[digits];
				1019	buffer[0] = ASCII_digits[0];
				1020	buffer[1] = ASCII_digits[1];
				1021	buffer += 2;
				1022	sublt100:
				1023	u -= digits * 100;
				1024	lt100:
				1025	digits = u;
				1026	ASCII_digits = two_ASCII_digits[digits];
				1027	buffer[0] = ASCII_digits[0];
				1028	buffer[1] = ASCII_digits[1];
				1029	buffer += 2;
				1030	done:
				1031	*buffer = 0;
				1032	return buffer;
				1033	}
				1034
				1035	if (u < 100) {
				1036	digits = u;
				1037	if (u >= 10) goto lt100;
				1038	*buffer++ = '0' + digits;
				1039	goto done;
				1040	}
				1041	if (u < 10000) { // 10,000
				1042	if (u >= 1000) goto lt10_000;
				1043	digits = u / 100;
				1044	*buffer++ = '0' + digits;
				1045	goto sublt100;
				1046	}
				1047	if (u < 1000000) { // 1,000,000
				1048	if (u >= 100000) goto lt1_000_000;
				1049	digits = u / 10000; // 10,000
				1050	*buffer++ = '0' + digits;
				1051	goto sublt10_000;
				1052	}
				1053	if (u < 100000000) { // 100,000,000
				1054	if (u >= 10000000) goto lt100_000_000;
				1055	digits = u / 1000000; // 1,000,000
				1056	*buffer++ = '0' + digits;
				1057	goto sublt1_000_000;
				1058	}
				1059	// we already know that u < 1,000,000,000
				1060	digits = u / 100000000; // 100,000,000
				1061	*buffer++ = '0' + digits;
				1062	goto sublt100_000_000;
				1063	}
				1064
				1065	char* FastInt32ToBufferLeft(int32 i, char* buffer) {
				1066	uint32 u = i;
				1067	if (i < 0) {
				1068	*buffer++ = '-';
				1069	u = -i;
				1070	}
				1071	return FastUInt32ToBufferLeft(u, buffer);
				1072	}
				1073
				1074	char* FastUInt64ToBufferLeft(uint64 u64, char* buffer) {
				1075	int digits;
				1076	const char *ASCII_digits = NULL;
				1077
				1078	uint32 u = static_cast<uint32>(u64);
				1079	if (u == u64) return FastUInt32ToBufferLeft(u, buffer);
				1080
				1081	uint64 top_11_digits = u64 / 1000000000;
				1082	buffer = FastUInt64ToBufferLeft(top_11_digits, buffer);
				1083	u = u64 - (top_11_digits * 1000000000);
				1084
				1085	digits = u / 10000000; // 10,000,000
				1086	GOOGLE_DCHECK_LT(digits, 100);
				1087	ASCII_digits = two_ASCII_digits[digits];
				1088	buffer[0] = ASCII_digits[0];
				1089	buffer[1] = ASCII_digits[1];
				1090	buffer += 2;
				1091	u -= digits * 10000000; // 10,000,000
				1092	digits = u / 100000; // 100,000
				1093	ASCII_digits = two_ASCII_digits[digits];
				1094	buffer[0] = ASCII_digits[0];
				1095	buffer[1] = ASCII_digits[1];
				1096	buffer += 2;
				1097	u -= digits * 100000; // 100,000
				1098	digits = u / 1000; // 1,000
				1099	ASCII_digits = two_ASCII_digits[digits];
				1100	buffer[0] = ASCII_digits[0];
				1101	buffer[1] = ASCII_digits[1];
				1102	buffer += 2;
				1103	u -= digits * 1000; // 1,000
				1104	digits = u / 10;
				1105	ASCII_digits = two_ASCII_digits[digits];
				1106	buffer[0] = ASCII_digits[0];
				1107	buffer[1] = ASCII_digits[1];
				1108	buffer += 2;
				1109	u -= digits * 10;
				1110	digits = u;
				1111	*buffer++ = '0' + digits;
				1112	*buffer = 0;
				1113	return buffer;
				1114	}
				1115
				1116	char* FastInt64ToBufferLeft(int64 i, char* buffer) {
				1117	uint64 u = i;
				1118	if (i < 0) {
				1119	*buffer++ = '-';
				1120	u = -i;
				1121	}
				1122	return FastUInt64ToBufferLeft(u, buffer);
				1123	}
				1124
				1125	// ----------------------------------------------------------------------
				1126	// SimpleItoa()
				1127	// Description: converts an integer to a string.
				1128	//
				1129	// Return value: string
				1130	// ----------------------------------------------------------------------
				1131
				1132	string SimpleItoa(int i) {
				1133	char buffer[kFastToBufferSize];
				1134	return (sizeof(i) == 4) ?
				1135	FastInt32ToBuffer(i, buffer) :
				1136	FastInt64ToBuffer(i, buffer);
				1137	}
				1138
				1139	string SimpleItoa(unsigned int i) {
				1140	char buffer[kFastToBufferSize];
				1141	return string(buffer, (sizeof(i) == 4) ?
				1142	FastUInt32ToBufferLeft(i, buffer) :
				1143	FastUInt64ToBufferLeft(i, buffer));
				1144	}
				1145
				1146	string SimpleItoa(long i) {
				1147	char buffer[kFastToBufferSize];
				1148	return (sizeof(i) == 4) ?
				1149	FastInt32ToBuffer(i, buffer) :
				1150	FastInt64ToBuffer(i, buffer);
				1151	}
				1152
				1153	string SimpleItoa(unsigned long i) {
				1154	char buffer[kFastToBufferSize];
				1155	return string(buffer, (sizeof(i) == 4) ?
				1156	FastUInt32ToBufferLeft(i, buffer) :
				1157	FastUInt64ToBufferLeft(i, buffer));
				1158	}
				1159
				1160	string SimpleItoa(long long i) {
				1161	char buffer[kFastToBufferSize];
				1162	return (sizeof(i) == 4) ?
				1163	FastInt32ToBuffer(i, buffer) :
				1164	FastInt64ToBuffer(i, buffer);
				1165	}
				1166
				1167	string SimpleItoa(unsigned long long i) {
				1168	char buffer[kFastToBufferSize];
				1169	return string(buffer, (sizeof(i) == 4) ?
				1170	FastUInt32ToBufferLeft(i, buffer) :
				1171	FastUInt64ToBufferLeft(i, buffer));
				1172	}
				1173
				1174	// ----------------------------------------------------------------------
				1175	// SimpleDtoa()
				1176	// SimpleFtoa()
				1177	// DoubleToBuffer()
				1178	// FloatToBuffer()
				1179	// We want to print the value without losing precision, but we also do
				1180	// not want to print more digits than necessary. This turns out to be
				1181	// trickier than it sounds. Numbers like 0.2 cannot be represented
				1182	// exactly in binary. If we print 0.2 with a very large precision,
				1183	// e.g. "%.50g", we get "0.2000000000000000111022302462515654042363167".
				1184	// On the other hand, if we set the precision too low, we lose
				1185	// significant digits when printing numbers that actually need them.
				1186	// It turns out there is no precision value that does the right thing
				1187	// for all numbers.
				1188	//
				1189	// Our strategy is to first try printing with a precision that is never
				1190	// over-precise, then parse the result with strtod() to see if it
				1191	// matches. If not, we print again with a precision that will always
				1192	// give a precise result, but may use more digits than necessary.
				1193	//
				1194	// An arguably better strategy would be to use the algorithm described
				1195	// in "How to Print Floating-Point Numbers Accurately" by Steele &
				1196	// White, e.g. as implemented by David M. Gay's dtoa(). It turns out,
				1197	// however, that the following implementation is about as fast as
				1198	// DMG's code. Furthermore, DMG's code locks mutexes, which means it
				1199	// will not scale well on multi-core machines. DMG's code is slightly
				1200	// more accurate (in that it will never use more digits than
				1201	// necessary), but this is probably irrelevant for most users.
				1202	//
				1203	// Rob Pike and Ken Thompson also have an implementation of dtoa() in
				1204	// third_party/fmt/fltfmt.cc. Their implementation is similar to this
				1205	// one in that it makes guesses and then uses strtod() to check them.
				1206	// Their implementation is faster because they use their own code to
				1207	// generate the digits in the first place rather than use snprintf(),
				1208	// thus avoiding format string parsing overhead. However, this makes
				1209	// it considerably more complicated than the following implementation,
				1210	// and it is embedded in a larger library. If speed turns out to be
				1211	// an issue, we could re-implement this in terms of their
				1212	// implementation.
				1213	// ----------------------------------------------------------------------
				1214
				1215	string SimpleDtoa(double value) {
				1216	char buffer[kDoubleToBufferSize];
				1217	return DoubleToBuffer(value, buffer);
				1218	}
				1219
				1220	string SimpleFtoa(float value) {
				1221	char buffer[kFloatToBufferSize];
				1222	return FloatToBuffer(value, buffer);
				1223	}
				1224
				1225	static inline bool IsValidFloatChar(char c) {
				1226	return ('0' <= c && c <= '9') \|\|
				1227	c == 'e' \|\| c == 'E' \|\|
				1228	c == '+' \|\| c == '-';
				1229	}
				1230
				1231	void DelocalizeRadix(char* buffer) {
				1232	// Fast check: if the buffer has a normal decimal point, assume no
				1233	// translation is needed.
				1234	if (strchr(buffer, '.') != NULL) return;
				1235
				1236	// Find the first unknown character.
				1237	while (IsValidFloatChar(*buffer)) ++buffer;
				1238
				1239	if (*buffer == '\0') {
				1240	// No radix character found.
				1241	return;
				1242	}
				1243
				1244	// We are now pointing at the locale-specific radix character. Replace it
				1245	// with '.'.
				1246	*buffer = '.';
				1247	++buffer;
				1248
				1249	if (!IsValidFloatChar(buffer) && buffer != '\0') {
				1250	// It appears the radix was a multi-byte character. We need to remove the
				1251	// extra bytes.
				1252	char* target = buffer;
				1253	do { ++buffer; } while (!IsValidFloatChar(buffer) && buffer != '\0');
				1254	memmove(target, buffer, strlen(buffer) + 1);
				1255	}
				1256	}
				1257
				1258	char* DoubleToBuffer(double value, char* buffer) {
				1259	// DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
				1260	// platforms these days. Just in case some system exists where DBL_DIG
				1261	// is significantly larger -- and risks overflowing our buffer -- we have
				1262	// this assert.
				1263	GOOGLE_COMPILE_ASSERT(DBL_DIG < 20, DBL_DIG_is_too_big);
				1264
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	1265	if (value == std::numeric_limits<double>::infinity()) {
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1266	strcpy(buffer, "inf");
				1267	return buffer;
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	1268	} else if (value == -std::numeric_limits<double>::infinity()) {
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1269	strcpy(buffer, "-inf");
				1270	return buffer;
				1271	} else if (MathLimits<double>::IsNaN(value)) {
				1272	strcpy(buffer, "nan");
				1273	return buffer;
				1274	}
				1275
				1276	int snprintf_result =
				1277	snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG, value);
				1278
				1279	// The snprintf should never overflow because the buffer is significantly
				1280	// larger than the precision we asked for.
				1281	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
				1282
				1283	// We need to make parsed_value volatile in order to force the compiler to
				1284	// write it out to the stack. Otherwise, it may keep the value in a
				1285	// register, and if it does that, it may keep it as a long double instead
				1286	// of a double. This long double may have extra bits that make it compare
				1287	// unequal to "value" even though it would be exactly equal if it were
				1288	// truncated to a double.
				1289	volatile double parsed_value = strtod(buffer, NULL);
				1290	if (parsed_value != value) {
				1291	int snprintf_result =
				1292	snprintf(buffer, kDoubleToBufferSize, "%.*g", DBL_DIG+2, value);
				1293
				1294	// Should never overflow; see above.
				1295	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kDoubleToBufferSize);
				1296	}
				1297
				1298	DelocalizeRadix(buffer);
				1299	return buffer;
				1300	}
				1301
				1302	static int memcasecmp(const char s1, const char s2, size_t len) {
				1303	const unsigned char us1 = reinterpret_cast<const unsigned char >(s1);
				1304	const unsigned char us2 = reinterpret_cast<const unsigned char >(s2);
				1305
				1306	for ( int i = 0; i < len; i++ ) {
				1307	const int diff =
				1308	static_cast<int>(static_cast<unsigned char>(ascii_tolower(us1[i]))) -
				1309	static_cast<int>(static_cast<unsigned char>(ascii_tolower(us2[i])));
				1310	if (diff != 0) return diff;
				1311	}
				1312	return 0;
				1313	}
				1314
				1315	inline bool CaseEqual(StringPiece s1, StringPiece s2) {
				1316	if (s1.size() != s2.size()) return false;
				1317	return memcasecmp(s1.data(), s2.data(), s1.size()) == 0;
				1318	}
				1319
				1320	bool safe_strtob(StringPiece str, bool* value) {
				1321	GOOGLE_CHECK(value != NULL) << "NULL output boolean given.";
				1322	if (CaseEqual(str, "true") \|\| CaseEqual(str, "t") \|\|
				1323	CaseEqual(str, "yes") \|\| CaseEqual(str, "y") \|\|
				1324	CaseEqual(str, "1")) {
				1325	*value = true;
				1326	return true;
				1327	}
				1328	if (CaseEqual(str, "false") \|\| CaseEqual(str, "f") \|\|
				1329	CaseEqual(str, "no") \|\| CaseEqual(str, "n") \|\|
				1330	CaseEqual(str, "0")) {
				1331	*value = false;
				1332	return true;
				1333	}
				1334	return false;
				1335	}
				1336
				1337	bool safe_strtof(const char* str, float* value) {
				1338	char* endptr;
				1339	errno = 0; // errno only gets set on errors
				1340	#if defined(_WIN32) \|\| defined (__hpux) // has no strtof()
				1341	*value = strtod(str, &endptr);
				1342	#else
				1343	*value = strtof(str, &endptr);
				1344	#endif
				1345	return str != 0 && endptr == 0 && errno == 0;
				1346	}
				1347
				1348	bool safe_strtod(const char* str, double* value) {
				1349	char* endptr;
				1350	*value = strtod(str, &endptr);
				1351	if (endptr != str) {
				1352	while (ascii_isspace(*endptr)) ++endptr;
				1353	}
				1354	// Ignore range errors from strtod. The values it
				1355	// returns on underflow and overflow are the right
				1356	// fallback in a robust setting.
				1357	return str != '\0' && endptr == '\0';
				1358	}
				1359
				1360	bool safe_strto32(const string& str, int32* value) {
				1361	return safe_int_internal(str, value);
				1362	}
				1363
				1364	bool safe_strtou32(const string& str, uint32* value) {
				1365	return safe_uint_internal(str, value);
				1366	}
				1367
				1368	bool safe_strto64(const string& str, int64* value) {
				1369	return safe_int_internal(str, value);
				1370	}
				1371
				1372	bool safe_strtou64(const string& str, uint64* value) {
				1373	return safe_uint_internal(str, value);
				1374	}
				1375
				1376	char* FloatToBuffer(float value, char* buffer) {
				1377	// FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
				1378	// platforms these days. Just in case some system exists where FLT_DIG
				1379	// is significantly larger -- and risks overflowing our buffer -- we have
				1380	// this assert.
				1381	GOOGLE_COMPILE_ASSERT(FLT_DIG < 10, FLT_DIG_is_too_big);
				1382
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	1383	if (value == std::numeric_limits<double>::infinity()) {
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1384	strcpy(buffer, "inf");
				1385	return buffer;
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	1386	} else if (value == -std::numeric_limits<double>::infinity()) {
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1387	strcpy(buffer, "-inf");
				1388	return buffer;
				1389	} else if (MathLimits<float>::IsNaN(value)) {
				1390	strcpy(buffer, "nan");
				1391	return buffer;
				1392	}
				1393
				1394	int snprintf_result =
				1395	snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG, value);
				1396
				1397	// The snprintf should never overflow because the buffer is significantly
				1398	// larger than the precision we asked for.
				1399	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
				1400
				1401	float parsed_value;
				1402	if (!safe_strtof(buffer, &parsed_value) \|\| parsed_value != value) {
				1403	int snprintf_result =
Austin Schuh	40c1652	2018-10-28 20:27:54 -0700	[diff] [blame^]	1404	snprintf(buffer, kFloatToBufferSize, "%.*g", FLT_DIG+3, value);
Brian Silverman	9c614bc	2016-02-15 20:20:02 -0500	[diff] [blame]	1405
				1406	// Should never overflow; see above.
				1407	GOOGLE_DCHECK(snprintf_result > 0 && snprintf_result < kFloatToBufferSize);
				1408	}
				1409
				1410	DelocalizeRadix(buffer);
				1411	return buffer;
				1412	}
				1413
				1414	namespace strings {
				1415
				1416	AlphaNum::AlphaNum(strings::Hex hex) {
				1417	char *const end = &digits[kFastToBufferSize];
				1418	char *writer = end;
				1419	uint64 value = hex.value;
				1420	uint64 width = hex.spec;
				1421	// We accomplish minimum width by OR'ing in 0x10000 to the user's value,
				1422	// where 0x10000 is the smallest hex number that is as wide as the user
				1423	// asked for.
				1424	uint64 mask = ((static_cast<uint64>(1) << (width - 1) * 4)) \| value;
				1425	static const char hexdigits[] = "0123456789abcdef";
				1426	do {
				1427	*--writer = hexdigits[value & 0xF];
				1428	value >>= 4;
				1429	mask >>= 4;
				1430	} while (mask != 0);
				1431	piece_data_ = writer;
				1432	piece_size_ = end - writer;
				1433	}
				1434
				1435	} // namespace strings
				1436
				1437	// ----------------------------------------------------------------------
				1438	// StrCat()
				1439	// This merges the given strings or integers, with no delimiter. This
				1440	// is designed to be the fastest possible way to construct a string out
				1441	// of a mix of raw C strings, C++ strings, and integer values.
				1442	// ----------------------------------------------------------------------
				1443
				1444	// Append is merely a version of memcpy that returns the address of the byte
				1445	// after the area just overwritten. It comes in multiple flavors to minimize
				1446	// call overhead.
				1447	static char Append1(char out, const AlphaNum &x) {
				1448	memcpy(out, x.data(), x.size());
				1449	return out + x.size();
				1450	}
				1451
				1452	static char Append2(char out, const AlphaNum &x1, const AlphaNum &x2) {
				1453	memcpy(out, x1.data(), x1.size());
				1454	out += x1.size();
				1455
				1456	memcpy(out, x2.data(), x2.size());
				1457	return out + x2.size();
				1458	}
				1459
				1460	static char Append4(char out,
				1461	const AlphaNum &x1, const AlphaNum &x2,
				1462	const AlphaNum &x3, const AlphaNum &x4) {
				1463	memcpy(out, x1.data(), x1.size());
				1464	out += x1.size();
				1465
				1466	memcpy(out, x2.data(), x2.size());
				1467	out += x2.size();
				1468
				1469	memcpy(out, x3.data(), x3.size());
				1470	out += x3.size();
				1471
				1472	memcpy(out, x4.data(), x4.size());
				1473	return out + x4.size();
				1474	}
				1475
				1476	string StrCat(const AlphaNum &a, const AlphaNum &b) {
				1477	string result;
				1478	result.resize(a.size() + b.size());
				1479	char const begin = &result.begin();
				1480	char *out = Append2(begin, a, b);
				1481	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1482	return result;
				1483	}
				1484
				1485	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
				1486	string result;
				1487	result.resize(a.size() + b.size() + c.size());
				1488	char const begin = &result.begin();
				1489	char *out = Append2(begin, a, b);
				1490	out = Append1(out, c);
				1491	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1492	return result;
				1493	}
				1494
				1495	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1496	const AlphaNum &d) {
				1497	string result;
				1498	result.resize(a.size() + b.size() + c.size() + d.size());
				1499	char const begin = &result.begin();
				1500	char *out = Append4(begin, a, b, c, d);
				1501	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1502	return result;
				1503	}
				1504
				1505	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1506	const AlphaNum &d, const AlphaNum &e) {
				1507	string result;
				1508	result.resize(a.size() + b.size() + c.size() + d.size() + e.size());
				1509	char const begin = &result.begin();
				1510	char *out = Append4(begin, a, b, c, d);
				1511	out = Append1(out, e);
				1512	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1513	return result;
				1514	}
				1515
				1516	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1517	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f) {
				1518	string result;
				1519	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1520	f.size());
				1521	char const begin = &result.begin();
				1522	char *out = Append4(begin, a, b, c, d);
				1523	out = Append2(out, e, f);
				1524	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1525	return result;
				1526	}
				1527
				1528	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1529	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1530	const AlphaNum &g) {
				1531	string result;
				1532	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1533	f.size() + g.size());
				1534	char const begin = &result.begin();
				1535	char *out = Append4(begin, a, b, c, d);
				1536	out = Append2(out, e, f);
				1537	out = Append1(out, g);
				1538	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1539	return result;
				1540	}
				1541
				1542	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1543	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1544	const AlphaNum &g, const AlphaNum &h) {
				1545	string result;
				1546	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1547	f.size() + g.size() + h.size());
				1548	char const begin = &result.begin();
				1549	char *out = Append4(begin, a, b, c, d);
				1550	out = Append4(out, e, f, g, h);
				1551	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1552	return result;
				1553	}
				1554
				1555	string StrCat(const AlphaNum &a, const AlphaNum &b, const AlphaNum &c,
				1556	const AlphaNum &d, const AlphaNum &e, const AlphaNum &f,
				1557	const AlphaNum &g, const AlphaNum &h, const AlphaNum &i) {
				1558	string result;
				1559	result.resize(a.size() + b.size() + c.size() + d.size() + e.size() +
				1560	f.size() + g.size() + h.size() + i.size());
				1561	char const begin = &result.begin();
				1562	char *out = Append4(begin, a, b, c, d);
				1563	out = Append4(out, e, f, g, h);
				1564	out = Append1(out, i);
				1565	GOOGLE_DCHECK_EQ(out, begin + result.size());
				1566	return result;
				1567	}
				1568
				1569	// It's possible to call StrAppend with a char * pointer that is partway into
				1570	// the string we're appending to. However the results of this are random.
				1571	// Therefore, check for this in debug mode. Use unsigned math so we only have
				1572	// to do one comparison.
				1573	#define GOOGLE_DCHECK_NO_OVERLAP(dest, src) \
				1574	GOOGLE_DCHECK_GT(uintptr_t((src).data() - (dest).data()), \
				1575	uintptr_t((dest).size()))
				1576
				1577	void StrAppend(string *result, const AlphaNum &a) {
				1578	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1579	result->append(a.data(), a.size());
				1580	}
				1581
				1582	void StrAppend(string *result, const AlphaNum &a, const AlphaNum &b) {
				1583	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1584	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1585	string::size_type old_size = result->size();
				1586	result->resize(old_size + a.size() + b.size());
				1587	char const begin = &result->begin();
				1588	char *out = Append2(begin + old_size, a, b);
				1589	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1590	}
				1591
				1592	void StrAppend(string *result,
				1593	const AlphaNum &a, const AlphaNum &b, const AlphaNum &c) {
				1594	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1595	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1596	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
				1597	string::size_type old_size = result->size();
				1598	result->resize(old_size + a.size() + b.size() + c.size());
				1599	char const begin = &result->begin();
				1600	char *out = Append2(begin + old_size, a, b);
				1601	out = Append1(out, c);
				1602	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1603	}
				1604
				1605	void StrAppend(string *result,
				1606	const AlphaNum &a, const AlphaNum &b,
				1607	const AlphaNum &c, const AlphaNum &d) {
				1608	GOOGLE_DCHECK_NO_OVERLAP(*result, a);
				1609	GOOGLE_DCHECK_NO_OVERLAP(*result, b);
				1610	GOOGLE_DCHECK_NO_OVERLAP(*result, c);
				1611	GOOGLE_DCHECK_NO_OVERLAP(*result, d);
				1612	string::size_type old_size = result->size();
				1613	result->resize(old_size + a.size() + b.size() + c.size() + d.size());
				1614	char const begin = &result->begin();
				1615	char *out = Append4(begin + old_size, a, b, c, d);
				1616	GOOGLE_DCHECK_EQ(out, begin + result->size());
				1617	}
				1618
				1619	int GlobalReplaceSubstring(const string& substring,
				1620	const string& replacement,
				1621	string* s) {
				1622	GOOGLE_CHECK(s != NULL);
				1623	if (s->empty() \|\| substring.empty())
				1624	return 0;
				1625	string tmp;
				1626	int num_replacements = 0;
				1627	int pos = 0;
				1628	for (int match_pos = s->find(substring.data(), pos, substring.length());
				1629	match_pos != string::npos;
				1630	pos = match_pos + substring.length(),
				1631	match_pos = s->find(substring.data(), pos, substring.length())) {
				1632	++num_replacements;
				1633	// Append the original content before the match.
				1634	tmp.append(*s, pos, match_pos - pos);
				1635	// Append the replacement for the match.
				1636	tmp.append(replacement.begin(), replacement.end());
				1637	}
				1638	// Append the content after the last match. If no replacements were made, the
				1639	// original string is left untouched.
				1640	if (num_replacements > 0) {
				1641	tmp.append(*s, pos, s->length() - pos);
				1642	s->swap(tmp);
				1643	}
				1644	return num_replacements;
				1645	}
				1646
				1647	int CalculateBase64EscapedLen(int input_len, bool do_padding) {
				1648	// Base64 encodes three bytes of input at a time. If the input is not
				1649	// divisible by three, we pad as appropriate.
				1650	//
				1651	// (from http://tools.ietf.org/html/rfc3548)
				1652	// Special processing is performed if fewer than 24 bits are available
				1653	// at the end of the data being encoded. A full encoding quantum is
				1654	// always completed at the end of a quantity. When fewer than 24 input
				1655	// bits are available in an input group, zero bits are added (on the
				1656	// right) to form an integral number of 6-bit groups. Padding at the
				1657	// end of the data is performed using the '=' character. Since all base
				1658	// 64 input is an integral number of octets, only the following cases
				1659	// can arise:
				1660
				1661
				1662	// Base64 encodes each three bytes of input into four bytes of output.
				1663	int len = (input_len / 3) * 4;
				1664
				1665	if (input_len % 3 == 0) {
				1666	// (from http://tools.ietf.org/html/rfc3548)
				1667	// (1) the final quantum of encoding input is an integral multiple of 24
				1668	// bits; here, the final unit of encoded output will be an integral
				1669	// multiple of 4 characters with no "=" padding,
				1670	} else if (input_len % 3 == 1) {
				1671	// (from http://tools.ietf.org/html/rfc3548)
				1672	// (2) the final quantum of encoding input is exactly 8 bits; here, the
				1673	// final unit of encoded output will be two characters followed by two
				1674	// "=" padding characters, or
				1675	len += 2;
				1676	if (do_padding) {
				1677	len += 2;
				1678	}
				1679	} else { // (input_len % 3 == 2)
				1680	// (from http://tools.ietf.org/html/rfc3548)
				1681	// (3) the final quantum of encoding input is exactly 16 bits; here, the
				1682	// final unit of encoded output will be three characters followed by one
				1683	// "=" padding character.
				1684	len += 3;
				1685	if (do_padding) {
				1686	len += 1;
				1687	}
				1688	}
				1689
				1690	assert(len >= input_len); // make sure we didn't overflow
				1691	return len;
				1692	}
				1693
				1694	// Base64Escape does padding, so this calculation includes padding.
				1695	int CalculateBase64EscapedLen(int input_len) {
				1696	return CalculateBase64EscapedLen(input_len, true);
				1697	}
				1698
				1699	// ----------------------------------------------------------------------
				1700	// int Base64Unescape() - base64 decoder
				1701	// int Base64Escape() - base64 encoder
				1702	// int WebSafeBase64Unescape() - Google's variation of base64 decoder
				1703	// int WebSafeBase64Escape() - Google's variation of base64 encoder
				1704	//
				1705	// Check out
				1706	// http://tools.ietf.org/html/rfc2045 for formal description, but what we
				1707	// care about is that...
				1708	// Take the encoded stuff in groups of 4 characters and turn each
				1709	// character into a code 0 to 63 thus:
				1710	// A-Z map to 0 to 25
				1711	// a-z map to 26 to 51
				1712	// 0-9 map to 52 to 61
				1713	// +(- for WebSafe) maps to 62
				1714	// /(_ for WebSafe) maps to 63
				1715	// There will be four numbers, all less than 64 which can be represented
				1716	// by a 6 digit binary number (aaaaaa, bbbbbb, cccccc, dddddd respectively).
				1717	// Arrange the 6 digit binary numbers into three bytes as such:
				1718	// aaaaaabb bbbbcccc ccdddddd
				1719	// Equals signs (one or two) are used at the end of the encoded block to
				1720	// indicate that the text was not an integer multiple of three bytes long.
				1721	// ----------------------------------------------------------------------
				1722
				1723	int Base64UnescapeInternal(const char *src_param, int szsrc,
				1724	char *dest, int szdest,
				1725	const signed char* unbase64) {
				1726	static const char kPad64Equals = '=';
				1727	static const char kPad64Dot = '.';
				1728
				1729	int decode = 0;
				1730	int destidx = 0;
				1731	int state = 0;
				1732	unsigned int ch = 0;
				1733	unsigned int temp = 0;
				1734
				1735	// If "char" is signed by default, using *src as an array index results in
				1736	// accessing negative array elements. Treat the input as a pointer to
				1737	// unsigned char to avoid this.
				1738	const unsigned char src = reinterpret_cast<const unsigned char>(src_param);
				1739
				1740	// The GET_INPUT macro gets the next input character, skipping
				1741	// over any whitespace, and stopping when we reach the end of the
				1742	// string or when we read any non-data character. The arguments are
				1743	// an arbitrary identifier (used as a label for goto) and the number
				1744	// of data bytes that must remain in the input to avoid aborting the
				1745	// loop.
				1746	#define GET_INPUT(label, remain) \
				1747	label: \
				1748	--szsrc; \
				1749	ch = *src++; \
				1750	decode = unbase64[ch]; \
				1751	if (decode < 0) { \
				1752	if (ascii_isspace(ch) && szsrc >= remain) \
				1753	goto label; \
				1754	state = 4 - remain; \
				1755	break; \
				1756	}
				1757
				1758	// if dest is null, we're just checking to see if it's legal input
				1759	// rather than producing output. (I suspect this could just be done
				1760	// with a regexp...). We duplicate the loop so this test can be
				1761	// outside it instead of in every iteration.
				1762
				1763	if (dest) {
				1764	// This loop consumes 4 input bytes and produces 3 output bytes
				1765	// per iteration. We can't know at the start that there is enough
				1766	// data left in the string for a full iteration, so the loop may
				1767	// break out in the middle; if so 'state' will be set to the
				1768	// number of input bytes read.
				1769
				1770	while (szsrc >= 4) {
				1771	// We'll start by optimistically assuming that the next four
				1772	// bytes of the string (src[0..3]) are four good data bytes
				1773	// (that is, no nulls, whitespace, padding chars, or illegal
				1774	// chars). We need to test src[0..2] for nulls individually
				1775	// before constructing temp to preserve the property that we
				1776	// never read past a null in the string (no matter how long
				1777	// szsrc claims the string is).
				1778
				1779	if (!src[0] \|\| !src[1] \|\| !src[2] \|\|
				1780	(temp = ((unsigned(unbase64[src[0]]) << 18) \|
				1781	(unsigned(unbase64[src[1]]) << 12) \|
				1782	(unsigned(unbase64[src[2]]) << 6) \|
				1783	(unsigned(unbase64[src[3]])))) & 0x80000000) {
				1784	// Iff any of those four characters was bad (null, illegal,
				1785	// whitespace, padding), then temp's high bit will be set
				1786	// (because unbase64[] is -1 for all bad characters).
				1787	//
				1788	// We'll back up and resort to the slower decoder, which knows
				1789	// how to handle those cases.
				1790
				1791	GET_INPUT(first, 4);
				1792	temp = decode;
				1793	GET_INPUT(second, 3);
				1794	temp = (temp << 6) \| decode;
				1795	GET_INPUT(third, 2);
				1796	temp = (temp << 6) \| decode;
				1797	GET_INPUT(fourth, 1);
				1798	temp = (temp << 6) \| decode;
				1799	} else {
				1800	// We really did have four good data bytes, so advance four
				1801	// characters in the string.
				1802
				1803	szsrc -= 4;
				1804	src += 4;
				1805	decode = -1;
				1806	ch = '\0';
				1807	}
				1808
				1809	// temp has 24 bits of input, so write that out as three bytes.
				1810
				1811	if (destidx+3 > szdest) return -1;
				1812	dest[destidx+2] = temp;
				1813	temp >>= 8;
				1814	dest[destidx+1] = temp;
				1815	temp >>= 8;
				1816	dest[destidx] = temp;
				1817	destidx += 3;
				1818	}
				1819	} else {
				1820	while (szsrc >= 4) {
				1821	if (!src[0] \|\| !src[1] \|\| !src[2] \|\|
				1822	(temp = ((unsigned(unbase64[src[0]]) << 18) \|
				1823	(unsigned(unbase64[src[1]]) << 12) \|
				1824	(unsigned(unbase64[src[2]]) << 6) \|
				1825	(unsigned(unbase64[src[3]])))) & 0x80000000) {
				1826	GET_INPUT(first_no_dest, 4);
				1827	GET_INPUT(second_no_dest, 3);
				1828	GET_INPUT(third_no_dest, 2);
				1829	GET_INPUT(fourth_no_dest, 1);
				1830	} else {
				1831	szsrc -= 4;
				1832	src += 4;
				1833	decode = -1;
				1834	ch = '\0';
				1835	}
				1836	destidx += 3;
				1837	}
				1838	}
				1839
				1840	#undef GET_INPUT
				1841
				1842	// if the loop terminated because we read a bad character, return
				1843	// now.
				1844	if (decode < 0 && ch != '\0' &&
				1845	ch != kPad64Equals && ch != kPad64Dot && !ascii_isspace(ch))
				1846	return -1;
				1847
				1848	if (ch == kPad64Equals \|\| ch == kPad64Dot) {
				1849	// if we stopped by hitting an '=' or '.', un-read that character -- we'll
				1850	// look at it again when we count to check for the proper number of
				1851	// equals signs at the end.
				1852	++szsrc;
				1853	--src;
				1854	} else {
				1855	// This loop consumes 1 input byte per iteration. It's used to
				1856	// clean up the 0-3 input bytes remaining when the first, faster
				1857	// loop finishes. 'temp' contains the data from 'state' input
				1858	// characters read by the first loop.
				1859	while (szsrc > 0) {
				1860	--szsrc;
				1861	ch = *src++;
				1862	decode = unbase64[ch];
				1863	if (decode < 0) {
				1864	if (ascii_isspace(ch)) {
				1865	continue;
				1866	} else if (ch == '\0') {
				1867	break;
				1868	} else if (ch == kPad64Equals \|\| ch == kPad64Dot) {
				1869	// back up one character; we'll read it again when we check
				1870	// for the correct number of pad characters at the end.
				1871	++szsrc;
				1872	--src;
				1873	break;
				1874	} else {
				1875	return -1;
				1876	}
				1877	}
				1878
				1879	// Each input character gives us six bits of output.
				1880	temp = (temp << 6) \| decode;
				1881	++state;
				1882	if (state == 4) {
				1883	// If we've accumulated 24 bits of output, write that out as
				1884	// three bytes.
				1885	if (dest) {
				1886	if (destidx+3 > szdest) return -1;
				1887	dest[destidx+2] = temp;
				1888	temp >>= 8;
				1889	dest[destidx+1] = temp;
				1890	temp >>= 8;
				1891	dest[destidx] = temp;
				1892	}
				1893	destidx += 3;
				1894	state = 0;
				1895	temp = 0;
				1896	}
				1897	}
				1898	}
				1899
				1900	// Process the leftover data contained in 'temp' at the end of the input.
				1901	int expected_equals = 0;
				1902	switch (state) {
				1903	case 0:
				1904	// Nothing left over; output is a multiple of 3 bytes.
				1905	break;
				1906
				1907	case 1:
				1908	// Bad input; we have 6 bits left over.
				1909	return -1;
				1910
				1911	case 2:
				1912	// Produce one more output byte from the 12 input bits we have left.
				1913	if (dest) {
				1914	if (destidx+1 > szdest) return -1;
				1915	temp >>= 4;
				1916	dest[destidx] = temp;
				1917	}
				1918	++destidx;
				1919	expected_equals = 2;
				1920	break;
				1921
				1922	case 3:
				1923	// Produce two more output bytes from the 18 input bits we have left.
				1924	if (dest) {
				1925	if (destidx+2 > szdest) return -1;
				1926	temp >>= 2;
				1927	dest[destidx+1] = temp;
				1928	temp >>= 8;
				1929	dest[destidx] = temp;
				1930	}
				1931	destidx += 2;
				1932	expected_equals = 1;
				1933	break;
				1934
				1935	default:
				1936	// state should have no other values at this point.
				1937	GOOGLE_LOG(FATAL) << "This can't happen; base64 decoder state = " << state;
				1938	}
				1939
				1940	// The remainder of the string should be all whitespace, mixed with
				1941	// exactly 0 equals signs, or exactly 'expected_equals' equals
				1942	// signs. (Always accepting 0 equals signs is a google extension
				1943	// not covered in the RFC, as is accepting dot as the pad character.)
				1944
				1945	int equals = 0;
				1946	while (szsrc > 0 && *src) {
				1947	if (src == kPad64Equals \|\| src == kPad64Dot)
				1948	++equals;
				1949	else if (!ascii_isspace(*src))
				1950	return -1;
				1951	--szsrc;
				1952	++src;
				1953	}
				1954
				1955	return (equals == 0 \|\| equals == expected_equals) ? destidx : -1;
				1956	}
				1957
				1958	// The arrays below were generated by the following code
				1959	// #include <sys/time.h>
				1960	// #include <stdlib.h>
				1961	// #include <string.h>
				1962	// main()
				1963	// {
				1964	// static const char Base64[] =
				1965	// "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
				1966	// char *pos;
				1967	// int idx, i, j;
				1968	// printf(" ");
				1969	// for (i = 0; i < 255; i += 8) {
				1970	// for (j = i; j < i + 8; j++) {
				1971	// pos = strchr(Base64, j);
				1972	// if ((pos == NULL) \|\| (j == 0))
				1973	// idx = -1;
				1974	// else
				1975	// idx = pos - Base64;
				1976	// if (idx == -1)
				1977	// printf(" %2d, ", idx);
				1978	// else
				1979	// printf(" %2d/%c/,", idx, j);
				1980	// }
				1981	// printf("\n ");
				1982	// }
				1983	// }
				1984	//
				1985	// where the value of "Base64[]" was replaced by one of the base-64 conversion
				1986	// tables from the functions below.
				1987	static const signed char kUnBase64[] = {
				1988	-1, -1, -1, -1, -1, -1, -1, -1,
				1989	-1, -1, -1, -1, -1, -1, -1, -1,
				1990	-1, -1, -1, -1, -1, -1, -1, -1,
				1991	-1, -1, -1, -1, -1, -1, -1, -1,
				1992	-1, -1, -1, -1, -1, -1, -1, -1,
				1993	-1, -1, -1, 62/+/, -1, -1, -1, 63// /,
				1994	52/0/, 53/1/, 54/2/, 55/3/, 56/4/, 57/5/, 58/6/, 59/7/,
				1995	60/8/, 61/9/, -1, -1, -1, -1, -1, -1,
				1996	-1, 0/A/, 1/B/, 2/C/, 3/D/, 4/E/, 5/F/, 6/G/,
				1997	07/H/, 8/I/, 9/J/, 10/K/, 11/L/, 12/M/, 13/N/, 14/O/,
				1998	15/P/, 16/Q/, 17/R/, 18/S/, 19/T/, 20/U/, 21/V/, 22/W/,
				1999	23/X/, 24/Y/, 25/Z/, -1, -1, -1, -1, -1,
				2000	-1, 26/a/, 27/b/, 28/c/, 29/d/, 30/e/, 31/f/, 32/g/,
				2001	33/h/, 34/i/, 35/j/, 36/k/, 37/l/, 38/m/, 39/n/, 40/o/,
				2002	41/p/, 42/q/, 43/r/, 44/s/, 45/t/, 46/u/, 47/v/, 48/w/,
				2003	49/x/, 50/y/, 51/z/, -1, -1, -1, -1, -1,
				2004	-1, -1, -1, -1, -1, -1, -1, -1,
				2005	-1, -1, -1, -1, -1, -1, -1, -1,
				2006	-1, -1, -1, -1, -1, -1, -1, -1,
				2007	-1, -1, -1, -1, -1, -1, -1, -1,
				2008	-1, -1, -1, -1, -1, -1, -1, -1,
				2009	-1, -1, -1, -1, -1, -1, -1, -1,
				2010	-1, -1, -1, -1, -1, -1, -1, -1,
				2011	-1, -1, -1, -1, -1, -1, -1, -1,
				2012	-1, -1, -1, -1, -1, -1, -1, -1,
				2013	-1, -1, -1, -1, -1, -1, -1, -1,
				2014	-1, -1, -1, -1, -1, -1, -1, -1,
				2015	-1, -1, -1, -1, -1, -1, -1, -1,
				2016	-1, -1, -1, -1, -1, -1, -1, -1,
				2017	-1, -1, -1, -1, -1, -1, -1, -1,
				2018	-1, -1, -1, -1, -1, -1, -1, -1,
				2019	-1, -1, -1, -1, -1, -1, -1, -1
				2020	};
				2021	static const signed char kUnWebSafeBase64[] = {
				2022	-1, -1, -1, -1, -1, -1, -1, -1,
				2023	-1, -1, -1, -1, -1, -1, -1, -1,
				2024	-1, -1, -1, -1, -1, -1, -1, -1,
				2025	-1, -1, -1, -1, -1, -1, -1, -1,
				2026	-1, -1, -1, -1, -1, -1, -1, -1,
				2027	-1, -1, -1, -1, -1, 62/-/, -1, -1,
				2028	52/0/, 53/1/, 54/2/, 55/3/, 56/4/, 57/5/, 58/6/, 59/7/,
				2029	60/8/, 61/9/, -1, -1, -1, -1, -1, -1,
				2030	-1, 0/A/, 1/B/, 2/C/, 3/D/, 4/E/, 5/F/, 6/G/,
				2031	07/H/, 8/I/, 9/J/, 10/K/, 11/L/, 12/M/, 13/N/, 14/O/,
				2032	15/P/, 16/Q/, 17/R/, 18/S/, 19/T/, 20/U/, 21/V/, 22/W/,
				2033	23/X/, 24/Y/, 25/Z/, -1, -1, -1, -1, 63/_/,
				2034	-1, 26/a/, 27/b/, 28/c/, 29/d/, 30/e/, 31/f/, 32/g/,
				2035	33/h/, 34/i/, 35/j/, 36/k/, 37/l/, 38/m/, 39/n/, 40/o/,
				2036	41/p/, 42/q/, 43/r/, 44/s/, 45/t/, 46/u/, 47/v/, 48/w/,
				2037	49/x/, 50/y/, 51/z/, -1, -1, -1, -1, -1,
				2038	-1, -1, -1, -1, -1, -1, -1, -1,
				2039	-1, -1, -1, -1, -1, -1, -1, -1,
				2040	-1, -1, -1, -1, -1, -1, -1, -1,
				2041	-1, -1, -1, -1, -1, -1, -1, -1,
				2042	-1, -1, -1, -1, -1, -1, -1, -1,
				2043	-1, -1, -1, -1, -1, -1, -1, -1,
				2044	-1, -1, -1, -1, -1, -1, -1, -1,
				2045	-1, -1, -1, -1, -1, -1, -1, -1,
				2046	-1, -1, -1, -1, -1, -1, -1, -1,
				2047	-1, -1, -1, -1, -1, -1, -1, -1,
				2048	-1, -1, -1, -1, -1, -1, -1, -1,
				2049	-1, -1, -1, -1, -1, -1, -1, -1,
				2050	-1, -1, -1, -1, -1, -1, -1, -1,
				2051	-1, -1, -1, -1, -1, -1, -1, -1,
				2052	-1, -1, -1, -1, -1, -1, -1, -1,
				2053	-1, -1, -1, -1, -1, -1, -1, -1
				2054	};
				2055
				2056	int WebSafeBase64Unescape(const char src, int szsrc, char dest, int szdest) {
				2057	return Base64UnescapeInternal(src, szsrc, dest, szdest, kUnWebSafeBase64);
				2058	}
				2059
				2060	static bool Base64UnescapeInternal(const char* src, int slen, string* dest,
				2061	const signed char* unbase64) {
				2062	// Determine the size of the output string. Base64 encodes every 3 bytes into
				2063	// 4 characters. any leftover chars are added directly for good measure.
				2064	// This is documented in the base64 RFC: http://tools.ietf.org/html/rfc3548
				2065	const int dest_len = 3 * (slen / 4) + (slen % 4);
				2066
				2067	dest->resize(dest_len);
				2068
				2069	// We are getting the destination buffer by getting the beginning of the
				2070	// string and converting it into a char *.
				2071	const int len = Base64UnescapeInternal(src, slen, string_as_array(dest),
				2072	dest_len, unbase64);
				2073	if (len < 0) {
				2074	dest->clear();
				2075	return false;
				2076	}
				2077
				2078	// could be shorter if there was padding
				2079	GOOGLE_DCHECK_LE(len, dest_len);
				2080	dest->erase(len);
				2081
				2082	return true;
				2083	}
				2084
				2085	bool Base64Unescape(StringPiece src, string* dest) {
				2086	return Base64UnescapeInternal(src.data(), src.size(), dest, kUnBase64);
				2087	}
				2088
				2089	bool WebSafeBase64Unescape(StringPiece src, string* dest) {
				2090	return Base64UnescapeInternal(src.data(), src.size(), dest, kUnWebSafeBase64);
				2091	}
				2092
				2093	int Base64EscapeInternal(const unsigned char *src, int szsrc,
				2094	char dest, int szdest, const char base64,
				2095	bool do_padding) {
				2096	static const char kPad64 = '=';
				2097
				2098	if (szsrc <= 0) return 0;
				2099
				2100	if (szsrc * 4 > szdest * 3) return 0;
				2101
				2102	char *cur_dest = dest;
				2103	const unsigned char *cur_src = src;
				2104
				2105	char *limit_dest = dest + szdest;
				2106	const unsigned char *limit_src = src + szsrc;
				2107
				2108	// Three bytes of data encodes to four characters of cyphertext.
				2109	// So we can pump through three-byte chunks atomically.
				2110	while (cur_src < limit_src - 3) { // keep going as long as we have >= 32 bits
				2111	uint32 in = BigEndian::Load32(cur_src) >> 8;
				2112
				2113	cur_dest[0] = base64[in >> 18];
				2114	in &= 0x3FFFF;
				2115	cur_dest[1] = base64[in >> 12];
				2116	in &= 0xFFF;
				2117	cur_dest[2] = base64[in >> 6];
				2118	in &= 0x3F;
				2119	cur_dest[3] = base64[in];
				2120
				2121	cur_dest += 4;
				2122	cur_src += 3;
				2123	}
				2124	// To save time, we didn't update szdest or szsrc in the loop. So do it now.
				2125	szdest = limit_dest - cur_dest;
				2126	szsrc = limit_src - cur_src;
				2127
				2128	/* now deal with the tail (<=3 bytes) */
				2129	switch (szsrc) {
				2130	case 0:
				2131	// Nothing left; nothing more to do.
				2132	break;
				2133	case 1: {
				2134	// One byte left: this encodes to two characters, and (optionally)
				2135	// two pad characters to round out the four-character cypherblock.
				2136	if ((szdest -= 2) < 0) return 0;
				2137	uint32 in = cur_src[0];
				2138	cur_dest[0] = base64[in >> 2];
				2139	in &= 0x3;
				2140	cur_dest[1] = base64[in << 4];
				2141	cur_dest += 2;
				2142	if (do_padding) {
				2143	if ((szdest -= 2) < 0) return 0;
				2144	cur_dest[0] = kPad64;
				2145	cur_dest[1] = kPad64;
				2146	cur_dest += 2;
				2147	}
				2148	break;
				2149	}
				2150	case 2: {
				2151	// Two bytes left: this encodes to three characters, and (optionally)
				2152	// one pad character to round out the four-character cypherblock.
				2153	if ((szdest -= 3) < 0) return 0;
				2154	uint32 in = BigEndian::Load16(cur_src);
				2155	cur_dest[0] = base64[in >> 10];
				2156	in &= 0x3FF;
				2157	cur_dest[1] = base64[in >> 4];
				2158	in &= 0x00F;
				2159	cur_dest[2] = base64[in << 2];
				2160	cur_dest += 3;
				2161	if (do_padding) {
				2162	if ((szdest -= 1) < 0) return 0;
				2163	cur_dest[0] = kPad64;
				2164	cur_dest += 1;
				2165	}
				2166	break;
				2167	}
				2168	case 3: {
				2169	// Three bytes left: same as in the big loop above. We can't do this in
				2170	// the loop because the loop above always reads 4 bytes, and the fourth
				2171	// byte is past the end of the input.
				2172	if ((szdest -= 4) < 0) return 0;
				2173	uint32 in = (cur_src[0] << 16) + BigEndian::Load16(cur_src + 1);
				2174	cur_dest[0] = base64[in >> 18];
				2175	in &= 0x3FFFF;
				2176	cur_dest[1] = base64[in >> 12];
				2177	in &= 0xFFF;
				2178	cur_dest[2] = base64[in >> 6];
				2179	in &= 0x3F;
				2180	cur_dest[3] = base64[in];
				2181	cur_dest += 4;
				2182	break;
				2183	}
				2184	default:
				2185	// Should not be reached: blocks of 4 bytes are handled
				2186	// in the while loop before this switch statement.
				2187	GOOGLE_LOG(FATAL) << "Logic problem? szsrc = " << szsrc;
				2188	break;
				2189	}
				2190	return (cur_dest - dest);
				2191	}
				2192
				2193	static const char kBase64Chars[] =
				2194	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
				2195
				2196	static const char kWebSafeBase64Chars[] =
				2197	"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
				2198
				2199	int Base64Escape(const unsigned char src, int szsrc, char dest, int szdest) {
				2200	return Base64EscapeInternal(src, szsrc, dest, szdest, kBase64Chars, true);
				2201	}
				2202	int WebSafeBase64Escape(const unsigned char src, int szsrc, char dest,
				2203	int szdest, bool do_padding) {
				2204	return Base64EscapeInternal(src, szsrc, dest, szdest,
				2205	kWebSafeBase64Chars, do_padding);
				2206	}
				2207
				2208	void Base64EscapeInternal(const unsigned char* src, int szsrc,
				2209	string* dest, bool do_padding,
				2210	const char* base64_chars) {
				2211	const int calc_escaped_size =
				2212	CalculateBase64EscapedLen(szsrc, do_padding);
				2213	dest->resize(calc_escaped_size);
				2214	const int escaped_len = Base64EscapeInternal(src, szsrc,
				2215	string_as_array(dest),
				2216	dest->size(),
				2217	base64_chars,
				2218	do_padding);
				2219	GOOGLE_DCHECK_EQ(calc_escaped_size, escaped_len);
				2220	dest->erase(escaped_len);
				2221	}
				2222
				2223	void Base64Escape(const unsigned char *src, int szsrc,
				2224	string* dest, bool do_padding) {
				2225	Base64EscapeInternal(src, szsrc, dest, do_padding, kBase64Chars);
				2226	}
				2227
				2228	void WebSafeBase64Escape(const unsigned char *src, int szsrc,
				2229	string *dest, bool do_padding) {
				2230	Base64EscapeInternal(src, szsrc, dest, do_padding, kWebSafeBase64Chars);
				2231	}
				2232
				2233	void Base64Escape(StringPiece src, string* dest) {
				2234	Base64Escape(reinterpret_cast<const unsigned char*>(src.data()),
				2235	src.size(), dest, true);
				2236	}
				2237
				2238	void WebSafeBase64Escape(StringPiece src, string* dest) {
				2239	WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
				2240	src.size(), dest, false);
				2241	}
				2242
				2243	void WebSafeBase64EscapeWithPadding(StringPiece src, string* dest) {
				2244	WebSafeBase64Escape(reinterpret_cast<const unsigned char*>(src.data()),
				2245	src.size(), dest, true);
				2246	}
				2247
				2248	// Helper to append a Unicode code point to a string as UTF8, without bringing
				2249	// in any external dependencies.
				2250	int EncodeAsUTF8Char(uint32 code_point, char* output) {
				2251	uint32 tmp = 0;
				2252	int len = 0;
				2253	if (code_point <= 0x7f) {
				2254	tmp = code_point;
				2255	len = 1;
				2256	} else if (code_point <= 0x07ff) {
				2257	tmp = 0x0000c080 \|
				2258	((code_point & 0x07c0) << 2) \|
				2259	(code_point & 0x003f);
				2260	len = 2;
				2261	} else if (code_point <= 0xffff) {
				2262	tmp = 0x00e08080 \|
				2263	((code_point & 0xf000) << 4) \|
				2264	((code_point & 0x0fc0) << 2) \|
				2265	(code_point & 0x003f);
				2266	len = 3;
				2267	} else {
				2268	// UTF-16 is only defined for code points up to 0x10FFFF, and UTF-8 is
				2269	// normally only defined up to there as well.
				2270	tmp = 0xf0808080 \|
				2271	((code_point & 0x1c0000) << 6) \|
				2272	((code_point & 0x03f000) << 4) \|
				2273	((code_point & 0x000fc0) << 2) \|
				2274	(code_point & 0x003f);
				2275	len = 4;
				2276	}
				2277	tmp = ghtonl(tmp);
				2278	memcpy(output, reinterpret_cast<const char*>(&tmp) + sizeof(tmp) - len, len);
				2279	return len;
				2280	}
				2281
				2282	// Table of UTF-8 character lengths, based on first byte
				2283	static const unsigned char kUTF8LenTbl[256] = {
				2284	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2285	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2286	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2287	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2288
				2289	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2290	1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,
				2291	2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,
				2292	3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4, 4,4,4,4,4,4,4,4
				2293	};
				2294
				2295	// Return length of a single UTF-8 source character
				2296	int UTF8FirstLetterNumBytes(const char* src, int len) {
				2297	if (len == 0) {
				2298	return 0;
				2299	}
				2300	return kUTF8LenTbl[reinterpret_cast<const uint8>(src)];
				2301	}
				2302
				2303	} // namespace protobuf
				2304	} // namespace google