Blame - src/llvm/StringRef.cpp - RealtimeRoboticsGroup/test

blob: f12318c162fe7f249aa41067c8355362abb6e7a5 [file] [log] [blame]

Brian Silverman	f7bd1c2	2015-12-24 16:07:11 -0800	[diff] [blame^]	1	//===-- StringRef.cpp - Lightweight String References ---------------------===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9
				10	#include "llvm/StringRef.h"
				11	#include "llvm/SmallVector.h"
				12	#include <bitset>
				13	#include <climits>
				14
				15	using namespace llvm;
				16
				17	// MSVC emits references to this into the translation units which reference it.
				18	#ifndef _MSC_VER
				19	const size_t StringRef::npos;
				20	#endif
				21
				22	static char ascii_tolower(char x) {
				23	if (x >= 'A' && x <= 'Z')
				24	return x - 'A' + 'a';
				25	return x;
				26	}
				27
				28	static char ascii_toupper(char x) {
				29	if (x >= 'a' && x <= 'z')
				30	return x - 'a' + 'A';
				31	return x;
				32	}
				33
				34	static bool ascii_isdigit(char x) {
				35	return x >= '0' && x <= '9';
				36	}
				37
				38	// strncasecmp() is not available on non-POSIX systems, so define an
				39	// alternative function here.
				40	static int ascii_strncasecmp(const char LHS, const char RHS, size_t Length) {
				41	for (size_t I = 0; I < Length; ++I) {
				42	unsigned char LHC = ascii_tolower(LHS[I]);
				43	unsigned char RHC = ascii_tolower(RHS[I]);
				44	if (LHC != RHC)
				45	return LHC < RHC ? -1 : 1;
				46	}
				47	return 0;
				48	}
				49
				50	/// compare_lower - Compare strings, ignoring case.
				51	int StringRef::compare_lower(StringRef RHS) const {
				52	if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
				53	return Res;
				54	if (Length == RHS.Length)
				55	return 0;
				56	return Length < RHS.Length ? -1 : 1;
				57	}
				58
				59	/// Check if this string starts with the given \p Prefix, ignoring case.
				60	bool StringRef::startswith_lower(StringRef Prefix) const {
				61	return Length >= Prefix.Length &&
				62	ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
				63	}
				64
				65	/// Check if this string ends with the given \p Suffix, ignoring case.
				66	bool StringRef::endswith_lower(StringRef Suffix) const {
				67	return Length >= Suffix.Length &&
				68	ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
				69	}
				70
				71	/// compare_numeric - Compare strings, handle embedded numbers.
				72	int StringRef::compare_numeric(StringRef RHS) const {
				73	for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
				74	// Check for sequences of digits.
				75	if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
				76	// The longer sequence of numbers is considered larger.
				77	// This doesn't really handle prefixed zeros well.
				78	size_t J;
				79	for (J = I + 1; J != E + 1; ++J) {
				80	bool ld = J < Length && ascii_isdigit(Data[J]);
				81	bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
				82	if (ld != rd)
				83	return rd ? -1 : 1;
				84	if (!rd)
				85	break;
				86	}
				87	// The two number sequences have the same length (J-I), just memcmp them.
				88	if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
				89	return Res < 0 ? -1 : 1;
				90	// Identical number sequences, continue search after the numbers.
				91	I = J - 1;
				92	continue;
				93	}
				94	if (Data[I] != RHS.Data[I])
				95	return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
				96	}
				97	if (Length == RHS.Length)
				98	return 0;
				99	return Length < RHS.Length ? -1 : 1;
				100	}
				101
				102	//===----------------------------------------------------------------------===//
				103	// String Operations
				104	//===----------------------------------------------------------------------===//
				105
				106	std::string StringRef::lower() const {
				107	std::string Result(size(), char());
				108	for (size_type i = 0, e = size(); i != e; ++i) {
				109	Result[i] = ascii_tolower(Data[i]);
				110	}
				111	return Result;
				112	}
				113
				114	std::string StringRef::upper() const {
				115	std::string Result(size(), char());
				116	for (size_type i = 0, e = size(); i != e; ++i) {
				117	Result[i] = ascii_toupper(Data[i]);
				118	}
				119	return Result;
				120	}
				121
				122	//===----------------------------------------------------------------------===//
				123	// String Searching
				124	//===----------------------------------------------------------------------===//
				125
				126
				127	/// find - Search for the first string \arg Str in the string.
				128	///
				129	/// \return - The index of the first occurrence of \arg Str, or npos if not
				130	/// found.
				131	size_t StringRef::find(StringRef Str, size_t From) const {
				132	size_t N = Str.size();
				133	if (N > Length)
				134	return npos;
				135
				136	// For short haystacks or unsupported needles fall back to the naive algorithm
				137	if (Length < 16 \|\| N > 255 \|\| N == 0) {
				138	for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
				139	if (substr(i, N).equals(Str))
				140	return i;
				141	return npos;
				142	}
				143
				144	if (From >= Length)
				145	return npos;
				146
				147	// Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
				148	uint8_t BadCharSkip[256];
				149	std::memset(BadCharSkip, N, 256);
				150	for (unsigned i = 0; i != N-1; ++i)
				151	BadCharSkip[(uint8_t)Str[i]] = N-1-i;
				152
				153	unsigned Len = Length-From, Pos = From;
				154	while (Len >= N) {
				155	if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
				156	return Pos;
				157
				158	// Otherwise skip the appropriate number of bytes.
				159	uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
				160	Len -= Skip;
				161	Pos += Skip;
				162	}
				163
				164	return npos;
				165	}
				166
				167	/// rfind - Search for the last string \arg Str in the string.
				168	///
				169	/// \return - The index of the last occurrence of \arg Str, or npos if not
				170	/// found.
				171	size_t StringRef::rfind(StringRef Str) const {
				172	size_t N = Str.size();
				173	if (N > Length)
				174	return npos;
				175	for (size_t i = Length - N + 1, e = 0; i != e;) {
				176	--i;
				177	if (substr(i, N).equals(Str))
				178	return i;
				179	}
				180	return npos;
				181	}
				182
				183	/// find_first_of - Find the first character in the string that is in \arg
				184	/// Chars, or npos if not found.
				185	///
				186	/// Note: O(size() + Chars.size())
				187	StringRef::size_type StringRef::find_first_of(StringRef Chars,
				188	size_t From) const {
				189	std::bitset<1 << CHAR_BIT> CharBits;
				190	for (size_type i = 0; i != Chars.size(); ++i)
				191	CharBits.set((unsigned char)Chars[i]);
				192
				193	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
				194	if (CharBits.test((unsigned char)Data[i]))
				195	return i;
				196	return npos;
				197	}
				198
				199	/// find_first_not_of - Find the first character in the string that is not
				200	/// \arg C or npos if not found.
				201	StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
				202	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
				203	if (Data[i] != C)
				204	return i;
				205	return npos;
				206	}
				207
				208	/// find_first_not_of - Find the first character in the string that is not
				209	/// in the string \arg Chars, or npos if not found.
				210	///
				211	/// Note: O(size() + Chars.size())
				212	StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
				213	size_t From) const {
				214	std::bitset<1 << CHAR_BIT> CharBits;
				215	for (size_type i = 0; i != Chars.size(); ++i)
				216	CharBits.set((unsigned char)Chars[i]);
				217
				218	for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
				219	if (!CharBits.test((unsigned char)Data[i]))
				220	return i;
				221	return npos;
				222	}
				223
				224	/// find_last_of - Find the last character in the string that is in \arg C,
				225	/// or npos if not found.
				226	///
				227	/// Note: O(size() + Chars.size())
				228	StringRef::size_type StringRef::find_last_of(StringRef Chars,
				229	size_t From) const {
				230	std::bitset<1 << CHAR_BIT> CharBits;
				231	for (size_type i = 0; i != Chars.size(); ++i)
				232	CharBits.set((unsigned char)Chars[i]);
				233
				234	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
				235	if (CharBits.test((unsigned char)Data[i]))
				236	return i;
				237	return npos;
				238	}
				239
				240	/// find_last_not_of - Find the last character in the string that is not
				241	/// \arg C, or npos if not found.
				242	StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
				243	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
				244	if (Data[i] != C)
				245	return i;
				246	return npos;
				247	}
				248
				249	/// find_last_not_of - Find the last character in the string that is not in
				250	/// \arg Chars, or npos if not found.
				251	///
				252	/// Note: O(size() + Chars.size())
				253	StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
				254	size_t From) const {
				255	std::bitset<1 << CHAR_BIT> CharBits;
				256	for (size_type i = 0, e = Chars.size(); i != e; ++i)
				257	CharBits.set((unsigned char)Chars[i]);
				258
				259	for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
				260	if (!CharBits.test((unsigned char)Data[i]))
				261	return i;
				262	return npos;
				263	}
				264
				265	void StringRef::split(SmallVectorImpl<StringRef> &A,
				266	StringRef Separators, int MaxSplit,
				267	bool KeepEmpty) const {
				268	StringRef rest = *this;
				269
				270	// rest.data() is used to distinguish cases like "a," that splits into
				271	// "a" + "" and "a" that splits into "a" + 0.
				272	for (int splits = 0;
				273	rest.data() != nullptr && (MaxSplit < 0 \|\| splits < MaxSplit);
				274	++splits) {
				275	std::pair<StringRef, StringRef> p = rest.split(Separators);
				276
				277	if (KeepEmpty \|\| p.first.size() != 0)
				278	A.push_back(p.first);
				279	rest = p.second;
				280	}
				281	// If we have a tail left, add it.
				282	if (rest.data() != nullptr && (rest.size() != 0 \|\| KeepEmpty))
				283	A.push_back(rest);
				284	}
				285
				286	//===----------------------------------------------------------------------===//
				287	// Helpful Algorithms
				288	//===----------------------------------------------------------------------===//
				289
				290	/// count - Return the number of non-overlapped occurrences of \arg Str in
				291	/// the string.
				292	size_t StringRef::count(StringRef Str) const {
				293	size_t Count = 0;
				294	size_t N = Str.size();
				295	if (N > Length)
				296	return 0;
				297	for (size_t i = 0, e = Length - N + 1; i != e; ++i)
				298	if (substr(i, N).equals(Str))
				299	++Count;
				300	return Count;
				301	}
				302
				303	static unsigned GetAutoSenseRadix(StringRef &Str) {
				304	if (Str.startswith("0x")) {
				305	Str = Str.substr(2);
				306	return 16;
				307	}
				308
				309	if (Str.startswith("0b")) {
				310	Str = Str.substr(2);
				311	return 2;
				312	}
				313
				314	if (Str.startswith("0o")) {
				315	Str = Str.substr(2);
				316	return 8;
				317	}
				318
				319	if (Str.startswith("0"))
				320	return 8;
				321
				322	return 10;
				323	}
				324
				325
				326	/// GetAsUnsignedInteger - Workhorse method that converts a integer character
				327	/// sequence of radix up to 36 to an unsigned long long value.
				328	bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
				329	unsigned long long &Result) {
				330	// Autosense radix if not specified.
				331	if (Radix == 0)
				332	Radix = GetAutoSenseRadix(Str);
				333
				334	// Empty strings (after the radix autosense) are invalid.
				335	if (Str.empty()) return true;
				336
				337	// Parse all the bytes of the string given this radix. Watch for overflow.
				338	Result = 0;
				339	while (!Str.empty()) {
				340	unsigned CharVal;
				341	if (Str[0] >= '0' && Str[0] <= '9')
				342	CharVal = Str[0]-'0';
				343	else if (Str[0] >= 'a' && Str[0] <= 'z')
				344	CharVal = Str[0]-'a'+10;
				345	else if (Str[0] >= 'A' && Str[0] <= 'Z')
				346	CharVal = Str[0]-'A'+10;
				347	else
				348	return true;
				349
				350	// If the parsed value is larger than the integer radix, the string is
				351	// invalid.
				352	if (CharVal >= Radix)
				353	return true;
				354
				355	// Add in this character.
				356	unsigned long long PrevResult = Result;
				357	Result = Result*Radix+CharVal;
				358
				359	// Check for overflow by shifting back and seeing if bits were lost.
				360	if (Result/Radix < PrevResult)
				361	return true;
				362
				363	Str = Str.substr(1);
				364	}
				365
				366	return false;
				367	}
				368
				369	bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
				370	long long &Result) {
				371	unsigned long long ULLVal;
				372
				373	// Handle positive strings first.
				374	if (Str.empty() \|\| Str.front() != '-') {
				375	if (getAsUnsignedInteger(Str, Radix, ULLVal) \|\|
				376	// Check for value so large it overflows a signed value.
				377	(long long)ULLVal < 0)
				378	return true;
				379	Result = ULLVal;
				380	return false;
				381	}
				382
				383	// Get the positive part of the value.
				384	if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) \|\|
				385	// Reject values so large they'd overflow as negative signed, but allow
				386	// "-0". This negates the unsigned so that the negative isn't undefined
				387	// on signed overflow.
				388	(long long)-ULLVal > 0)
				389	return true;
				390
				391	Result = -ULLVal;
				392	return false;
				393	}