blob: f12318c162fe7f249aa41067c8355362abb6e7a5 [file] [log] [blame]
Brian Silvermanf7bd1c22015-12-24 16:07:11 -08001//===-- StringRef.cpp - Lightweight String References ---------------------===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9
10#include "llvm/StringRef.h"
11#include "llvm/SmallVector.h"
12#include <bitset>
13#include <climits>
14
15using namespace llvm;
16
17// MSVC emits references to this into the translation units which reference it.
18#ifndef _MSC_VER
19const size_t StringRef::npos;
20#endif
21
22static char ascii_tolower(char x) {
23 if (x >= 'A' && x <= 'Z')
24 return x - 'A' + 'a';
25 return x;
26}
27
28static char ascii_toupper(char x) {
29 if (x >= 'a' && x <= 'z')
30 return x - 'a' + 'A';
31 return x;
32}
33
34static bool ascii_isdigit(char x) {
35 return x >= '0' && x <= '9';
36}
37
38// strncasecmp() is not available on non-POSIX systems, so define an
39// alternative function here.
40static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) {
41 for (size_t I = 0; I < Length; ++I) {
42 unsigned char LHC = ascii_tolower(LHS[I]);
43 unsigned char RHC = ascii_tolower(RHS[I]);
44 if (LHC != RHC)
45 return LHC < RHC ? -1 : 1;
46 }
47 return 0;
48}
49
50/// compare_lower - Compare strings, ignoring case.
51int StringRef::compare_lower(StringRef RHS) const {
52 if (int Res = ascii_strncasecmp(Data, RHS.Data, std::min(Length, RHS.Length)))
53 return Res;
54 if (Length == RHS.Length)
55 return 0;
56 return Length < RHS.Length ? -1 : 1;
57}
58
59/// Check if this string starts with the given \p Prefix, ignoring case.
60bool StringRef::startswith_lower(StringRef Prefix) const {
61 return Length >= Prefix.Length &&
62 ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0;
63}
64
65/// Check if this string ends with the given \p Suffix, ignoring case.
66bool StringRef::endswith_lower(StringRef Suffix) const {
67 return Length >= Suffix.Length &&
68 ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0;
69}
70
71/// compare_numeric - Compare strings, handle embedded numbers.
72int StringRef::compare_numeric(StringRef RHS) const {
73 for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
74 // Check for sequences of digits.
75 if (ascii_isdigit(Data[I]) && ascii_isdigit(RHS.Data[I])) {
76 // The longer sequence of numbers is considered larger.
77 // This doesn't really handle prefixed zeros well.
78 size_t J;
79 for (J = I + 1; J != E + 1; ++J) {
80 bool ld = J < Length && ascii_isdigit(Data[J]);
81 bool rd = J < RHS.Length && ascii_isdigit(RHS.Data[J]);
82 if (ld != rd)
83 return rd ? -1 : 1;
84 if (!rd)
85 break;
86 }
87 // The two number sequences have the same length (J-I), just memcmp them.
88 if (int Res = compareMemory(Data + I, RHS.Data + I, J - I))
89 return Res < 0 ? -1 : 1;
90 // Identical number sequences, continue search after the numbers.
91 I = J - 1;
92 continue;
93 }
94 if (Data[I] != RHS.Data[I])
95 return (unsigned char)Data[I] < (unsigned char)RHS.Data[I] ? -1 : 1;
96 }
97 if (Length == RHS.Length)
98 return 0;
99 return Length < RHS.Length ? -1 : 1;
100}
101
102//===----------------------------------------------------------------------===//
103// String Operations
104//===----------------------------------------------------------------------===//
105
106std::string StringRef::lower() const {
107 std::string Result(size(), char());
108 for (size_type i = 0, e = size(); i != e; ++i) {
109 Result[i] = ascii_tolower(Data[i]);
110 }
111 return Result;
112}
113
114std::string StringRef::upper() const {
115 std::string Result(size(), char());
116 for (size_type i = 0, e = size(); i != e; ++i) {
117 Result[i] = ascii_toupper(Data[i]);
118 }
119 return Result;
120}
121
122//===----------------------------------------------------------------------===//
123// String Searching
124//===----------------------------------------------------------------------===//
125
126
127/// find - Search for the first string \arg Str in the string.
128///
129/// \return - The index of the first occurrence of \arg Str, or npos if not
130/// found.
131size_t StringRef::find(StringRef Str, size_t From) const {
132 size_t N = Str.size();
133 if (N > Length)
134 return npos;
135
136 // For short haystacks or unsupported needles fall back to the naive algorithm
137 if (Length < 16 || N > 255 || N == 0) {
138 for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
139 if (substr(i, N).equals(Str))
140 return i;
141 return npos;
142 }
143
144 if (From >= Length)
145 return npos;
146
147 // Build the bad char heuristic table, with uint8_t to reduce cache thrashing.
148 uint8_t BadCharSkip[256];
149 std::memset(BadCharSkip, N, 256);
150 for (unsigned i = 0; i != N-1; ++i)
151 BadCharSkip[(uint8_t)Str[i]] = N-1-i;
152
153 unsigned Len = Length-From, Pos = From;
154 while (Len >= N) {
155 if (substr(Pos, N).equals(Str)) // See if this is the correct substring.
156 return Pos;
157
158 // Otherwise skip the appropriate number of bytes.
159 uint8_t Skip = BadCharSkip[(uint8_t)(*this)[Pos+N-1]];
160 Len -= Skip;
161 Pos += Skip;
162 }
163
164 return npos;
165}
166
167/// rfind - Search for the last string \arg Str in the string.
168///
169/// \return - The index of the last occurrence of \arg Str, or npos if not
170/// found.
171size_t StringRef::rfind(StringRef Str) const {
172 size_t N = Str.size();
173 if (N > Length)
174 return npos;
175 for (size_t i = Length - N + 1, e = 0; i != e;) {
176 --i;
177 if (substr(i, N).equals(Str))
178 return i;
179 }
180 return npos;
181}
182
183/// find_first_of - Find the first character in the string that is in \arg
184/// Chars, or npos if not found.
185///
186/// Note: O(size() + Chars.size())
187StringRef::size_type StringRef::find_first_of(StringRef Chars,
188 size_t From) const {
189 std::bitset<1 << CHAR_BIT> CharBits;
190 for (size_type i = 0; i != Chars.size(); ++i)
191 CharBits.set((unsigned char)Chars[i]);
192
193 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
194 if (CharBits.test((unsigned char)Data[i]))
195 return i;
196 return npos;
197}
198
199/// find_first_not_of - Find the first character in the string that is not
200/// \arg C or npos if not found.
201StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
202 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
203 if (Data[i] != C)
204 return i;
205 return npos;
206}
207
208/// find_first_not_of - Find the first character in the string that is not
209/// in the string \arg Chars, or npos if not found.
210///
211/// Note: O(size() + Chars.size())
212StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
213 size_t From) const {
214 std::bitset<1 << CHAR_BIT> CharBits;
215 for (size_type i = 0; i != Chars.size(); ++i)
216 CharBits.set((unsigned char)Chars[i]);
217
218 for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
219 if (!CharBits.test((unsigned char)Data[i]))
220 return i;
221 return npos;
222}
223
224/// find_last_of - Find the last character in the string that is in \arg C,
225/// or npos if not found.
226///
227/// Note: O(size() + Chars.size())
228StringRef::size_type StringRef::find_last_of(StringRef Chars,
229 size_t From) const {
230 std::bitset<1 << CHAR_BIT> CharBits;
231 for (size_type i = 0; i != Chars.size(); ++i)
232 CharBits.set((unsigned char)Chars[i]);
233
234 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
235 if (CharBits.test((unsigned char)Data[i]))
236 return i;
237 return npos;
238}
239
240/// find_last_not_of - Find the last character in the string that is not
241/// \arg C, or npos if not found.
242StringRef::size_type StringRef::find_last_not_of(char C, size_t From) const {
243 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
244 if (Data[i] != C)
245 return i;
246 return npos;
247}
248
249/// find_last_not_of - Find the last character in the string that is not in
250/// \arg Chars, or npos if not found.
251///
252/// Note: O(size() + Chars.size())
253StringRef::size_type StringRef::find_last_not_of(StringRef Chars,
254 size_t From) const {
255 std::bitset<1 << CHAR_BIT> CharBits;
256 for (size_type i = 0, e = Chars.size(); i != e; ++i)
257 CharBits.set((unsigned char)Chars[i]);
258
259 for (size_type i = std::min(From, Length) - 1, e = -1; i != e; --i)
260 if (!CharBits.test((unsigned char)Data[i]))
261 return i;
262 return npos;
263}
264
265void StringRef::split(SmallVectorImpl<StringRef> &A,
266 StringRef Separators, int MaxSplit,
267 bool KeepEmpty) const {
268 StringRef rest = *this;
269
270 // rest.data() is used to distinguish cases like "a," that splits into
271 // "a" + "" and "a" that splits into "a" + 0.
272 for (int splits = 0;
273 rest.data() != nullptr && (MaxSplit < 0 || splits < MaxSplit);
274 ++splits) {
275 std::pair<StringRef, StringRef> p = rest.split(Separators);
276
277 if (KeepEmpty || p.first.size() != 0)
278 A.push_back(p.first);
279 rest = p.second;
280 }
281 // If we have a tail left, add it.
282 if (rest.data() != nullptr && (rest.size() != 0 || KeepEmpty))
283 A.push_back(rest);
284}
285
286//===----------------------------------------------------------------------===//
287// Helpful Algorithms
288//===----------------------------------------------------------------------===//
289
290/// count - Return the number of non-overlapped occurrences of \arg Str in
291/// the string.
292size_t StringRef::count(StringRef Str) const {
293 size_t Count = 0;
294 size_t N = Str.size();
295 if (N > Length)
296 return 0;
297 for (size_t i = 0, e = Length - N + 1; i != e; ++i)
298 if (substr(i, N).equals(Str))
299 ++Count;
300 return Count;
301}
302
303static unsigned GetAutoSenseRadix(StringRef &Str) {
304 if (Str.startswith("0x")) {
305 Str = Str.substr(2);
306 return 16;
307 }
308
309 if (Str.startswith("0b")) {
310 Str = Str.substr(2);
311 return 2;
312 }
313
314 if (Str.startswith("0o")) {
315 Str = Str.substr(2);
316 return 8;
317 }
318
319 if (Str.startswith("0"))
320 return 8;
321
322 return 10;
323}
324
325
326/// GetAsUnsignedInteger - Workhorse method that converts a integer character
327/// sequence of radix up to 36 to an unsigned long long value.
328bool llvm::getAsUnsignedInteger(StringRef Str, unsigned Radix,
329 unsigned long long &Result) {
330 // Autosense radix if not specified.
331 if (Radix == 0)
332 Radix = GetAutoSenseRadix(Str);
333
334 // Empty strings (after the radix autosense) are invalid.
335 if (Str.empty()) return true;
336
337 // Parse all the bytes of the string given this radix. Watch for overflow.
338 Result = 0;
339 while (!Str.empty()) {
340 unsigned CharVal;
341 if (Str[0] >= '0' && Str[0] <= '9')
342 CharVal = Str[0]-'0';
343 else if (Str[0] >= 'a' && Str[0] <= 'z')
344 CharVal = Str[0]-'a'+10;
345 else if (Str[0] >= 'A' && Str[0] <= 'Z')
346 CharVal = Str[0]-'A'+10;
347 else
348 return true;
349
350 // If the parsed value is larger than the integer radix, the string is
351 // invalid.
352 if (CharVal >= Radix)
353 return true;
354
355 // Add in this character.
356 unsigned long long PrevResult = Result;
357 Result = Result*Radix+CharVal;
358
359 // Check for overflow by shifting back and seeing if bits were lost.
360 if (Result/Radix < PrevResult)
361 return true;
362
363 Str = Str.substr(1);
364 }
365
366 return false;
367}
368
369bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix,
370 long long &Result) {
371 unsigned long long ULLVal;
372
373 // Handle positive strings first.
374 if (Str.empty() || Str.front() != '-') {
375 if (getAsUnsignedInteger(Str, Radix, ULLVal) ||
376 // Check for value so large it overflows a signed value.
377 (long long)ULLVal < 0)
378 return true;
379 Result = ULLVal;
380 return false;
381 }
382
383 // Get the positive part of the value.
384 if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) ||
385 // Reject values so large they'd overflow as negative signed, but allow
386 // "-0". This negates the unsigned so that the negative isn't undefined
387 // on signed overflow.
388 (long long)-ULLVal > 0)
389 return true;
390
391 Result = -ULLVal;
392 return false;
393}