blob: 68b90aa3126f21286247078112f3c79981ab6397 [file] [log] [blame]
Austin Schuh36244a12019-09-21 17:52:38 -07001//
2// Copyright 2017 The Abseil Authors.
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// https://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15//
16// -----------------------------------------------------------------------------
17// File: string_view.h
18// -----------------------------------------------------------------------------
19//
20// This file contains the definition of the `absl::string_view` class. A
21// `string_view` points to a contiguous span of characters, often part or all of
22// another `std::string`, double-quoted string literal, character array, or even
23// another `string_view`.
24//
25// This `absl::string_view` abstraction is designed to be a drop-in
26// replacement for the C++17 `std::string_view` abstraction.
27#ifndef ABSL_STRINGS_STRING_VIEW_H_
28#define ABSL_STRINGS_STRING_VIEW_H_
29
30#include <algorithm>
31#include "absl/base/config.h"
32
33#ifdef ABSL_HAVE_STD_STRING_VIEW
34
35#include <string_view> // IWYU pragma: export
36
37namespace absl {
38using std::string_view;
39} // namespace absl
40
41#else // ABSL_HAVE_STD_STRING_VIEW
42
43#include <cassert>
44#include <cstddef>
45#include <cstring>
46#include <iosfwd>
47#include <iterator>
48#include <limits>
49#include <string>
50
51#include "absl/base/internal/throw_delegate.h"
52#include "absl/base/macros.h"
53#include "absl/base/optimization.h"
54#include "absl/base/port.h"
55
56namespace absl {
57
58// absl::string_view
59//
60// A `string_view` provides a lightweight view into the string data provided by
61// a `std::string`, double-quoted string literal, character array, or even
62// another `string_view`. A `string_view` does *not* own the string to which it
63// points, and that data cannot be modified through the view.
64//
65// You can use `string_view` as a function or method parameter anywhere a
66// parameter can receive a double-quoted string literal, `const char*`,
67// `std::string`, or another `absl::string_view` argument with no need to copy
68// the string data. Systematic use of `string_view` within function arguments
69// reduces data copies and `strlen()` calls.
70//
71// Because of its small size, prefer passing `string_view` by value:
72//
73// void MyFunction(absl::string_view arg);
74//
75// If circumstances require, you may also pass one by const reference:
76//
77// void MyFunction(const absl::string_view& arg); // not preferred
78//
79// Passing by value generates slightly smaller code for many architectures.
80//
81// In either case, the source data of the `string_view` must outlive the
82// `string_view` itself.
83//
84// A `string_view` is also suitable for local variables if you know that the
85// lifetime of the underlying object is longer than the lifetime of your
86// `string_view` variable. However, beware of binding a `string_view` to a
87// temporary value:
88//
89// // BAD use of string_view: lifetime problem
90// absl::string_view sv = obj.ReturnAString();
91//
92// // GOOD use of string_view: str outlives sv
93// std::string str = obj.ReturnAString();
94// absl::string_view sv = str;
95//
96// Due to lifetime issues, a `string_view` is sometimes a poor choice for a
97// return value and usually a poor choice for a data member. If you do use a
98// `string_view` this way, it is your responsibility to ensure that the object
99// pointed to by the `string_view` outlives the `string_view`.
100//
101// A `string_view` may represent a whole string or just part of a string. For
102// example, when splitting a string, `std::vector<absl::string_view>` is a
103// natural data type for the output.
104//
105// When constructed from a source which is nul-terminated, the `string_view`
106// itself will not include the nul-terminator unless a specific size (including
107// the nul) is passed to the constructor. As a result, common idioms that work
108// on nul-terminated strings do not work on `string_view` objects. If you write
109// code that scans a `string_view`, you must check its length rather than test
110// for nul, for example. Note, however, that nuls may still be embedded within
111// a `string_view` explicitly.
112//
113// You may create a null `string_view` in two ways:
114//
//   absl::string_view sv;
116// absl::string_view sv(nullptr, 0);
117//
118// For the above, `sv.data() == nullptr`, `sv.length() == 0`, and
119// `sv.empty() == true`. Also, if you create a `string_view` with a non-null
120// pointer then `sv.data() != nullptr`. Thus, you can use `string_view()` to
121// signal an undefined value that is different from other `string_view` values
122// in a similar fashion to how `const char* p1 = nullptr;` is different from
123// `const char* p2 = "";`. However, in practice, it is not recommended to rely
124// on this behavior.
125//
126// Be careful not to confuse a null `string_view` with an empty one. A null
127// `string_view` is an empty `string_view`, but some empty `string_view`s are
128// not null. Prefer checking for emptiness over checking for null.
129//
130// There are many ways to create an empty string_view:
131//
132// const char* nullcp = nullptr;
133// // string_view.size() will return 0 in all cases.
134// absl::string_view();
135// absl::string_view(nullcp, 0);
136// absl::string_view("");
137// absl::string_view("", 0);
138// absl::string_view("abcdef", 0);
139// absl::string_view("abcdef" + 6, 0);
140//
// All empty `string_view` objects, whether null or not, are equal:
142//
143// absl::string_view() == absl::string_view("", 0)
144// absl::string_view(nullptr, 0) == absl::string_view("abcdef"+6, 0)
145class string_view {
146 public:
147 using traits_type = std::char_traits<char>;
148 using value_type = char;
149 using pointer = char*;
150 using const_pointer = const char*;
151 using reference = char&;
152 using const_reference = const char&;
153 using const_iterator = const char*;
154 using iterator = const_iterator;
155 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
156 using reverse_iterator = const_reverse_iterator;
157 using size_type = size_t;
158 using difference_type = std::ptrdiff_t;
159
160 static constexpr size_type npos = static_cast<size_type>(-1);
161
162 // Null `string_view` constructor
163 constexpr string_view() noexcept : ptr_(nullptr), length_(0) {}
164
165 // Implicit constructors
166
167 template <typename Allocator>
168 string_view( // NOLINT(runtime/explicit)
169 const std::basic_string<char, std::char_traits<char>, Allocator>&
170 str) noexcept
171 : ptr_(str.data()), length_(CheckLengthInternal(str.size())) {}
172
173 // Implicit constructor of a `string_view` from nul-terminated `str`. When
174 // accepting possibly null strings, use `absl::NullSafeStringView(str)`
175 // instead (see below).
176 constexpr string_view(const char* str) // NOLINT(runtime/explicit)
177 : ptr_(str),
178 length_(str ? CheckLengthInternal(StrlenInternal(str)) : 0) {}
179
180 // Implicit constructor of a `string_view` from a `const char*` and length.
181 constexpr string_view(const char* data, size_type len)
182 : ptr_(data), length_(CheckLengthInternal(len)) {}
183
184 // NOTE: Harmlessly omitted to work around gdb bug.
185 // constexpr string_view(const string_view&) noexcept = default;
186 // string_view& operator=(const string_view&) noexcept = default;
187
188 // Iterators
189
190 // string_view::begin()
191 //
192 // Returns an iterator pointing to the first character at the beginning of the
193 // `string_view`, or `end()` if the `string_view` is empty.
194 constexpr const_iterator begin() const noexcept { return ptr_; }
195
196 // string_view::end()
197 //
198 // Returns an iterator pointing just beyond the last character at the end of
199 // the `string_view`. This iterator acts as a placeholder; attempting to
200 // access it results in undefined behavior.
201 constexpr const_iterator end() const noexcept { return ptr_ + length_; }
202
203 // string_view::cbegin()
204 //
205 // Returns a const iterator pointing to the first character at the beginning
206 // of the `string_view`, or `end()` if the `string_view` is empty.
207 constexpr const_iterator cbegin() const noexcept { return begin(); }
208
209 // string_view::cend()
210 //
211 // Returns a const iterator pointing just beyond the last character at the end
212 // of the `string_view`. This pointer acts as a placeholder; attempting to
213 // access its element results in undefined behavior.
214 constexpr const_iterator cend() const noexcept { return end(); }
215
216 // string_view::rbegin()
217 //
218 // Returns a reverse iterator pointing to the last character at the end of the
219 // `string_view`, or `rend()` if the `string_view` is empty.
220 const_reverse_iterator rbegin() const noexcept {
221 return const_reverse_iterator(end());
222 }
223
224 // string_view::rend()
225 //
226 // Returns a reverse iterator pointing just before the first character at the
227 // beginning of the `string_view`. This pointer acts as a placeholder;
228 // attempting to access its element results in undefined behavior.
229 const_reverse_iterator rend() const noexcept {
230 return const_reverse_iterator(begin());
231 }
232
233 // string_view::crbegin()
234 //
235 // Returns a const reverse iterator pointing to the last character at the end
236 // of the `string_view`, or `crend()` if the `string_view` is empty.
237 const_reverse_iterator crbegin() const noexcept { return rbegin(); }
238
239 // string_view::crend()
240 //
241 // Returns a const reverse iterator pointing just before the first character
242 // at the beginning of the `string_view`. This pointer acts as a placeholder;
243 // attempting to access its element results in undefined behavior.
244 const_reverse_iterator crend() const noexcept { return rend(); }
245
246 // Capacity Utilities
247
248 // string_view::size()
249 //
250 // Returns the number of characters in the `string_view`.
251 constexpr size_type size() const noexcept {
252 return length_;
253 }
254
255 // string_view::length()
256 //
257 // Returns the number of characters in the `string_view`. Alias for `size()`.
258 constexpr size_type length() const noexcept { return size(); }
259
260 // string_view::max_size()
261 //
262 // Returns the maximum number of characters the `string_view` can hold.
263 constexpr size_type max_size() const noexcept { return kMaxSize; }
264
265 // string_view::empty()
266 //
267 // Checks if the `string_view` is empty (refers to no characters).
268 constexpr bool empty() const noexcept { return length_ == 0; }
269
270 // string_view::operator[]
271 //
272 // Returns the ith element of the `string_view` using the array operator.
273 // Note that this operator does not perform any bounds checking.
274 constexpr const_reference operator[](size_type i) const { return ptr_[i]; }
275
276 // string_view::at()
277 //
278 // Returns the ith element of the `string_view`. Bounds checking is performed,
279 // and an exception of type `std::out_of_range` will be thrown on invalid
280 // access.
281 constexpr const_reference at(size_type i) const {
282 return ABSL_PREDICT_TRUE(i < size())
283 ? ptr_[i]
284 : (base_internal::ThrowStdOutOfRange("absl::string_view::at"),
285 ptr_[i]);
286 }
287
288 // string_view::front()
289 //
290 // Returns the first element of a `string_view`.
291 constexpr const_reference front() const { return ptr_[0]; }
292
293 // string_view::back()
294 //
295 // Returns the last element of a `string_view`.
296 constexpr const_reference back() const { return ptr_[size() - 1]; }
297
298 // string_view::data()
299 //
300 // Returns a pointer to the underlying character array (which is of course
301 // stored elsewhere). Note that `string_view::data()` may contain embedded nul
302 // characters, but the returned buffer may or may not be nul-terminated;
303 // therefore, do not pass `data()` to a routine that expects a nul-terminated
304 // std::string.
305 constexpr const_pointer data() const noexcept { return ptr_; }
306
307 // Modifiers
308
309 // string_view::remove_prefix()
310 //
311 // Removes the first `n` characters from the `string_view`. Note that the
312 // underlying std::string is not changed, only the view.
313 void remove_prefix(size_type n) {
314 assert(n <= length_);
315 ptr_ += n;
316 length_ -= n;
317 }
318
319 // string_view::remove_suffix()
320 //
321 // Removes the last `n` characters from the `string_view`. Note that the
322 // underlying std::string is not changed, only the view.
323 void remove_suffix(size_type n) {
324 assert(n <= length_);
325 length_ -= n;
326 }
327
328 // string_view::swap()
329 //
330 // Swaps this `string_view` with another `string_view`.
331 void swap(string_view& s) noexcept {
332 auto t = *this;
333 *this = s;
334 s = t;
335 }
336
337 // Explicit conversion operators
338
339 // Converts to `std::basic_string`.
340 template <typename A>
341 explicit operator std::basic_string<char, traits_type, A>() const {
342 if (!data()) return {};
343 return std::basic_string<char, traits_type, A>(data(), size());
344 }
345
346 // string_view::copy()
347 //
348 // Copies the contents of the `string_view` at offset `pos` and length `n`
349 // into `buf`.
350 size_type copy(char* buf, size_type n, size_type pos = 0) const {
351 if (ABSL_PREDICT_FALSE(pos > length_)) {
352 base_internal::ThrowStdOutOfRange("absl::string_view::copy");
353 }
354 size_type rlen = (std::min)(length_ - pos, n);
355 if (rlen > 0) {
356 const char* start = ptr_ + pos;
357 traits_type::copy(buf, start, rlen);
358 }
359 return rlen;
360 }
361
362 // string_view::substr()
363 //
364 // Returns a "substring" of the `string_view` (at offset `pos` and length
365 // `n`) as another string_view. This function throws `std::out_of_bounds` if
366 // `pos > size`.
367 string_view substr(size_type pos, size_type n = npos) const {
368 if (ABSL_PREDICT_FALSE(pos > length_))
369 base_internal::ThrowStdOutOfRange("absl::string_view::substr");
370 n = (std::min)(n, length_ - pos);
371 return string_view(ptr_ + pos, n);
372 }
373
374 // string_view::compare()
375 //
376 // Performs a lexicographical comparison between the `string_view` and
377 // another `absl::string_view`, returning -1 if `this` is less than, 0 if
378 // `this` is equal to, and 1 if `this` is greater than the passed std::string
379 // view. Note that in the case of data equality, a further comparison is made
380 // on the respective sizes of the two `string_view`s to determine which is
381 // smaller, equal, or greater.
382 int compare(string_view x) const noexcept {
383 auto min_length = (std::min)(length_, x.length_);
384 if (min_length > 0) {
385 int r = memcmp(ptr_, x.ptr_, min_length);
386 if (r < 0) return -1;
387 if (r > 0) return 1;
388 }
389 if (length_ < x.length_) return -1;
390 if (length_ > x.length_) return 1;
391 return 0;
392 }
393
394 // Overload of `string_view::compare()` for comparing a substring of the
395 // 'string_view` and another `absl::string_view`.
396 int compare(size_type pos1, size_type count1, string_view v) const {
397 return substr(pos1, count1).compare(v);
398 }
399
400 // Overload of `string_view::compare()` for comparing a substring of the
401 // `string_view` and a substring of another `absl::string_view`.
402 int compare(size_type pos1, size_type count1, string_view v, size_type pos2,
403 size_type count2) const {
404 return substr(pos1, count1).compare(v.substr(pos2, count2));
405 }
406
407 // Overload of `string_view::compare()` for comparing a `string_view` and a
408 // a different C-style std::string `s`.
409 int compare(const char* s) const { return compare(string_view(s)); }
410
411 // Overload of `string_view::compare()` for comparing a substring of the
412 // `string_view` and a different std::string C-style std::string `s`.
413 int compare(size_type pos1, size_type count1, const char* s) const {
414 return substr(pos1, count1).compare(string_view(s));
415 }
416
417 // Overload of `string_view::compare()` for comparing a substring of the
418 // `string_view` and a substring of a different C-style std::string `s`.
419 int compare(size_type pos1, size_type count1, const char* s,
420 size_type count2) const {
421 return substr(pos1, count1).compare(string_view(s, count2));
422 }
423
424 // Find Utilities
425
426 // string_view::find()
427 //
428 // Finds the first occurrence of the substring `s` within the `string_view`,
429 // returning the position of the first character's match, or `npos` if no
430 // match was found.
431 size_type find(string_view s, size_type pos = 0) const noexcept;
432
433 // Overload of `string_view::find()` for finding the given character `c`
434 // within the `string_view`.
435 size_type find(char c, size_type pos = 0) const noexcept;
436
437 // string_view::rfind()
438 //
439 // Finds the last occurrence of a substring `s` within the `string_view`,
440 // returning the position of the first character's match, or `npos` if no
441 // match was found.
442 size_type rfind(string_view s, size_type pos = npos) const
443 noexcept;
444
445 // Overload of `string_view::rfind()` for finding the last given character `c`
446 // within the `string_view`.
447 size_type rfind(char c, size_type pos = npos) const noexcept;
448
449 // string_view::find_first_of()
450 //
451 // Finds the first occurrence of any of the characters in `s` within the
452 // `string_view`, returning the start position of the match, or `npos` if no
453 // match was found.
454 size_type find_first_of(string_view s, size_type pos = 0) const
455 noexcept;
456
457 // Overload of `string_view::find_first_of()` for finding a character `c`
458 // within the `string_view`.
459 size_type find_first_of(char c, size_type pos = 0) const
460 noexcept {
461 return find(c, pos);
462 }
463
464 // string_view::find_last_of()
465 //
466 // Finds the last occurrence of any of the characters in `s` within the
467 // `string_view`, returning the start position of the match, or `npos` if no
468 // match was found.
469 size_type find_last_of(string_view s, size_type pos = npos) const
470 noexcept;
471
472 // Overload of `string_view::find_last_of()` for finding a character `c`
473 // within the `string_view`.
474 size_type find_last_of(char c, size_type pos = npos) const
475 noexcept {
476 return rfind(c, pos);
477 }
478
479 // string_view::find_first_not_of()
480 //
481 // Finds the first occurrence of any of the characters not in `s` within the
482 // `string_view`, returning the start position of the first non-match, or
483 // `npos` if no non-match was found.
484 size_type find_first_not_of(string_view s, size_type pos = 0) const noexcept;
485
486 // Overload of `string_view::find_first_not_of()` for finding a character
487 // that is not `c` within the `string_view`.
488 size_type find_first_not_of(char c, size_type pos = 0) const noexcept;
489
490 // string_view::find_last_not_of()
491 //
492 // Finds the last occurrence of any of the characters not in `s` within the
493 // `string_view`, returning the start position of the last non-match, or
494 // `npos` if no non-match was found.
495 size_type find_last_not_of(string_view s,
496 size_type pos = npos) const noexcept;
497
498 // Overload of `string_view::find_last_not_of()` for finding a character
499 // that is not `c` within the `string_view`.
500 size_type find_last_not_of(char c, size_type pos = npos) const
501 noexcept;
502
503 private:
504 static constexpr size_type kMaxSize =
505 (std::numeric_limits<difference_type>::max)();
506
507 static constexpr size_type CheckLengthInternal(size_type len) {
508 return ABSL_ASSERT(len <= kMaxSize), len;
509 }
510
511 static constexpr size_type StrlenInternal(const char* str) {
512#if defined(_MSC_VER) && _MSC_VER >= 1910 && !defined(__clang__)
513 // MSVC 2017+ can evaluate this at compile-time.
514 const char* begin = str;
515 while (*str != '\0') ++str;
516 return str - begin;
517#elif ABSL_HAVE_BUILTIN(__builtin_strlen) || \
518 (defined(__GNUC__) && !defined(__clang__))
519 // GCC has __builtin_strlen according to
520 // https://gcc.gnu.org/onlinedocs/gcc-4.7.0/gcc/Other-Builtins.html, but
521 // ABSL_HAVE_BUILTIN doesn't detect that, so we use the extra checks above.
522 // __builtin_strlen is constexpr.
523 return __builtin_strlen(str);
524#else
525 return str ? strlen(str) : 0;
526#endif
527 }
528
529 const char* ptr_;
530 size_type length_;
531};
532
// Equality: two views are equal when they have the same length and the same
// characters. Views of the same length that share a data pointer, and any two
// empty views, compare equal without touching the character data.
//
// This function is defined inline so that in a fairly common case where one
// of the arguments is a literal, the compiler can elide a lot of the
// following comparisons.
inline bool operator==(string_view x, string_view y) noexcept {
  const auto len = x.size();
  if (len != y.size()) return false;
  // Same buffer or nothing to compare: skip memcmp (which must not be handed
  // the null pointer of a null view).
  if (x.data() == y.data() || len == 0) return true;
  return memcmp(x.data(), y.data(), len) == 0;
}
545
// Inequality: the exact negation of `operator==`.
inline bool operator!=(string_view x, string_view y) noexcept {
  const bool same = (x == y);
  return !same;
}
549
// Lexicographical less-than: compares the common prefix bytewise with memcmp;
// if the prefixes match, the shorter view orders first.
inline bool operator<(string_view x, string_view y) noexcept {
  const auto common = (std::min)(x.size(), y.size());
  if (common != 0) {
    const int order = memcmp(x.data(), y.data(), common);
    if (order != 0) return order < 0;
  }
  // Common prefix is identical (or empty): decide on length alone.
  return x.size() < y.size();
}
555
// Greater-than: `x > y` exactly when `y < x`.
inline bool operator>(string_view x, string_view y) noexcept {
  const bool y_orders_first = (y < x);
  return y_orders_first;
}
557
// Less-than-or-equal: true unless `y` orders strictly before `x`.
inline bool operator<=(string_view x, string_view y) noexcept {
  if (y < x) return false;
  return true;
}
561
// Greater-than-or-equal: true unless `x` orders strictly before `y`.
inline bool operator>=(string_view x, string_view y) noexcept {
  return (x < y) ? false : true;
}
565
566// IO Insertion Operator
567std::ostream& operator<<(std::ostream& o, string_view piece);
568
569} // namespace absl
570
571#endif // ABSL_HAVE_STD_STRING_VIEW
572
573namespace absl {
574
// ClippedSubstr()
//
// Like `s.substr(pos, n)`, but clips `pos` to an upper bound of `s.size()`, so
// an out-of-range `pos` yields an empty view at the end of `s` rather than an
// exception. Provided because `std::string_view::substr` throws if
// `pos > size()`.
inline string_view ClippedSubstr(string_view s, size_t pos,
                                 size_t n = string_view::npos) {
  const size_t limit = static_cast<size_t>(s.size());
  if (pos > limit) {
    pos = limit;
  }
  return s.substr(pos, n);
}
584
// NullSafeStringView()
//
// Creates an `absl::string_view` from a pointer `p` even if it's null-valued:
// a null `p` yields a default-constructed (null, empty) view, and any other
// `p` is treated as a nul-terminated string. This function should be used
// where an `absl::string_view` can be created from a possibly-null pointer.
//
// Declared `constexpr` (which also implies `inline`) so it can be used in
// constant expressions wherever the underlying `string_view` constructors can.
constexpr string_view NullSafeStringView(const char* p) {
  return p ? string_view(p) : string_view();
}
593
594} // namespace absl
595
596#endif // ABSL_STRINGS_STRING_VIEW_H_