Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | // |
| 15 | |
| 16 | // This file declares INTERNAL parts of the Split API that are inline/templated |
| 17 | // or otherwise need to be available at compile time. The main abstractions |
| 18 | // defined in here are |
| 19 | // |
| 20 | // - ConvertibleToStringView |
| 21 | // - SplitIterator<> |
| 22 | // - Splitter<> |
| 23 | // |
| 24 | // DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including |
| 25 | // absl/strings/str_split.h. |
| 26 | // |
| 27 | // IWYU pragma: private, include "absl/strings/str_split.h" |
| 28 | |
| 29 | #ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |
| 30 | #define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |
| 31 | |
| 32 | #include <array> |
| 33 | #include <initializer_list> |
| 34 | #include <iterator> |
| 35 | #include <map> |
| 36 | #include <type_traits> |
| 37 | #include <utility> |
| 38 | #include <vector> |
| 39 | |
| 40 | #include "absl/base/macros.h" |
| 41 | #include "absl/base/port.h" |
| 42 | #include "absl/meta/type_traits.h" |
| 43 | #include "absl/strings/string_view.h" |
| 44 | |
| 45 | #ifdef _GLIBCXX_DEBUG |
| 46 | #include "absl/strings/internal/stl_type_traits.h" |
| 47 | #endif // _GLIBCXX_DEBUG |
| 48 | |
| 49 | namespace absl { |
| 50 | namespace strings_internal { |
| 51 | |
| 52 | // This class is implicitly constructible from everything that absl::string_view |
| 53 | // is implicitly constructible from. If it's constructed from a temporary |
| 54 | // string, the data is moved into a data member so its lifetime matches that of |
| 55 | // the ConvertibleToStringView instance. |
| 56 | class ConvertibleToStringView { |
| 57 | public: |
| 58 | ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) |
| 59 | : value_(s) {} |
| 60 | ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) |
| 61 | ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) |
| 62 | : value_(s) {} |
| 63 | ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) |
| 64 | : value_(s) {} |
| 65 | |
| 66 | // Matches rvalue strings and moves their data to a member. |
| 67 | ConvertibleToStringView(std::string&& s) // NOLINT(runtime/explicit) |
| 68 | : copy_(std::move(s)), value_(copy_) {} |
| 69 | |
| 70 | ConvertibleToStringView(const ConvertibleToStringView& other) |
| 71 | : copy_(other.copy_), |
| 72 | value_(other.IsSelfReferential() ? copy_ : other.value_) {} |
| 73 | |
| 74 | ConvertibleToStringView(ConvertibleToStringView&& other) { |
| 75 | StealMembers(std::move(other)); |
| 76 | } |
| 77 | |
| 78 | ConvertibleToStringView& operator=(ConvertibleToStringView other) { |
| 79 | StealMembers(std::move(other)); |
| 80 | return *this; |
| 81 | } |
| 82 | |
| 83 | absl::string_view value() const { return value_; } |
| 84 | |
| 85 | private: |
| 86 | // Returns true if ctsp's value refers to its internal copy_ member. |
| 87 | bool IsSelfReferential() const { return value_.data() == copy_.data(); } |
| 88 | |
| 89 | void StealMembers(ConvertibleToStringView&& other) { |
| 90 | if (other.IsSelfReferential()) { |
| 91 | copy_ = std::move(other.copy_); |
| 92 | value_ = copy_; |
| 93 | other.value_ = other.copy_; |
| 94 | } else { |
| 95 | value_ = other.value_; |
| 96 | } |
| 97 | } |
| 98 | |
| 99 | // Holds the data moved from temporary std::string arguments. Declared first |
| 100 | // so that 'value' can refer to 'copy_'. |
| 101 | std::string copy_; |
| 102 | absl::string_view value_; |
| 103 | }; |
| 104 | |
| 105 | // An iterator that enumerates the parts of a string from a Splitter. The text |
| 106 | // to be split, the Delimiter, and the Predicate are all taken from the given |
| 107 | // Splitter object. Iterators may only be compared if they refer to the same |
| 108 | // Splitter instance. |
| 109 | // |
| 110 | // This class is NOT part of the public splitting API. |
| 111 | template <typename Splitter> |
| 112 | class SplitIterator { |
| 113 | public: |
| 114 | using iterator_category = std::input_iterator_tag; |
| 115 | using value_type = absl::string_view; |
| 116 | using difference_type = ptrdiff_t; |
| 117 | using pointer = const value_type*; |
| 118 | using reference = const value_type&; |
| 119 | |
| 120 | enum State { kInitState, kLastState, kEndState }; |
| 121 | SplitIterator(State state, const Splitter* splitter) |
| 122 | : pos_(0), |
| 123 | state_(state), |
| 124 | splitter_(splitter), |
| 125 | delimiter_(splitter->delimiter()), |
| 126 | predicate_(splitter->predicate()) { |
| 127 | // Hack to maintain backward compatibility. This one block makes it so an |
| 128 | // empty absl::string_view whose .data() happens to be nullptr behaves |
| 129 | // *differently* from an otherwise empty absl::string_view whose .data() is |
| 130 | // not nullptr. This is an undesirable difference in general, but this |
| 131 | // behavior is maintained to avoid breaking existing code that happens to |
| 132 | // depend on this old behavior/bug. Perhaps it will be fixed one day. The |
| 133 | // difference in behavior is as follows: |
| 134 | // Split(absl::string_view(""), '-'); // {""} |
| 135 | // Split(absl::string_view(), '-'); // {} |
| 136 | if (splitter_->text().data() == nullptr) { |
| 137 | state_ = kEndState; |
| 138 | pos_ = splitter_->text().size(); |
| 139 | return; |
| 140 | } |
| 141 | |
| 142 | if (state_ == kEndState) { |
| 143 | pos_ = splitter_->text().size(); |
| 144 | } else { |
| 145 | ++(*this); |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | bool at_end() const { return state_ == kEndState; } |
| 150 | |
| 151 | reference operator*() const { return curr_; } |
| 152 | pointer operator->() const { return &curr_; } |
| 153 | |
| 154 | SplitIterator& operator++() { |
| 155 | do { |
| 156 | if (state_ == kLastState) { |
| 157 | state_ = kEndState; |
| 158 | return *this; |
| 159 | } |
| 160 | const absl::string_view text = splitter_->text(); |
| 161 | const absl::string_view d = delimiter_.Find(text, pos_); |
| 162 | if (d.data() == text.data() + text.size()) state_ = kLastState; |
| 163 | curr_ = text.substr(pos_, d.data() - (text.data() + pos_)); |
| 164 | pos_ += curr_.size() + d.size(); |
| 165 | } while (!predicate_(curr_)); |
| 166 | return *this; |
| 167 | } |
| 168 | |
| 169 | SplitIterator operator++(int) { |
| 170 | SplitIterator old(*this); |
| 171 | ++(*this); |
| 172 | return old; |
| 173 | } |
| 174 | |
| 175 | friend bool operator==(const SplitIterator& a, const SplitIterator& b) { |
| 176 | return a.state_ == b.state_ && a.pos_ == b.pos_; |
| 177 | } |
| 178 | |
| 179 | friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { |
| 180 | return !(a == b); |
| 181 | } |
| 182 | |
| 183 | private: |
| 184 | size_t pos_; |
| 185 | State state_; |
| 186 | absl::string_view curr_; |
| 187 | const Splitter* splitter_; |
| 188 | typename Splitter::DelimiterType delimiter_; |
| 189 | typename Splitter::PredicateType predicate_; |
| 190 | }; |
| 191 | |
| 192 | // HasMappedType<T>::value is true iff there exists a type T::mapped_type. |
| 193 | template <typename T, typename = void> |
| 194 | struct HasMappedType : std::false_type {}; |
| 195 | template <typename T> |
| 196 | struct HasMappedType<T, absl::void_t<typename T::mapped_type>> |
| 197 | : std::true_type {}; |
| 198 | |
| 199 | // HasValueType<T>::value is true iff there exists a type T::value_type. |
| 200 | template <typename T, typename = void> |
| 201 | struct HasValueType : std::false_type {}; |
| 202 | template <typename T> |
| 203 | struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { |
| 204 | }; |
| 205 | |
| 206 | // HasConstIterator<T>::value is true iff there exists a type T::const_iterator. |
| 207 | template <typename T, typename = void> |
| 208 | struct HasConstIterator : std::false_type {}; |
| 209 | template <typename T> |
| 210 | struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> |
| 211 | : std::true_type {}; |
| 212 | |
// IsInitializerList<T>::value is true iff T is a std::initializer_list. The
// answer is obtained by overload resolution on a null T*: the template
// overload is chosen when that pointer points to some std::initializer_list<E>,
// and the variadic overload catches everything else. See Splitter<> below for
// why this trait is needed.
template <typename Element>
std::true_type IsInitializerListDispatch(std::initializer_list<Element>*);
std::false_type IsInitializerListDispatch(...);  // fallback: not a match

template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {};
| 221 | |
| 222 | // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition |
| 223 | // is true for type 'C'. |
| 224 | // |
| 225 | // Restricts conversion to container-like types (by testing for the presence of |
| 226 | // a const_iterator member type) and also to disable conversion to an |
| 227 | // std::initializer_list (which also has a const_iterator). Otherwise, code |
| 228 | // compiled in C++11 will get an error due to ambiguous conversion paths (in |
| 229 | // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> |
| 230 | // or an std::initializer_list<T>). |
| 231 | |
| 232 | template <typename C, bool has_value_type, bool has_mapped_type> |
| 233 | struct SplitterIsConvertibleToImpl : std::false_type {}; |
| 234 | |
| 235 | template <typename C> |
| 236 | struct SplitterIsConvertibleToImpl<C, true, false> |
| 237 | : std::is_constructible<typename C::value_type, absl::string_view> {}; |
| 238 | |
| 239 | template <typename C> |
| 240 | struct SplitterIsConvertibleToImpl<C, true, true> |
| 241 | : absl::conjunction< |
| 242 | std::is_constructible<typename C::key_type, absl::string_view>, |
| 243 | std::is_constructible<typename C::mapped_type, absl::string_view>> {}; |
| 244 | |
| 245 | template <typename C> |
| 246 | struct SplitterIsConvertibleTo |
| 247 | : SplitterIsConvertibleToImpl< |
| 248 | C, |
| 249 | #ifdef _GLIBCXX_DEBUG |
| 250 | !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && |
| 251 | #endif // _GLIBCXX_DEBUG |
| 252 | !IsInitializerList< |
| 253 | typename std::remove_reference<C>::type>::value && |
| 254 | HasValueType<C>::value && HasConstIterator<C>::value, |
| 255 | HasMappedType<C>::value> { |
| 256 | }; |
| 257 | |
| 258 | // This class implements the range that is returned by absl::StrSplit(). This |
| 259 | // class has templated conversion operators that allow it to be implicitly |
| 260 | // converted to a variety of types that the caller may have specified on the |
| 261 | // left-hand side of an assignment. |
| 262 | // |
| 263 | // The main interface for interacting with this class is through its implicit |
| 264 | // conversion operators. However, this class may also be used like a container |
| 265 | // in that it has .begin() and .end() member functions. It may also be used |
| 266 | // within a range-for loop. |
| 267 | // |
| 268 | // Output containers can be collections of any type that is constructible from |
| 269 | // an absl::string_view. |
| 270 | // |
| 271 | // An Predicate functor may be supplied. This predicate will be used to filter |
| 272 | // the split strings: only strings for which the predicate returns true will be |
| 273 | // kept. A Predicate object is any unary functor that takes an absl::string_view |
| 274 | // and returns bool. |
| 275 | template <typename Delimiter, typename Predicate> |
| 276 | class Splitter { |
| 277 | public: |
| 278 | using DelimiterType = Delimiter; |
| 279 | using PredicateType = Predicate; |
| 280 | using const_iterator = strings_internal::SplitIterator<Splitter>; |
| 281 | using value_type = typename std::iterator_traits<const_iterator>::value_type; |
| 282 | |
| 283 | Splitter(ConvertibleToStringView input_text, Delimiter d, Predicate p) |
| 284 | : text_(std::move(input_text)), |
| 285 | delimiter_(std::move(d)), |
| 286 | predicate_(std::move(p)) {} |
| 287 | |
| 288 | absl::string_view text() const { return text_.value(); } |
| 289 | const Delimiter& delimiter() const { return delimiter_; } |
| 290 | const Predicate& predicate() const { return predicate_; } |
| 291 | |
| 292 | // Range functions that iterate the split substrings as absl::string_view |
| 293 | // objects. These methods enable a Splitter to be used in a range-based for |
| 294 | // loop. |
| 295 | const_iterator begin() const { return {const_iterator::kInitState, this}; } |
| 296 | const_iterator end() const { return {const_iterator::kEndState, this}; } |
| 297 | |
| 298 | // An implicit conversion operator that is restricted to only those containers |
| 299 | // that the splitter is convertible to. |
| 300 | template <typename Container, |
| 301 | typename = typename std::enable_if< |
| 302 | SplitterIsConvertibleTo<Container>::value>::type> |
| 303 | operator Container() const { // NOLINT(runtime/explicit) |
| 304 | return ConvertToContainer<Container, typename Container::value_type, |
| 305 | HasMappedType<Container>::value>()(*this); |
| 306 | } |
| 307 | |
| 308 | // Returns a pair with its .first and .second members set to the first two |
| 309 | // strings returned by the begin() iterator. Either/both of .first and .second |
| 310 | // will be constructed with empty strings if the iterator doesn't have a |
| 311 | // corresponding value. |
| 312 | template <typename First, typename Second> |
| 313 | operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) |
| 314 | absl::string_view first, second; |
| 315 | auto it = begin(); |
| 316 | if (it != end()) { |
| 317 | first = *it; |
| 318 | if (++it != end()) { |
| 319 | second = *it; |
| 320 | } |
| 321 | } |
| 322 | return {First(first), Second(second)}; |
| 323 | } |
| 324 | |
| 325 | private: |
| 326 | // ConvertToContainer is a functor converting a Splitter to the requested |
| 327 | // Container of ValueType. It is specialized below to optimize splitting to |
| 328 | // certain combinations of Container and ValueType. |
| 329 | // |
| 330 | // This base template handles the generic case of storing the split results in |
| 331 | // the requested non-map-like container and converting the split substrings to |
| 332 | // the requested type. |
| 333 | template <typename Container, typename ValueType, bool is_map = false> |
| 334 | struct ConvertToContainer { |
| 335 | Container operator()(const Splitter& splitter) const { |
| 336 | Container c; |
| 337 | auto it = std::inserter(c, c.end()); |
| 338 | for (const auto sp : splitter) { |
| 339 | *it++ = ValueType(sp); |
| 340 | } |
| 341 | return c; |
| 342 | } |
| 343 | }; |
| 344 | |
| 345 | // Partial specialization for a std::vector<absl::string_view>. |
| 346 | // |
| 347 | // Optimized for the common case of splitting to a |
| 348 | // std::vector<absl::string_view>. In this case we first split the results to |
| 349 | // a small array of absl::string_view on the stack, to reduce reallocations. |
| 350 | template <typename A> |
| 351 | struct ConvertToContainer<std::vector<absl::string_view, A>, |
| 352 | absl::string_view, false> { |
| 353 | std::vector<absl::string_view, A> operator()( |
| 354 | const Splitter& splitter) const { |
| 355 | struct raw_view { |
| 356 | const char* data; |
| 357 | size_t size; |
| 358 | operator absl::string_view() const { // NOLINT(runtime/explicit) |
| 359 | return {data, size}; |
| 360 | } |
| 361 | }; |
| 362 | std::vector<absl::string_view, A> v; |
| 363 | std::array<raw_view, 16> ar; |
| 364 | for (auto it = splitter.begin(); !it.at_end();) { |
| 365 | size_t index = 0; |
| 366 | do { |
| 367 | ar[index].data = it->data(); |
| 368 | ar[index].size = it->size(); |
| 369 | ++it; |
| 370 | } while (++index != ar.size() && !it.at_end()); |
| 371 | v.insert(v.end(), ar.begin(), ar.begin() + index); |
| 372 | } |
| 373 | return v; |
| 374 | } |
| 375 | }; |
| 376 | |
| 377 | // Partial specialization for a std::vector<std::string>. |
| 378 | // |
| 379 | // Optimized for the common case of splitting to a std::vector<std::string>. |
| 380 | // In this case we first split the results to a std::vector<absl::string_view> |
| 381 | // so the returned std::vector<std::string> can have space reserved to avoid |
| 382 | // std::string moves. |
| 383 | template <typename A> |
| 384 | struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { |
| 385 | std::vector<std::string, A> operator()(const Splitter& splitter) const { |
| 386 | const std::vector<absl::string_view> v = splitter; |
| 387 | return std::vector<std::string, A>(v.begin(), v.end()); |
| 388 | } |
| 389 | }; |
| 390 | |
| 391 | // Partial specialization for containers of pairs (e.g., maps). |
| 392 | // |
| 393 | // The algorithm is to insert a new pair into the map for each even-numbered |
| 394 | // item, with the even-numbered item as the key with a default-constructed |
| 395 | // value. Each odd-numbered item will then be assigned to the last pair's |
| 396 | // value. |
| 397 | template <typename Container, typename First, typename Second> |
| 398 | struct ConvertToContainer<Container, std::pair<const First, Second>, true> { |
| 399 | Container operator()(const Splitter& splitter) const { |
| 400 | Container m; |
| 401 | typename Container::iterator it; |
| 402 | bool insert = true; |
| 403 | for (const auto sp : splitter) { |
| 404 | if (insert) { |
| 405 | it = Inserter<Container>::Insert(&m, First(sp), Second()); |
| 406 | } else { |
| 407 | it->second = Second(sp); |
| 408 | } |
| 409 | insert = !insert; |
| 410 | } |
| 411 | return m; |
| 412 | } |
| 413 | |
| 414 | // Inserts the key and value into the given map, returning an iterator to |
| 415 | // the inserted item. Specialized for std::map and std::multimap to use |
| 416 | // emplace() and adapt emplace()'s return value. |
| 417 | template <typename Map> |
| 418 | struct Inserter { |
| 419 | using M = Map; |
| 420 | template <typename... Args> |
| 421 | static typename M::iterator Insert(M* m, Args&&... args) { |
| 422 | return m->insert(std::make_pair(std::forward<Args>(args)...)).first; |
| 423 | } |
| 424 | }; |
| 425 | |
| 426 | template <typename... Ts> |
| 427 | struct Inserter<std::map<Ts...>> { |
| 428 | using M = std::map<Ts...>; |
| 429 | template <typename... Args> |
| 430 | static typename M::iterator Insert(M* m, Args&&... args) { |
| 431 | return m->emplace(std::make_pair(std::forward<Args>(args)...)).first; |
| 432 | } |
| 433 | }; |
| 434 | |
| 435 | template <typename... Ts> |
| 436 | struct Inserter<std::multimap<Ts...>> { |
| 437 | using M = std::multimap<Ts...>; |
| 438 | template <typename... Args> |
| 439 | static typename M::iterator Insert(M* m, Args&&... args) { |
| 440 | return m->emplace(std::make_pair(std::forward<Args>(args)...)); |
| 441 | } |
| 442 | }; |
| 443 | }; |
| 444 | |
| 445 | ConvertibleToStringView text_; |
| 446 | Delimiter delimiter_; |
| 447 | Predicate predicate_; |
| 448 | }; |
| 449 | |
| 450 | } // namespace strings_internal |
| 451 | } // namespace absl |
| 452 | |
| 453 | #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ |