blob: a2f41c153131b1b87a88cb6802685ddcb6387ae2 [file] [log] [blame]
Austin Schuh36244a12019-09-21 17:52:38 -07001// Copyright 2017 The Abseil Authors.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// https://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15
16// This file declares INTERNAL parts of the Split API that are inline/templated
17// or otherwise need to be available at compile time. The main abstractions
18// defined in here are
19//
20// - ConvertibleToStringView
21// - SplitIterator<>
22// - Splitter<>
23//
24// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
25// absl/strings/str_split.h.
26//
27// IWYU pragma: private, include "absl/strings/str_split.h"
28
29#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
30#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
31
32#include <array>
33#include <initializer_list>
34#include <iterator>
35#include <map>
36#include <type_traits>
37#include <utility>
38#include <vector>
39
40#include "absl/base/macros.h"
41#include "absl/base/port.h"
42#include "absl/meta/type_traits.h"
43#include "absl/strings/string_view.h"
44
45#ifdef _GLIBCXX_DEBUG
46#include "absl/strings/internal/stl_type_traits.h"
47#endif // _GLIBCXX_DEBUG
48
49namespace absl {
Austin Schuhb4691e92020-12-31 12:37:18 -080050ABSL_NAMESPACE_BEGIN
Austin Schuh36244a12019-09-21 17:52:38 -070051namespace strings_internal {
52
53// This class is implicitly constructible from everything that absl::string_view
Austin Schuhb4691e92020-12-31 12:37:18 -080054// is implicitly constructible from, except for rvalue strings. This means it
55// can be used as a function parameter in places where passing a temporary
56// string might cause memory lifetime issues.
Austin Schuh36244a12019-09-21 17:52:38 -070057class ConvertibleToStringView {
58 public:
59 ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit)
60 : value_(s) {}
61 ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit)
62 ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit)
63 : value_(s) {}
64 ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit)
65 : value_(s) {}
66
67 // Matches rvalue strings and moves their data to a member.
Austin Schuhb4691e92020-12-31 12:37:18 -080068 ConvertibleToStringView(std::string&& s) = delete;
69 ConvertibleToStringView(const std::string&& s) = delete;
Austin Schuh36244a12019-09-21 17:52:38 -070070
71 absl::string_view value() const { return value_; }
72
73 private:
Austin Schuh36244a12019-09-21 17:52:38 -070074 absl::string_view value_;
75};
76
77// An iterator that enumerates the parts of a string from a Splitter. The text
78// to be split, the Delimiter, and the Predicate are all taken from the given
79// Splitter object. Iterators may only be compared if they refer to the same
80// Splitter instance.
81//
82// This class is NOT part of the public splitting API.
83template <typename Splitter>
84class SplitIterator {
85 public:
86 using iterator_category = std::input_iterator_tag;
87 using value_type = absl::string_view;
88 using difference_type = ptrdiff_t;
89 using pointer = const value_type*;
90 using reference = const value_type&;
91
92 enum State { kInitState, kLastState, kEndState };
93 SplitIterator(State state, const Splitter* splitter)
94 : pos_(0),
95 state_(state),
96 splitter_(splitter),
97 delimiter_(splitter->delimiter()),
98 predicate_(splitter->predicate()) {
99 // Hack to maintain backward compatibility. This one block makes it so an
100 // empty absl::string_view whose .data() happens to be nullptr behaves
101 // *differently* from an otherwise empty absl::string_view whose .data() is
102 // not nullptr. This is an undesirable difference in general, but this
103 // behavior is maintained to avoid breaking existing code that happens to
104 // depend on this old behavior/bug. Perhaps it will be fixed one day. The
105 // difference in behavior is as follows:
106 // Split(absl::string_view(""), '-'); // {""}
107 // Split(absl::string_view(), '-'); // {}
108 if (splitter_->text().data() == nullptr) {
109 state_ = kEndState;
110 pos_ = splitter_->text().size();
111 return;
112 }
113
114 if (state_ == kEndState) {
115 pos_ = splitter_->text().size();
116 } else {
117 ++(*this);
118 }
119 }
120
121 bool at_end() const { return state_ == kEndState; }
122
123 reference operator*() const { return curr_; }
124 pointer operator->() const { return &curr_; }
125
126 SplitIterator& operator++() {
127 do {
128 if (state_ == kLastState) {
129 state_ = kEndState;
130 return *this;
131 }
132 const absl::string_view text = splitter_->text();
133 const absl::string_view d = delimiter_.Find(text, pos_);
134 if (d.data() == text.data() + text.size()) state_ = kLastState;
135 curr_ = text.substr(pos_, d.data() - (text.data() + pos_));
136 pos_ += curr_.size() + d.size();
137 } while (!predicate_(curr_));
138 return *this;
139 }
140
141 SplitIterator operator++(int) {
142 SplitIterator old(*this);
143 ++(*this);
144 return old;
145 }
146
147 friend bool operator==(const SplitIterator& a, const SplitIterator& b) {
148 return a.state_ == b.state_ && a.pos_ == b.pos_;
149 }
150
151 friend bool operator!=(const SplitIterator& a, const SplitIterator& b) {
152 return !(a == b);
153 }
154
155 private:
156 size_t pos_;
157 State state_;
158 absl::string_view curr_;
159 const Splitter* splitter_;
160 typename Splitter::DelimiterType delimiter_;
161 typename Splitter::PredicateType predicate_;
162};
163
164// HasMappedType<T>::value is true iff there exists a type T::mapped_type.
165template <typename T, typename = void>
166struct HasMappedType : std::false_type {};
167template <typename T>
168struct HasMappedType<T, absl::void_t<typename T::mapped_type>>
169 : std::true_type {};
170
171// HasValueType<T>::value is true iff there exists a type T::value_type.
172template <typename T, typename = void>
173struct HasValueType : std::false_type {};
174template <typename T>
175struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type {
176};
177
178// HasConstIterator<T>::value is true iff there exists a type T::const_iterator.
179template <typename T, typename = void>
180struct HasConstIterator : std::false_type {};
181template <typename T>
182struct HasConstIterator<T, absl::void_t<typename T::const_iterator>>
183 : std::true_type {};
184
185// IsInitializerList<T>::value is true iff T is an std::initializer_list. More
186// details below in Splitter<> where this is used.
// Declaration-only overload pair, used purely inside decltype: a pointer to
// some std::initializer_list<Element> selects the std::true_type overload,
// while any other pointer falls through to the variadic overload.
template <typename Element>
std::true_type IsInitializerListDispatch(std::initializer_list<Element>*);
std::false_type IsInitializerListDispatch(...);  // fallback: No

// Inherits from whichever result type overload resolution picks for a T*.
template <typename T>
struct IsInitializerList
    : decltype(IsInitializerListDispatch(std::declval<T*>())) {};
193
// SplitterIsConvertibleTo<C>::value is true iff the Splitter may be implicitly
// converted to type 'C'.
//
// Conversion is restricted to container-like types (detected by the presence
// of value_type and const_iterator member types) whose elements can be
// constructed from an absl::string_view. Conversion to an
// std::initializer_list (which also has a const_iterator) is explicitly
// disabled; otherwise, code compiled in C++11 would get an error due to
// ambiguous conversion paths (in C++11 std::vector<T>::operator= is overloaded
// to take either a std::vector<T> or an std::initializer_list<T>).
203
204template <typename C, bool has_value_type, bool has_mapped_type>
205struct SplitterIsConvertibleToImpl : std::false_type {};
206
207template <typename C>
208struct SplitterIsConvertibleToImpl<C, true, false>
209 : std::is_constructible<typename C::value_type, absl::string_view> {};
210
211template <typename C>
212struct SplitterIsConvertibleToImpl<C, true, true>
213 : absl::conjunction<
214 std::is_constructible<typename C::key_type, absl::string_view>,
215 std::is_constructible<typename C::mapped_type, absl::string_view>> {};
216
217template <typename C>
218struct SplitterIsConvertibleTo
219 : SplitterIsConvertibleToImpl<
220 C,
221#ifdef _GLIBCXX_DEBUG
222 !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value &&
223#endif // _GLIBCXX_DEBUG
224 !IsInitializerList<
225 typename std::remove_reference<C>::type>::value &&
226 HasValueType<C>::value && HasConstIterator<C>::value,
227 HasMappedType<C>::value> {
228};
229
230// This class implements the range that is returned by absl::StrSplit(). This
231// class has templated conversion operators that allow it to be implicitly
232// converted to a variety of types that the caller may have specified on the
233// left-hand side of an assignment.
234//
235// The main interface for interacting with this class is through its implicit
236// conversion operators. However, this class may also be used like a container
237// in that it has .begin() and .end() member functions. It may also be used
238// within a range-for loop.
239//
240// Output containers can be collections of any type that is constructible from
241// an absl::string_view.
242//
// A Predicate functor may be supplied. This predicate will be used to filter
244// the split strings: only strings for which the predicate returns true will be
245// kept. A Predicate object is any unary functor that takes an absl::string_view
246// and returns bool.
Austin Schuhb4691e92020-12-31 12:37:18 -0800247//
248// The StringType parameter can be either string_view or string, depending on
249// whether the Splitter refers to a string stored elsewhere, or if the string
250// resides inside the Splitter itself.
251template <typename Delimiter, typename Predicate, typename StringType>
Austin Schuh36244a12019-09-21 17:52:38 -0700252class Splitter {
253 public:
254 using DelimiterType = Delimiter;
255 using PredicateType = Predicate;
256 using const_iterator = strings_internal::SplitIterator<Splitter>;
257 using value_type = typename std::iterator_traits<const_iterator>::value_type;
258
Austin Schuhb4691e92020-12-31 12:37:18 -0800259 Splitter(StringType input_text, Delimiter d, Predicate p)
Austin Schuh36244a12019-09-21 17:52:38 -0700260 : text_(std::move(input_text)),
261 delimiter_(std::move(d)),
262 predicate_(std::move(p)) {}
263
Austin Schuhb4691e92020-12-31 12:37:18 -0800264 absl::string_view text() const { return text_; }
Austin Schuh36244a12019-09-21 17:52:38 -0700265 const Delimiter& delimiter() const { return delimiter_; }
266 const Predicate& predicate() const { return predicate_; }
267
268 // Range functions that iterate the split substrings as absl::string_view
269 // objects. These methods enable a Splitter to be used in a range-based for
270 // loop.
271 const_iterator begin() const { return {const_iterator::kInitState, this}; }
272 const_iterator end() const { return {const_iterator::kEndState, this}; }
273
274 // An implicit conversion operator that is restricted to only those containers
275 // that the splitter is convertible to.
276 template <typename Container,
277 typename = typename std::enable_if<
278 SplitterIsConvertibleTo<Container>::value>::type>
279 operator Container() const { // NOLINT(runtime/explicit)
280 return ConvertToContainer<Container, typename Container::value_type,
281 HasMappedType<Container>::value>()(*this);
282 }
283
284 // Returns a pair with its .first and .second members set to the first two
285 // strings returned by the begin() iterator. Either/both of .first and .second
286 // will be constructed with empty strings if the iterator doesn't have a
287 // corresponding value.
288 template <typename First, typename Second>
289 operator std::pair<First, Second>() const { // NOLINT(runtime/explicit)
290 absl::string_view first, second;
291 auto it = begin();
292 if (it != end()) {
293 first = *it;
294 if (++it != end()) {
295 second = *it;
296 }
297 }
298 return {First(first), Second(second)};
299 }
300
301 private:
302 // ConvertToContainer is a functor converting a Splitter to the requested
303 // Container of ValueType. It is specialized below to optimize splitting to
304 // certain combinations of Container and ValueType.
305 //
306 // This base template handles the generic case of storing the split results in
307 // the requested non-map-like container and converting the split substrings to
308 // the requested type.
309 template <typename Container, typename ValueType, bool is_map = false>
310 struct ConvertToContainer {
311 Container operator()(const Splitter& splitter) const {
312 Container c;
313 auto it = std::inserter(c, c.end());
Austin Schuhb4691e92020-12-31 12:37:18 -0800314 for (const auto& sp : splitter) {
Austin Schuh36244a12019-09-21 17:52:38 -0700315 *it++ = ValueType(sp);
316 }
317 return c;
318 }
319 };
320
321 // Partial specialization for a std::vector<absl::string_view>.
322 //
323 // Optimized for the common case of splitting to a
324 // std::vector<absl::string_view>. In this case we first split the results to
325 // a small array of absl::string_view on the stack, to reduce reallocations.
326 template <typename A>
327 struct ConvertToContainer<std::vector<absl::string_view, A>,
328 absl::string_view, false> {
329 std::vector<absl::string_view, A> operator()(
330 const Splitter& splitter) const {
331 struct raw_view {
332 const char* data;
333 size_t size;
334 operator absl::string_view() const { // NOLINT(runtime/explicit)
335 return {data, size};
336 }
337 };
338 std::vector<absl::string_view, A> v;
339 std::array<raw_view, 16> ar;
340 for (auto it = splitter.begin(); !it.at_end();) {
341 size_t index = 0;
342 do {
343 ar[index].data = it->data();
344 ar[index].size = it->size();
345 ++it;
346 } while (++index != ar.size() && !it.at_end());
347 v.insert(v.end(), ar.begin(), ar.begin() + index);
348 }
349 return v;
350 }
351 };
352
353 // Partial specialization for a std::vector<std::string>.
354 //
355 // Optimized for the common case of splitting to a std::vector<std::string>.
356 // In this case we first split the results to a std::vector<absl::string_view>
357 // so the returned std::vector<std::string> can have space reserved to avoid
358 // std::string moves.
359 template <typename A>
360 struct ConvertToContainer<std::vector<std::string, A>, std::string, false> {
361 std::vector<std::string, A> operator()(const Splitter& splitter) const {
362 const std::vector<absl::string_view> v = splitter;
363 return std::vector<std::string, A>(v.begin(), v.end());
364 }
365 };
366
367 // Partial specialization for containers of pairs (e.g., maps).
368 //
369 // The algorithm is to insert a new pair into the map for each even-numbered
370 // item, with the even-numbered item as the key with a default-constructed
371 // value. Each odd-numbered item will then be assigned to the last pair's
372 // value.
373 template <typename Container, typename First, typename Second>
374 struct ConvertToContainer<Container, std::pair<const First, Second>, true> {
375 Container operator()(const Splitter& splitter) const {
376 Container m;
377 typename Container::iterator it;
378 bool insert = true;
Austin Schuhb4691e92020-12-31 12:37:18 -0800379 for (const auto& sp : splitter) {
Austin Schuh36244a12019-09-21 17:52:38 -0700380 if (insert) {
381 it = Inserter<Container>::Insert(&m, First(sp), Second());
382 } else {
383 it->second = Second(sp);
384 }
385 insert = !insert;
386 }
387 return m;
388 }
389
390 // Inserts the key and value into the given map, returning an iterator to
391 // the inserted item. Specialized for std::map and std::multimap to use
392 // emplace() and adapt emplace()'s return value.
393 template <typename Map>
394 struct Inserter {
395 using M = Map;
396 template <typename... Args>
397 static typename M::iterator Insert(M* m, Args&&... args) {
398 return m->insert(std::make_pair(std::forward<Args>(args)...)).first;
399 }
400 };
401
402 template <typename... Ts>
403 struct Inserter<std::map<Ts...>> {
404 using M = std::map<Ts...>;
405 template <typename... Args>
406 static typename M::iterator Insert(M* m, Args&&... args) {
407 return m->emplace(std::make_pair(std::forward<Args>(args)...)).first;
408 }
409 };
410
411 template <typename... Ts>
412 struct Inserter<std::multimap<Ts...>> {
413 using M = std::multimap<Ts...>;
414 template <typename... Args>
415 static typename M::iterator Insert(M* m, Args&&... args) {
416 return m->emplace(std::make_pair(std::forward<Args>(args)...));
417 }
418 };
419 };
420
Austin Schuhb4691e92020-12-31 12:37:18 -0800421 StringType text_;
Austin Schuh36244a12019-09-21 17:52:38 -0700422 Delimiter delimiter_;
423 Predicate predicate_;
424};
425
426} // namespace strings_internal
Austin Schuhb4691e92020-12-31 12:37:18 -0800427ABSL_NAMESPACE_END
Austin Schuh36244a12019-09-21 17:52:38 -0700428} // namespace absl
429
430#endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_