Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame^] | 1 | // Copyright 2017 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/strings/str_split.h" |
| 16 | |
#include <deque>
#include <initializer_list>
#include <list>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>
#include <vector>
| 27 | |
| 28 | #include "gmock/gmock.h" |
| 29 | #include "gtest/gtest.h" |
| 30 | #include "absl/base/dynamic_annotations.h" // for RunningOnValgrind |
| 31 | #include "absl/base/macros.h" |
| 32 | #include "absl/strings/numbers.h" |
| 33 | |
| 34 | namespace { |
| 35 | |
| 36 | using ::testing::ElementsAre; |
| 37 | using ::testing::Pair; |
| 38 | using ::testing::UnorderedElementsAre; |
| 39 | |
| 40 | TEST(Split, TraitsTest) { |
| 41 | static_assert(!absl::strings_internal::SplitterIsConvertibleTo<int>::value, |
| 42 | ""); |
| 43 | static_assert( |
| 44 | !absl::strings_internal::SplitterIsConvertibleTo<std::string>::value, ""); |
| 45 | static_assert(absl::strings_internal::SplitterIsConvertibleTo< |
| 46 | std::vector<std::string>>::value, |
| 47 | ""); |
| 48 | static_assert( |
| 49 | !absl::strings_internal::SplitterIsConvertibleTo<std::vector<int>>::value, |
| 50 | ""); |
| 51 | static_assert(absl::strings_internal::SplitterIsConvertibleTo< |
| 52 | std::vector<absl::string_view>>::value, |
| 53 | ""); |
| 54 | static_assert(absl::strings_internal::SplitterIsConvertibleTo< |
| 55 | std::map<std::string, std::string>>::value, |
| 56 | ""); |
| 57 | static_assert(absl::strings_internal::SplitterIsConvertibleTo< |
| 58 | std::map<absl::string_view, absl::string_view>>::value, |
| 59 | ""); |
| 60 | static_assert(!absl::strings_internal::SplitterIsConvertibleTo< |
| 61 | std::map<int, std::string>>::value, |
| 62 | ""); |
| 63 | static_assert(!absl::strings_internal::SplitterIsConvertibleTo< |
| 64 | std::map<std::string, int>>::value, |
| 65 | ""); |
| 66 | } |
| 67 | |
| 68 | // This tests the overall split API, which is made up of the absl::StrSplit() |
| 69 | // function and the Delimiter objects in the absl:: namespace. |
| 70 | // This TEST macro is outside of any namespace to require full specification of |
| 71 | // namespaces just like callers will need to use. |
| 72 | TEST(Split, APIExamples) { |
| 73 | { |
| 74 | // Passes std::string delimiter. Assumes the default of ByString. |
| 75 | std::vector<std::string> v = absl::StrSplit("a,b,c", ","); // NOLINT |
| 76 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 77 | |
| 78 | // Equivalent to... |
| 79 | using absl::ByString; |
| 80 | v = absl::StrSplit("a,b,c", ByString(",")); |
| 81 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 82 | |
| 83 | // Equivalent to... |
| 84 | EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")), |
| 85 | ElementsAre("a", "b", "c")); |
| 86 | } |
| 87 | |
| 88 | { |
| 89 | // Same as above, but using a single character as the delimiter. |
| 90 | std::vector<std::string> v = absl::StrSplit("a,b,c", ','); |
| 91 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 92 | |
| 93 | // Equivalent to... |
| 94 | using absl::ByChar; |
| 95 | v = absl::StrSplit("a,b,c", ByChar(',')); |
| 96 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 97 | } |
| 98 | |
| 99 | { |
| 100 | // Uses the Literal std::string "=>" as the delimiter. |
| 101 | const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>"); |
| 102 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 103 | } |
| 104 | |
| 105 | { |
| 106 | // The substrings are returned as string_views, eliminating copying. |
| 107 | std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); |
| 108 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 109 | } |
| 110 | |
| 111 | { |
| 112 | // Leading and trailing empty substrings. |
| 113 | std::vector<std::string> v = absl::StrSplit(",a,b,c,", ','); |
| 114 | EXPECT_THAT(v, ElementsAre("", "a", "b", "c", "")); |
| 115 | } |
| 116 | |
| 117 | { |
| 118 | // Splits on a delimiter that is not found. |
| 119 | std::vector<std::string> v = absl::StrSplit("abc", ','); |
| 120 | EXPECT_THAT(v, ElementsAre("abc")); |
| 121 | } |
| 122 | |
| 123 | { |
| 124 | // Splits the input std::string into individual characters by using an empty |
| 125 | // std::string as the delimiter. |
| 126 | std::vector<std::string> v = absl::StrSplit("abc", ""); |
| 127 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 128 | } |
| 129 | |
| 130 | { |
| 131 | // Splits std::string data with embedded NUL characters, using NUL as the |
| 132 | // delimiter. A simple delimiter of "\0" doesn't work because strlen() will |
| 133 | // say that's the empty std::string when constructing the absl::string_view |
| 134 | // delimiter. Instead, a non-empty std::string containing NUL can be used as the |
| 135 | // delimiter. |
| 136 | std::string embedded_nulls("a\0b\0c", 5); |
| 137 | std::string null_delim("\0", 1); |
| 138 | std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim); |
| 139 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 140 | } |
| 141 | |
| 142 | { |
| 143 | // Stores first two split strings as the members in a std::pair. |
| 144 | std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); |
| 145 | EXPECT_EQ("a", p.first); |
| 146 | EXPECT_EQ("b", p.second); |
| 147 | // "c" is omitted because std::pair can hold only two elements. |
| 148 | } |
| 149 | |
| 150 | { |
| 151 | // Results stored in std::set<std::string> |
| 152 | std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ','); |
| 153 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 154 | } |
| 155 | |
| 156 | { |
| 157 | // Uses a non-const char* delimiter. |
| 158 | char a[] = ","; |
| 159 | char* d = a + 0; |
| 160 | std::vector<std::string> v = absl::StrSplit("a,b,c", d); |
| 161 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 162 | } |
| 163 | |
| 164 | { |
| 165 | // Results split using either of , or ; |
| 166 | using absl::ByAnyChar; |
| 167 | std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;")); |
| 168 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 169 | } |
| 170 | |
| 171 | { |
| 172 | // Uses the SkipWhitespace predicate. |
| 173 | using absl::SkipWhitespace; |
| 174 | std::vector<std::string> v = |
| 175 | absl::StrSplit(" a , ,,b,", ',', SkipWhitespace()); |
| 176 | EXPECT_THAT(v, ElementsAre(" a ", "b")); |
| 177 | } |
| 178 | |
| 179 | { |
| 180 | // Uses the ByLength delimiter. |
| 181 | using absl::ByLength; |
| 182 | std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3)); |
| 183 | EXPECT_THAT(v, ElementsAre("abc", "def", "g")); |
| 184 | } |
| 185 | |
| 186 | { |
| 187 | // Different forms of initialization / conversion. |
| 188 | std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); |
| 189 | EXPECT_THAT(v1, ElementsAre("a", "b", "c")); |
| 190 | std::vector<std::string> v2(absl::StrSplit("a,b,c", ',')); |
| 191 | EXPECT_THAT(v2, ElementsAre("a", "b", "c")); |
| 192 | auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ',')); |
| 193 | EXPECT_THAT(v3, ElementsAre("a", "b", "c")); |
| 194 | v3 = absl::StrSplit("a,b,c", ','); |
| 195 | EXPECT_THAT(v3, ElementsAre("a", "b", "c")); |
| 196 | } |
| 197 | |
| 198 | { |
| 199 | // Results stored in a std::map. |
| 200 | std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ','); |
| 201 | EXPECT_EQ(2, m.size()); |
| 202 | EXPECT_EQ("3", m["a"]); |
| 203 | EXPECT_EQ("2", m["b"]); |
| 204 | } |
| 205 | |
| 206 | { |
| 207 | // Results stored in a std::multimap. |
| 208 | std::multimap<std::string, std::string> m = |
| 209 | absl::StrSplit("a,1,b,2,a,3", ','); |
| 210 | EXPECT_EQ(3, m.size()); |
| 211 | auto it = m.find("a"); |
| 212 | EXPECT_EQ("1", it->second); |
| 213 | ++it; |
| 214 | EXPECT_EQ("3", it->second); |
| 215 | it = m.find("b"); |
| 216 | EXPECT_EQ("2", it->second); |
| 217 | } |
| 218 | |
| 219 | { |
| 220 | // Demonstrates use in a range-based for loop in C++11. |
| 221 | std::string s = "x,x,x,x,x,x,x"; |
| 222 | for (absl::string_view sp : absl::StrSplit(s, ',')) { |
| 223 | EXPECT_EQ("x", sp); |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | { |
| 228 | // Demonstrates use with a Predicate in a range-based for loop. |
| 229 | using absl::SkipWhitespace; |
| 230 | std::string s = " ,x,,x,,x,x,x,,"; |
| 231 | for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) { |
| 232 | EXPECT_EQ("x", sp); |
| 233 | } |
| 234 | } |
| 235 | |
| 236 | { |
| 237 | // Demonstrates a "smart" split to std::map using two separate calls to |
| 238 | // absl::StrSplit. One call to split the records, and another call to split |
| 239 | // the keys and values. This also uses the Limit delimiter so that the |
| 240 | // std::string "a=b=c" will split to "a" -> "b=c". |
| 241 | std::map<std::string, std::string> m; |
| 242 | for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { |
| 243 | m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); |
| 244 | } |
| 245 | EXPECT_EQ("b=c", m.find("a")->second); |
| 246 | EXPECT_EQ("e", m.find("d")->second); |
| 247 | EXPECT_EQ("", m.find("f")->second); |
| 248 | EXPECT_EQ("", m.find("g")->second); |
| 249 | } |
| 250 | } |
| 251 | |
| 252 | // |
| 253 | // Tests for SplitIterator |
| 254 | // |
| 255 | |
| 256 | TEST(SplitIterator, Basics) { |
| 257 | auto splitter = absl::StrSplit("a,b", ','); |
| 258 | auto it = splitter.begin(); |
| 259 | auto end = splitter.end(); |
| 260 | |
| 261 | EXPECT_NE(it, end); |
| 262 | EXPECT_EQ("a", *it); // tests dereference |
| 263 | ++it; // tests preincrement |
| 264 | EXPECT_NE(it, end); |
| 265 | EXPECT_EQ("b", |
| 266 | std::string(it->data(), it->size())); // tests dereference as ptr |
| 267 | it++; // tests postincrement |
| 268 | EXPECT_EQ(it, end); |
| 269 | } |
| 270 | |
| 271 | // Simple Predicate to skip a particular string. |
| 272 | class Skip { |
| 273 | public: |
| 274 | explicit Skip(const std::string& s) : s_(s) {} |
| 275 | bool operator()(absl::string_view sp) { return sp != s_; } |
| 276 | |
| 277 | private: |
| 278 | std::string s_; |
| 279 | }; |
| 280 | |
| 281 | TEST(SplitIterator, Predicate) { |
| 282 | auto splitter = absl::StrSplit("a,b,c", ',', Skip("b")); |
| 283 | auto it = splitter.begin(); |
| 284 | auto end = splitter.end(); |
| 285 | |
| 286 | EXPECT_NE(it, end); |
| 287 | EXPECT_EQ("a", *it); // tests dereference |
| 288 | ++it; // tests preincrement -- "b" should be skipped here. |
| 289 | EXPECT_NE(it, end); |
| 290 | EXPECT_EQ("c", |
| 291 | std::string(it->data(), it->size())); // tests dereference as ptr |
| 292 | it++; // tests postincrement |
| 293 | EXPECT_EQ(it, end); |
| 294 | } |
| 295 | |
| 296 | TEST(SplitIterator, EdgeCases) { |
| 297 | // Expected input and output, assuming a delimiter of ',' |
| 298 | struct { |
| 299 | std::string in; |
| 300 | std::vector<std::string> expect; |
| 301 | } specs[] = { |
| 302 | {"", {""}}, |
| 303 | {"foo", {"foo"}}, |
| 304 | {",", {"", ""}}, |
| 305 | {",foo", {"", "foo"}}, |
| 306 | {"foo,", {"foo", ""}}, |
| 307 | {",foo,", {"", "foo", ""}}, |
| 308 | {"foo,bar", {"foo", "bar"}}, |
| 309 | }; |
| 310 | |
| 311 | for (const auto& spec : specs) { |
| 312 | SCOPED_TRACE(spec.in); |
| 313 | auto splitter = absl::StrSplit(spec.in, ','); |
| 314 | auto it = splitter.begin(); |
| 315 | auto end = splitter.end(); |
| 316 | for (const auto& expected : spec.expect) { |
| 317 | EXPECT_NE(it, end); |
| 318 | EXPECT_EQ(expected, *it++); |
| 319 | } |
| 320 | EXPECT_EQ(it, end); |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | TEST(Splitter, Const) { |
| 325 | const auto splitter = absl::StrSplit("a,b,c", ','); |
| 326 | EXPECT_THAT(splitter, ElementsAre("a", "b", "c")); |
| 327 | } |
| 328 | |
| 329 | TEST(Split, EmptyAndNull) { |
| 330 | // Attention: Splitting a null absl::string_view is different than splitting |
| 331 | // an empty absl::string_view even though both string_views are considered |
| 332 | // equal. This behavior is likely surprising and undesirable. However, to |
| 333 | // maintain backward compatibility, there is a small "hack" in |
| 334 | // str_split_internal.h that preserves this behavior. If that behavior is ever |
| 335 | // changed/fixed, this test will need to be updated. |
| 336 | EXPECT_THAT(absl::StrSplit(absl::string_view(""), '-'), ElementsAre("")); |
| 337 | EXPECT_THAT(absl::StrSplit(absl::string_view(), '-'), ElementsAre()); |
| 338 | } |
| 339 | |
| 340 | TEST(SplitIterator, EqualityAsEndCondition) { |
| 341 | auto splitter = absl::StrSplit("a,b,c", ','); |
| 342 | auto it = splitter.begin(); |
| 343 | auto it2 = it; |
| 344 | |
| 345 | // Increments it2 twice to point to "c" in the input text. |
| 346 | ++it2; |
| 347 | ++it2; |
| 348 | EXPECT_EQ("c", *it2); |
| 349 | |
| 350 | // This test uses a non-end SplitIterator as the terminating condition in a |
| 351 | // for loop. This relies on SplitIterator equality for non-end SplitIterators |
| 352 | // working correctly. At this point it2 points to "c", and we use that as the |
| 353 | // "end" condition in this test. |
| 354 | std::vector<absl::string_view> v; |
| 355 | for (; it != it2; ++it) { |
| 356 | v.push_back(*it); |
| 357 | } |
| 358 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 359 | } |
| 360 | |
| 361 | // |
| 362 | // Tests for Splitter |
| 363 | // |
| 364 | |
| 365 | TEST(Splitter, RangeIterators) { |
| 366 | auto splitter = absl::StrSplit("a,b,c", ','); |
| 367 | std::vector<absl::string_view> output; |
| 368 | for (const absl::string_view p : splitter) { |
| 369 | output.push_back(p); |
| 370 | } |
| 371 | EXPECT_THAT(output, ElementsAre("a", "b", "c")); |
| 372 | } |
| 373 | |
| 374 | // Some template functions for use in testing conversion operators |
| 375 | template <typename ContainerType, typename Splitter> |
| 376 | void TestConversionOperator(const Splitter& splitter) { |
| 377 | ContainerType output = splitter; |
| 378 | EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d")); |
| 379 | } |
| 380 | |
| 381 | template <typename MapType, typename Splitter> |
| 382 | void TestMapConversionOperator(const Splitter& splitter) { |
| 383 | MapType m = splitter; |
| 384 | EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d"))); |
| 385 | } |
| 386 | |
// Converts `splitter` to std::pair<FirstType, SecondType> via
// copy-initialization and expects the pair to equal ("a", "b").
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  using PairType = std::pair<FirstType, SecondType>;
  PairType converted = splitter;
  EXPECT_EQ(converted, PairType("a", "b"));
}
| 392 | |
| 393 | TEST(Splitter, ConversionOperator) { |
| 394 | auto splitter = absl::StrSplit("a,b,c,d", ','); |
| 395 | |
| 396 | TestConversionOperator<std::vector<absl::string_view>>(splitter); |
| 397 | TestConversionOperator<std::vector<std::string>>(splitter); |
| 398 | TestConversionOperator<std::list<absl::string_view>>(splitter); |
| 399 | TestConversionOperator<std::list<std::string>>(splitter); |
| 400 | TestConversionOperator<std::deque<absl::string_view>>(splitter); |
| 401 | TestConversionOperator<std::deque<std::string>>(splitter); |
| 402 | TestConversionOperator<std::set<absl::string_view>>(splitter); |
| 403 | TestConversionOperator<std::set<std::string>>(splitter); |
| 404 | TestConversionOperator<std::multiset<absl::string_view>>(splitter); |
| 405 | TestConversionOperator<std::multiset<std::string>>(splitter); |
| 406 | TestConversionOperator<std::unordered_set<std::string>>(splitter); |
| 407 | |
| 408 | // Tests conversion to map-like objects. |
| 409 | |
| 410 | TestMapConversionOperator<std::map<absl::string_view, absl::string_view>>( |
| 411 | splitter); |
| 412 | TestMapConversionOperator<std::map<absl::string_view, std::string>>(splitter); |
| 413 | TestMapConversionOperator<std::map<std::string, absl::string_view>>(splitter); |
| 414 | TestMapConversionOperator<std::map<std::string, std::string>>(splitter); |
| 415 | TestMapConversionOperator< |
| 416 | std::multimap<absl::string_view, absl::string_view>>(splitter); |
| 417 | TestMapConversionOperator<std::multimap<absl::string_view, std::string>>( |
| 418 | splitter); |
| 419 | TestMapConversionOperator<std::multimap<std::string, absl::string_view>>( |
| 420 | splitter); |
| 421 | TestMapConversionOperator<std::multimap<std::string, std::string>>(splitter); |
| 422 | TestMapConversionOperator<std::unordered_map<std::string, std::string>>( |
| 423 | splitter); |
| 424 | |
| 425 | // Tests conversion to std::pair |
| 426 | |
| 427 | TestPairConversionOperator<absl::string_view, absl::string_view>(splitter); |
| 428 | TestPairConversionOperator<absl::string_view, std::string>(splitter); |
| 429 | TestPairConversionOperator<std::string, absl::string_view>(splitter); |
| 430 | TestPairConversionOperator<std::string, std::string>(splitter); |
| 431 | } |
| 432 | |
| 433 | // A few additional tests for conversion to std::pair. This conversion is |
| 434 | // different from others because a std::pair always has exactly two elements: |
| 435 | // .first and .second. The split has to work even when the split has |
| 436 | // less-than, equal-to, and more-than 2 strings. |
| 437 | TEST(Splitter, ToPair) { |
| 438 | { |
| 439 | // Empty std::string |
| 440 | std::pair<std::string, std::string> p = absl::StrSplit("", ','); |
| 441 | EXPECT_EQ("", p.first); |
| 442 | EXPECT_EQ("", p.second); |
| 443 | } |
| 444 | |
| 445 | { |
| 446 | // Only first |
| 447 | std::pair<std::string, std::string> p = absl::StrSplit("a", ','); |
| 448 | EXPECT_EQ("a", p.first); |
| 449 | EXPECT_EQ("", p.second); |
| 450 | } |
| 451 | |
| 452 | { |
| 453 | // Only second |
| 454 | std::pair<std::string, std::string> p = absl::StrSplit(",b", ','); |
| 455 | EXPECT_EQ("", p.first); |
| 456 | EXPECT_EQ("b", p.second); |
| 457 | } |
| 458 | |
| 459 | { |
| 460 | // First and second. |
| 461 | std::pair<std::string, std::string> p = absl::StrSplit("a,b", ','); |
| 462 | EXPECT_EQ("a", p.first); |
| 463 | EXPECT_EQ("b", p.second); |
| 464 | } |
| 465 | |
| 466 | { |
| 467 | // First and second and then more stuff that will be ignored. |
| 468 | std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); |
| 469 | EXPECT_EQ("a", p.first); |
| 470 | EXPECT_EQ("b", p.second); |
| 471 | // "c" is omitted. |
| 472 | } |
| 473 | } |
| 474 | |
| 475 | TEST(Splitter, Predicates) { |
| 476 | static const char kTestChars[] = ",a, ,b,"; |
| 477 | using absl::AllowEmpty; |
| 478 | using absl::SkipEmpty; |
| 479 | using absl::SkipWhitespace; |
| 480 | |
| 481 | { |
| 482 | // No predicate. Does not skip empties. |
| 483 | auto splitter = absl::StrSplit(kTestChars, ','); |
| 484 | std::vector<std::string> v = splitter; |
| 485 | EXPECT_THAT(v, ElementsAre("", "a", " ", "b", "")); |
| 486 | } |
| 487 | |
| 488 | { |
| 489 | // Allows empty strings. Same behavior as no predicate at all. |
| 490 | auto splitter = absl::StrSplit(kTestChars, ',', AllowEmpty()); |
| 491 | std::vector<std::string> v_allowempty = splitter; |
| 492 | EXPECT_THAT(v_allowempty, ElementsAre("", "a", " ", "b", "")); |
| 493 | |
| 494 | // Ensures AllowEmpty equals the behavior with no predicate. |
| 495 | auto splitter_nopredicate = absl::StrSplit(kTestChars, ','); |
| 496 | std::vector<std::string> v_nopredicate = splitter_nopredicate; |
| 497 | EXPECT_EQ(v_allowempty, v_nopredicate); |
| 498 | } |
| 499 | |
| 500 | { |
| 501 | // Skips empty strings. |
| 502 | auto splitter = absl::StrSplit(kTestChars, ',', SkipEmpty()); |
| 503 | std::vector<std::string> v = splitter; |
| 504 | EXPECT_THAT(v, ElementsAre("a", " ", "b")); |
| 505 | } |
| 506 | |
| 507 | { |
| 508 | // Skips empty and all-whitespace strings. |
| 509 | auto splitter = absl::StrSplit(kTestChars, ',', SkipWhitespace()); |
| 510 | std::vector<std::string> v = splitter; |
| 511 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 512 | } |
| 513 | } |
| 514 | |
| 515 | // |
| 516 | // Tests for StrSplit() |
| 517 | // |
| 518 | |
| 519 | TEST(Split, Basics) { |
| 520 | { |
| 521 | // Doesn't really do anything useful because the return value is ignored, |
| 522 | // but it should work. |
| 523 | absl::StrSplit("a,b,c", ','); |
| 524 | } |
| 525 | |
| 526 | { |
| 527 | std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); |
| 528 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 529 | } |
| 530 | |
| 531 | { |
| 532 | std::vector<std::string> v = absl::StrSplit("a,b,c", ','); |
| 533 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 534 | } |
| 535 | |
| 536 | { |
| 537 | // Ensures that assignment works. This requires a little extra work with |
| 538 | // C++11 because of overloads with initializer_list. |
| 539 | std::vector<std::string> v; |
| 540 | v = absl::StrSplit("a,b,c", ','); |
| 541 | |
| 542 | EXPECT_THAT(v, ElementsAre("a", "b", "c")); |
| 543 | std::map<std::string, std::string> m; |
| 544 | m = absl::StrSplit("a,b,c", ','); |
| 545 | EXPECT_EQ(2, m.size()); |
| 546 | std::unordered_map<std::string, std::string> hm; |
| 547 | hm = absl::StrSplit("a,b,c", ','); |
| 548 | EXPECT_EQ(2, hm.size()); |
| 549 | } |
| 550 | } |
| 551 | |
| 552 | absl::string_view ReturnStringView() { return "Hello World"; } |
// Returns the string literal "Hello World" as a const char*.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns the string literal "Hello World" as a non-const char*. The const is
// cast away only to obtain the pointer type; the literal must not be written.
char* ReturnCharP() {
  const char* const greeting = "Hello World";
  return const_cast<char*>(greeting);
}
| 555 | |
| 556 | TEST(Split, AcceptsCertainTemporaries) { |
| 557 | std::vector<std::string> v; |
| 558 | v = absl::StrSplit(ReturnStringView(), ' '); |
| 559 | EXPECT_THAT(v, ElementsAre("Hello", "World")); |
| 560 | v = absl::StrSplit(ReturnConstCharP(), ' '); |
| 561 | EXPECT_THAT(v, ElementsAre("Hello", "World")); |
| 562 | v = absl::StrSplit(ReturnCharP(), ' '); |
| 563 | EXPECT_THAT(v, ElementsAre("Hello", "World")); |
| 564 | } |
| 565 | |
| 566 | TEST(Split, Temporary) { |
| 567 | // Use a std::string longer than the SSO length, so that when the temporary is |
| 568 | // destroyed, if the splitter keeps a reference to the std::string's contents, |
| 569 | // it'll reference freed memory instead of just dead on-stack memory. |
| 570 | const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u"; |
| 571 | EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input)) |
| 572 | << "Input should be larger than fits on the stack."; |
| 573 | |
| 574 | // This happens more often in C++11 as part of a range-based for loop. |
| 575 | auto splitter = absl::StrSplit(std::string(input), ','); |
| 576 | std::string expected = "a"; |
| 577 | for (absl::string_view letter : splitter) { |
| 578 | EXPECT_EQ(expected, letter); |
| 579 | ++expected[0]; |
| 580 | } |
| 581 | EXPECT_EQ("v", expected); |
| 582 | |
| 583 | // This happens more often in C++11 as part of a range-based for loop. |
| 584 | auto std_splitter = absl::StrSplit(std::string(input), ','); |
| 585 | expected = "a"; |
| 586 | for (absl::string_view letter : std_splitter) { |
| 587 | EXPECT_EQ(expected, letter); |
| 588 | ++expected[0]; |
| 589 | } |
| 590 | EXPECT_EQ("v", expected); |
| 591 | } |
| 592 | |
// Copy-constructs `value` into a new heap object and returns the owning
// std::unique_ptr.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
| 597 | |
| 598 | TEST(Split, LvalueCaptureIsCopyable) { |
| 599 | std::string input = "a,b"; |
| 600 | auto heap_splitter = CopyToHeap(absl::StrSplit(input, ',')); |
| 601 | auto stack_splitter = *heap_splitter; |
| 602 | heap_splitter.reset(); |
| 603 | std::vector<std::string> result = stack_splitter; |
| 604 | EXPECT_THAT(result, testing::ElementsAre("a", "b")); |
| 605 | } |
| 606 | |
| 607 | TEST(Split, TemporaryCaptureIsCopyable) { |
| 608 | auto heap_splitter = CopyToHeap(absl::StrSplit(std::string("a,b"), ',')); |
| 609 | auto stack_splitter = *heap_splitter; |
| 610 | heap_splitter.reset(); |
| 611 | std::vector<std::string> result = stack_splitter; |
| 612 | EXPECT_THAT(result, testing::ElementsAre("a", "b")); |
| 613 | } |
| 614 | |
| 615 | TEST(Split, SplitterIsCopyableAndMoveable) { |
| 616 | auto a = absl::StrSplit("foo", '-'); |
| 617 | |
| 618 | // Ensures that the following expressions compile. |
| 619 | auto b = a; // Copy construct |
| 620 | auto c = std::move(a); // Move construct |
| 621 | b = c; // Copy assign |
| 622 | c = std::move(b); // Move assign |
| 623 | |
| 624 | EXPECT_THAT(c, ElementsAre("foo")); |
| 625 | } |
| 626 | |
| 627 | TEST(Split, StringDelimiter) { |
| 628 | { |
| 629 | std::vector<absl::string_view> v = absl::StrSplit("a,b", ','); |
| 630 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 631 | } |
| 632 | |
| 633 | { |
| 634 | std::vector<absl::string_view> v = absl::StrSplit("a,b", std::string(",")); |
| 635 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 636 | } |
| 637 | |
| 638 | { |
| 639 | std::vector<absl::string_view> v = |
| 640 | absl::StrSplit("a,b", absl::string_view(",")); |
| 641 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 642 | } |
| 643 | } |
| 644 | |
| 645 | #if !defined(__cpp_char8_t) |
| 646 | #if defined(__clang__) |
| 647 | #pragma clang diagnostic push |
| 648 | #pragma clang diagnostic ignored "-Wc++2a-compat" |
| 649 | #endif |
| 650 | TEST(Split, UTF8) { |
| 651 | // Tests splitting utf8 strings and utf8 delimiters. |
| 652 | std::string utf8_string = u8"\u03BA\u1F79\u03C3\u03BC\u03B5"; |
| 653 | { |
| 654 | // A utf8 input std::string with an ascii delimiter. |
| 655 | std::string to_split = "a," + utf8_string; |
| 656 | std::vector<absl::string_view> v = absl::StrSplit(to_split, ','); |
| 657 | EXPECT_THAT(v, ElementsAre("a", utf8_string)); |
| 658 | } |
| 659 | |
| 660 | { |
| 661 | // A utf8 input std::string and a utf8 delimiter. |
| 662 | std::string to_split = "a," + utf8_string + ",b"; |
| 663 | std::string unicode_delimiter = "," + utf8_string + ","; |
| 664 | std::vector<absl::string_view> v = |
| 665 | absl::StrSplit(to_split, unicode_delimiter); |
| 666 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 667 | } |
| 668 | |
| 669 | { |
| 670 | // A utf8 input std::string and ByAnyChar with ascii chars. |
| 671 | std::vector<absl::string_view> v = |
| 672 | absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t")); |
| 673 | EXPECT_THAT(v, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere")); |
| 674 | } |
| 675 | } |
| 676 | #if defined(__clang__) |
| 677 | #pragma clang diagnostic pop |
| 678 | #endif |
| 679 | #endif // !defined(__cpp_char8_t) |
| 680 | |
| 681 | TEST(Split, EmptyStringDelimiter) { |
| 682 | { |
| 683 | std::vector<std::string> v = absl::StrSplit("", ""); |
| 684 | EXPECT_THAT(v, ElementsAre("")); |
| 685 | } |
| 686 | |
| 687 | { |
| 688 | std::vector<std::string> v = absl::StrSplit("a", ""); |
| 689 | EXPECT_THAT(v, ElementsAre("a")); |
| 690 | } |
| 691 | |
| 692 | { |
| 693 | std::vector<std::string> v = absl::StrSplit("ab", ""); |
| 694 | EXPECT_THAT(v, ElementsAre("a", "b")); |
| 695 | } |
| 696 | |
| 697 | { |
| 698 | std::vector<std::string> v = absl::StrSplit("a b", ""); |
| 699 | EXPECT_THAT(v, ElementsAre("a", " ", "b")); |
| 700 | } |
| 701 | } |
| 702 | |
| 703 | TEST(Split, SubstrDelimiter) { |
| 704 | std::vector<absl::string_view> results; |
| 705 | absl::string_view delim("//"); |
| 706 | |
| 707 | results = absl::StrSplit("", delim); |
| 708 | EXPECT_THAT(results, ElementsAre("")); |
| 709 | |
| 710 | results = absl::StrSplit("//", delim); |
| 711 | EXPECT_THAT(results, ElementsAre("", "")); |
| 712 | |
| 713 | results = absl::StrSplit("ab", delim); |
| 714 | EXPECT_THAT(results, ElementsAre("ab")); |
| 715 | |
| 716 | results = absl::StrSplit("ab//", delim); |
| 717 | EXPECT_THAT(results, ElementsAre("ab", "")); |
| 718 | |
| 719 | results = absl::StrSplit("ab/", delim); |
| 720 | EXPECT_THAT(results, ElementsAre("ab/")); |
| 721 | |
| 722 | results = absl::StrSplit("a/b", delim); |
| 723 | EXPECT_THAT(results, ElementsAre("a/b")); |
| 724 | |
| 725 | results = absl::StrSplit("a//b", delim); |
| 726 | EXPECT_THAT(results, ElementsAre("a", "b")); |
| 727 | |
| 728 | results = absl::StrSplit("a///b", delim); |
| 729 | EXPECT_THAT(results, ElementsAre("a", "/b")); |
| 730 | |
| 731 | results = absl::StrSplit("a////b", delim); |
| 732 | EXPECT_THAT(results, ElementsAre("a", "", "b")); |
| 733 | } |
| 734 | |
| 735 | TEST(Split, EmptyResults) { |
| 736 | std::vector<absl::string_view> results; |
| 737 | |
| 738 | results = absl::StrSplit("", '#'); |
| 739 | EXPECT_THAT(results, ElementsAre("")); |
| 740 | |
| 741 | results = absl::StrSplit("#", '#'); |
| 742 | EXPECT_THAT(results, ElementsAre("", "")); |
| 743 | |
| 744 | results = absl::StrSplit("#cd", '#'); |
| 745 | EXPECT_THAT(results, ElementsAre("", "cd")); |
| 746 | |
| 747 | results = absl::StrSplit("ab#cd#", '#'); |
| 748 | EXPECT_THAT(results, ElementsAre("ab", "cd", "")); |
| 749 | |
| 750 | results = absl::StrSplit("ab##cd", '#'); |
| 751 | EXPECT_THAT(results, ElementsAre("ab", "", "cd")); |
| 752 | |
| 753 | results = absl::StrSplit("ab##", '#'); |
| 754 | EXPECT_THAT(results, ElementsAre("ab", "", "")); |
| 755 | |
| 756 | results = absl::StrSplit("ab#ab#", '#'); |
| 757 | EXPECT_THAT(results, ElementsAre("ab", "ab", "")); |
| 758 | |
| 759 | results = absl::StrSplit("aaaa", 'a'); |
| 760 | EXPECT_THAT(results, ElementsAre("", "", "", "", "")); |
| 761 | |
| 762 | results = absl::StrSplit("", '#', absl::SkipEmpty()); |
| 763 | EXPECT_THAT(results, ElementsAre()); |
| 764 | } |
| 765 | |
| 766 | template <typename Delimiter> |
| 767 | static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d, |
| 768 | size_t starting_pos, int expected_pos) { |
| 769 | absl::string_view found = d.Find(text, starting_pos); |
| 770 | return found.data() != text.data() + text.size() && |
| 771 | expected_pos == found.data() - text.data(); |
| 772 | } |
| 773 | |
| 774 | // Helper function for testing Delimiter objects. Returns true if the given |
| 775 | // Delimiter is found in the given string at the given position. This function |
| 776 | // tests two cases: |
| 777 | // 1. The actual text given, staring at position 0 |
| 778 | // 2. The text given with leading padding that should be ignored |
| 779 | template <typename Delimiter> |
| 780 | static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) { |
| 781 | const std::string leading_text = ",x,y,z,"; |
| 782 | return IsFoundAtStartingPos(text, d, 0, expected_pos) && |
| 783 | IsFoundAtStartingPos(leading_text + std::string(text), d, |
| 784 | leading_text.length(), |
| 785 | expected_pos + leading_text.length()); |
| 786 | } |
| 787 | |
| 788 | // |
| 789 | // Tests for ByString |
| 790 | // |
| 791 | |
// Shared checks for any delimiter that represents a single comma: the
// delimiter must be located at the first comma of the input, and must not be
// found in inputs that contain no comma.
template <typename Delimiter>
void TestComma(Delimiter d) {
  // Found: the first comma is reported.
  EXPECT_TRUE(IsFoundAt(",", d, 0));
  EXPECT_TRUE(IsFoundAt("a,", d, 1));
  EXPECT_TRUE(IsFoundAt(",b", d, 0));
  EXPECT_TRUE(IsFoundAt("a,b", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
  EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));

  // Not found: the input has no comma.
  EXPECT_FALSE(IsFoundAt("", d, -1));
  EXPECT_FALSE(IsFoundAt(" ", d, -1));
  EXPECT_FALSE(IsFoundAt("a", d, -1));
  EXPECT_FALSE(IsFoundAt("a b c", d, -1));
  EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
  EXPECT_FALSE(IsFoundAt(";", d, -1));
}
| 808 | |
| 809 | TEST(Delimiter, ByString) { |
| 810 | using absl::ByString; |
| 811 | TestComma(ByString(",")); |
| 812 | |
| 813 | // Works as named variable. |
| 814 | ByString comma_string(","); |
| 815 | TestComma(comma_string); |
| 816 | |
| 817 | // The first occurrence of empty std::string ("") in a std::string is at position 0. |
| 818 | // There is a test below that demonstrates this for absl::string_view::find(). |
| 819 | // If the ByString delimiter returned position 0 for this, there would |
| 820 | // be an infinite loop in the SplitIterator code. To avoid this, empty std::string |
| 821 | // is a special case in that it always returns the item at position 1. |
| 822 | absl::string_view abc("abc"); |
| 823 | EXPECT_EQ(0, abc.find("")); // "" is found at position 0 |
| 824 | ByString empty(""); |
| 825 | EXPECT_FALSE(IsFoundAt("", empty, 0)); |
| 826 | EXPECT_FALSE(IsFoundAt("a", empty, 0)); |
| 827 | EXPECT_TRUE(IsFoundAt("ab", empty, 1)); |
| 828 | EXPECT_TRUE(IsFoundAt("abc", empty, 1)); |
| 829 | } |
| 830 | |
| 831 | TEST(Split, ByChar) { |
| 832 | using absl::ByChar; |
| 833 | TestComma(ByChar(',')); |
| 834 | |
| 835 | // Works as named variable. |
| 836 | ByChar comma_char(','); |
| 837 | TestComma(comma_char); |
| 838 | } |
| 839 | |
| 840 | // |
| 841 | // Tests for ByAnyChar |
| 842 | // |
| 843 | |
| 844 | TEST(Delimiter, ByAnyChar) { |
| 845 | using absl::ByAnyChar; |
| 846 | ByAnyChar one_delim(","); |
| 847 | // Found |
| 848 | EXPECT_TRUE(IsFoundAt(",", one_delim, 0)); |
| 849 | EXPECT_TRUE(IsFoundAt("a,", one_delim, 1)); |
| 850 | EXPECT_TRUE(IsFoundAt("a,b", one_delim, 1)); |
| 851 | EXPECT_TRUE(IsFoundAt(",b", one_delim, 0)); |
| 852 | // Not found |
| 853 | EXPECT_FALSE(IsFoundAt("", one_delim, -1)); |
| 854 | EXPECT_FALSE(IsFoundAt(" ", one_delim, -1)); |
| 855 | EXPECT_FALSE(IsFoundAt("a", one_delim, -1)); |
| 856 | EXPECT_FALSE(IsFoundAt("a;b;c", one_delim, -1)); |
| 857 | EXPECT_FALSE(IsFoundAt(";", one_delim, -1)); |
| 858 | |
| 859 | ByAnyChar two_delims(",;"); |
| 860 | // Found |
| 861 | EXPECT_TRUE(IsFoundAt(",", two_delims, 0)); |
| 862 | EXPECT_TRUE(IsFoundAt(";", two_delims, 0)); |
| 863 | EXPECT_TRUE(IsFoundAt(",;", two_delims, 0)); |
| 864 | EXPECT_TRUE(IsFoundAt(";,", two_delims, 0)); |
| 865 | EXPECT_TRUE(IsFoundAt(",;b", two_delims, 0)); |
| 866 | EXPECT_TRUE(IsFoundAt(";,b", two_delims, 0)); |
| 867 | EXPECT_TRUE(IsFoundAt("a;,", two_delims, 1)); |
| 868 | EXPECT_TRUE(IsFoundAt("a,;", two_delims, 1)); |
| 869 | EXPECT_TRUE(IsFoundAt("a;,b", two_delims, 1)); |
| 870 | EXPECT_TRUE(IsFoundAt("a,;b", two_delims, 1)); |
| 871 | // Not found |
| 872 | EXPECT_FALSE(IsFoundAt("", two_delims, -1)); |
| 873 | EXPECT_FALSE(IsFoundAt(" ", two_delims, -1)); |
| 874 | EXPECT_FALSE(IsFoundAt("a", two_delims, -1)); |
| 875 | EXPECT_FALSE(IsFoundAt("a=b=c", two_delims, -1)); |
| 876 | EXPECT_FALSE(IsFoundAt("=", two_delims, -1)); |
| 877 | |
| 878 | // ByAnyChar behaves just like ByString when given a delimiter of empty |
| 879 | // std::string. That is, it always returns a zero-length absl::string_view |
| 880 | // referring to the item at position 1, not position 0. |
| 881 | ByAnyChar empty(""); |
| 882 | EXPECT_FALSE(IsFoundAt("", empty, 0)); |
| 883 | EXPECT_FALSE(IsFoundAt("a", empty, 0)); |
| 884 | EXPECT_TRUE(IsFoundAt("ab", empty, 1)); |
| 885 | EXPECT_TRUE(IsFoundAt("abc", empty, 1)); |
| 886 | } |
| 887 | |
| 888 | // |
| 889 | // Tests for ByLength |
| 890 | // |
| 891 | |
| 892 | TEST(Delimiter, ByLength) { |
| 893 | using absl::ByLength; |
| 894 | |
| 895 | ByLength four_char_delim(4); |
| 896 | |
| 897 | // Found |
| 898 | EXPECT_TRUE(IsFoundAt("abcde", four_char_delim, 4)); |
| 899 | EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", four_char_delim, 4)); |
| 900 | EXPECT_TRUE(IsFoundAt("a b,c\nd", four_char_delim, 4)); |
| 901 | // Not found |
| 902 | EXPECT_FALSE(IsFoundAt("", four_char_delim, 0)); |
| 903 | EXPECT_FALSE(IsFoundAt("a", four_char_delim, 0)); |
| 904 | EXPECT_FALSE(IsFoundAt("ab", four_char_delim, 0)); |
| 905 | EXPECT_FALSE(IsFoundAt("abc", four_char_delim, 0)); |
| 906 | EXPECT_FALSE(IsFoundAt("abcd", four_char_delim, 0)); |
| 907 | } |
| 908 | |
| 909 | TEST(Split, WorksWithLargeStrings) { |
| 910 | if (sizeof(size_t) > 4) { |
| 911 | std::string s((uint32_t{1} << 31) + 1, 'x'); // 2G + 1 byte |
| 912 | s.back() = '-'; |
| 913 | std::vector<absl::string_view> v = absl::StrSplit(s, '-'); |
| 914 | EXPECT_EQ(2, v.size()); |
| 915 | // The first element will contain 2G of 'x's. |
| 916 | // testing::StartsWith is too slow with a 2G std::string. |
| 917 | EXPECT_EQ('x', v[0][0]); |
| 918 | EXPECT_EQ('x', v[0][1]); |
| 919 | EXPECT_EQ('x', v[0][3]); |
| 920 | EXPECT_EQ("", v[1]); |
| 921 | } |
| 922 | } |
| 923 | |
| 924 | TEST(SplitInternalTest, TypeTraits) { |
| 925 | EXPECT_FALSE(absl::strings_internal::HasMappedType<int>::value); |
| 926 | EXPECT_TRUE( |
| 927 | (absl::strings_internal::HasMappedType<std::map<int, int>>::value)); |
| 928 | EXPECT_FALSE(absl::strings_internal::HasValueType<int>::value); |
| 929 | EXPECT_TRUE( |
| 930 | (absl::strings_internal::HasValueType<std::map<int, int>>::value)); |
| 931 | EXPECT_FALSE(absl::strings_internal::HasConstIterator<int>::value); |
| 932 | EXPECT_TRUE( |
| 933 | (absl::strings_internal::HasConstIterator<std::map<int, int>>::value)); |
| 934 | EXPECT_FALSE(absl::strings_internal::IsInitializerList<int>::value); |
| 935 | EXPECT_TRUE((absl::strings_internal::IsInitializerList< |
| 936 | std::initializer_list<int>>::value)); |
| 937 | } |
| 938 | |
| 939 | } // namespace |