Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 1 | // Copyright 2018 The Abseil Authors. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // https://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #include "absl/strings/internal/charconv_parse.h" |
| 16 | |
| 17 | #include <string> |
| 18 | #include <utility> |
| 19 | |
| 20 | #include "gmock/gmock.h" |
| 21 | #include "gtest/gtest.h" |
| 22 | #include "absl/base/internal/raw_logging.h" |
| 23 | #include "absl/strings/str_cat.h" |
| 24 | |
| 25 | using absl::chars_format; |
| 26 | using absl::strings_internal::FloatType; |
| 27 | using absl::strings_internal::ParsedFloat; |
| 28 | using absl::strings_internal::ParseFloat; |
| 29 | |
| 30 | namespace { |
| 31 | |
| 32 | // Check that a given string input is parsed to the expected mantissa and |
| 33 | // exponent. |
| 34 | // |
| 35 | // Input string `s` must contain a '$' character. It marks the end of the |
| 36 | // characters that should be consumed by the match. It is stripped from the |
| 37 | // input to ParseFloat. |
| 38 | // |
| 39 | // If input string `s` contains '[' and ']' characters, these mark the region |
| 40 | // of characters that should be marked as the "subrange". For NaNs, this is |
| 41 | // the location of the extended NaN string. For numbers, this is the location |
| 42 | // of the full, over-large mantissa. |
| 43 | template <int base> |
| 44 | void ExpectParsedFloat(std::string s, absl::chars_format format_flags, |
| 45 | FloatType expected_type, uint64_t expected_mantissa, |
| 46 | int expected_exponent, |
| 47 | int expected_literal_exponent = -999) { |
| 48 | SCOPED_TRACE(s); |
| 49 | |
| 50 | int begin_subrange = -1; |
| 51 | int end_subrange = -1; |
| 52 | // If s contains '[' and ']', then strip these characters and set the subrange |
| 53 | // indices appropriately. |
| 54 | std::string::size_type open_bracket_pos = s.find('['); |
| 55 | if (open_bracket_pos != std::string::npos) { |
| 56 | begin_subrange = static_cast<int>(open_bracket_pos); |
| 57 | s.replace(open_bracket_pos, 1, ""); |
| 58 | std::string::size_type close_bracket_pos = s.find(']'); |
| 59 | ABSL_RAW_CHECK(close_bracket_pos != absl::string_view::npos, |
| 60 | "Test input contains [ without matching ]"); |
| 61 | end_subrange = static_cast<int>(close_bracket_pos); |
| 62 | s.replace(close_bracket_pos, 1, ""); |
| 63 | } |
| 64 | const std::string::size_type expected_characters_matched = s.find('$'); |
| 65 | ABSL_RAW_CHECK(expected_characters_matched != std::string::npos, |
Austin Schuh | b4691e9 | 2020-12-31 12:37:18 -0800 | [diff] [blame^] | 66 | "Input string must contain $"); |
Austin Schuh | 36244a1 | 2019-09-21 17:52:38 -0700 | [diff] [blame] | 67 | s.replace(expected_characters_matched, 1, ""); |
| 68 | |
| 69 | ParsedFloat parsed = |
| 70 | ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); |
| 71 | |
| 72 | EXPECT_NE(parsed.end, nullptr); |
| 73 | if (parsed.end == nullptr) { |
| 74 | return; // The following tests are not useful if we fully failed to parse |
| 75 | } |
| 76 | EXPECT_EQ(parsed.type, expected_type); |
| 77 | if (begin_subrange == -1) { |
| 78 | EXPECT_EQ(parsed.subrange_begin, nullptr); |
| 79 | EXPECT_EQ(parsed.subrange_end, nullptr); |
| 80 | } else { |
| 81 | EXPECT_EQ(parsed.subrange_begin, s.data() + begin_subrange); |
| 82 | EXPECT_EQ(parsed.subrange_end, s.data() + end_subrange); |
| 83 | } |
| 84 | if (parsed.type == FloatType::kNumber) { |
| 85 | EXPECT_EQ(parsed.mantissa, expected_mantissa); |
| 86 | EXPECT_EQ(parsed.exponent, expected_exponent); |
| 87 | if (expected_literal_exponent != -999) { |
| 88 | EXPECT_EQ(parsed.literal_exponent, expected_literal_exponent); |
| 89 | } |
| 90 | } |
| 91 | auto characters_matched = static_cast<int>(parsed.end - s.data()); |
| 92 | EXPECT_EQ(characters_matched, expected_characters_matched); |
| 93 | } |
| 94 | |
| 95 | // Check that a given string input is parsed to the expected mantissa and |
| 96 | // exponent. |
| 97 | // |
| 98 | // Input string `s` must contain a '$' character. It marks the end of the |
| 99 | // characters that were consumed by the match. |
| 100 | template <int base> |
| 101 | void ExpectNumber(std::string s, absl::chars_format format_flags, |
| 102 | uint64_t expected_mantissa, int expected_exponent, |
| 103 | int expected_literal_exponent = -999) { |
| 104 | ExpectParsedFloat<base>(std::move(s), format_flags, FloatType::kNumber, |
| 105 | expected_mantissa, expected_exponent, |
| 106 | expected_literal_exponent); |
| 107 | } |
| 108 | |
| 109 | // Check that a given string input is parsed to the given special value. |
| 110 | // |
| 111 | // This tests against both number bases, since infinities and NaNs have |
| 112 | // identical representations in both modes. |
| 113 | void ExpectSpecial(const std::string& s, absl::chars_format format_flags, |
| 114 | FloatType type) { |
| 115 | ExpectParsedFloat<10>(s, format_flags, type, 0, 0); |
| 116 | ExpectParsedFloat<16>(s, format_flags, type, 0, 0); |
| 117 | } |
| 118 | |
| 119 | // Check that a given input string is not matched by Float. |
| 120 | template <int base> |
| 121 | void ExpectFailedParse(absl::string_view s, absl::chars_format format_flags) { |
| 122 | ParsedFloat parsed = |
| 123 | ParseFloat<base>(s.data(), s.data() + s.size(), format_flags); |
| 124 | EXPECT_EQ(parsed.end, nullptr); |
| 125 | } |
| 126 | |
| 127 | TEST(ParseFloat, SimpleValue) { |
| 128 | // Test that various forms of floating point numbers all parse correctly. |
| 129 | ExpectNumber<10>("1.23456789e5$", chars_format::general, 123456789, -3); |
| 130 | ExpectNumber<10>("1.23456789e+5$", chars_format::general, 123456789, -3); |
| 131 | ExpectNumber<10>("1.23456789E5$", chars_format::general, 123456789, -3); |
| 132 | ExpectNumber<10>("1.23456789e05$", chars_format::general, 123456789, -3); |
| 133 | ExpectNumber<10>("123.456789e3$", chars_format::general, 123456789, -3); |
| 134 | ExpectNumber<10>("0.000123456789e9$", chars_format::general, 123456789, -3); |
| 135 | ExpectNumber<10>("123456.789$", chars_format::general, 123456789, -3); |
| 136 | ExpectNumber<10>("123456789e-3$", chars_format::general, 123456789, -3); |
| 137 | |
| 138 | ExpectNumber<16>("1.234abcdefp28$", chars_format::general, 0x1234abcdef, -8); |
| 139 | ExpectNumber<16>("1.234abcdefp+28$", chars_format::general, 0x1234abcdef, -8); |
| 140 | ExpectNumber<16>("1.234ABCDEFp28$", chars_format::general, 0x1234abcdef, -8); |
| 141 | ExpectNumber<16>("1.234AbCdEfP0028$", chars_format::general, 0x1234abcdef, |
| 142 | -8); |
| 143 | ExpectNumber<16>("123.4abcdefp20$", chars_format::general, 0x1234abcdef, -8); |
| 144 | ExpectNumber<16>("0.0001234abcdefp44$", chars_format::general, 0x1234abcdef, |
| 145 | -8); |
| 146 | ExpectNumber<16>("1234abcd.ef$", chars_format::general, 0x1234abcdef, -8); |
| 147 | ExpectNumber<16>("1234abcdefp-8$", chars_format::general, 0x1234abcdef, -8); |
| 148 | |
| 149 | // ExpectNumber does not attempt to drop trailing zeroes. |
| 150 | ExpectNumber<10>("0001.2345678900e005$", chars_format::general, 12345678900, |
| 151 | -5); |
| 152 | ExpectNumber<16>("0001.234abcdef000p28$", chars_format::general, |
| 153 | 0x1234abcdef000, -20); |
| 154 | |
| 155 | // Ensure non-matching characters after a number are ignored, even when they |
| 156 | // look like potentially matching characters. |
| 157 | ExpectNumber<10>("1.23456789e5$ ", chars_format::general, 123456789, -3); |
| 158 | ExpectNumber<10>("1.23456789e5$e5e5", chars_format::general, 123456789, -3); |
| 159 | ExpectNumber<10>("1.23456789e5$.25", chars_format::general, 123456789, -3); |
| 160 | ExpectNumber<10>("1.23456789e5$-", chars_format::general, 123456789, -3); |
| 161 | ExpectNumber<10>("1.23456789e5$PUPPERS!!!", chars_format::general, 123456789, |
| 162 | -3); |
| 163 | ExpectNumber<10>("123456.789$efghij", chars_format::general, 123456789, -3); |
| 164 | ExpectNumber<10>("123456.789$e", chars_format::general, 123456789, -3); |
| 165 | ExpectNumber<10>("123456.789$p5", chars_format::general, 123456789, -3); |
| 166 | ExpectNumber<10>("123456.789$.10", chars_format::general, 123456789, -3); |
| 167 | |
| 168 | ExpectNumber<16>("1.234abcdefp28$ ", chars_format::general, 0x1234abcdef, |
| 169 | -8); |
| 170 | ExpectNumber<16>("1.234abcdefp28$p28", chars_format::general, 0x1234abcdef, |
| 171 | -8); |
| 172 | ExpectNumber<16>("1.234abcdefp28$.125", chars_format::general, 0x1234abcdef, |
| 173 | -8); |
| 174 | ExpectNumber<16>("1.234abcdefp28$-", chars_format::general, 0x1234abcdef, -8); |
| 175 | ExpectNumber<16>("1.234abcdefp28$KITTEHS!!!", chars_format::general, |
| 176 | 0x1234abcdef, -8); |
| 177 | ExpectNumber<16>("1234abcd.ef$ghijk", chars_format::general, 0x1234abcdef, |
| 178 | -8); |
| 179 | ExpectNumber<16>("1234abcd.ef$p", chars_format::general, 0x1234abcdef, -8); |
| 180 | ExpectNumber<16>("1234abcd.ef$.10", chars_format::general, 0x1234abcdef, -8); |
| 181 | |
| 182 | // Ensure we can read a full resolution mantissa without overflow. |
| 183 | ExpectNumber<10>("9999999999999999999$", chars_format::general, |
| 184 | 9999999999999999999u, 0); |
| 185 | ExpectNumber<16>("fffffffffffffff$", chars_format::general, |
| 186 | 0xfffffffffffffffu, 0); |
| 187 | |
| 188 | // Check that zero is consistently read. |
| 189 | ExpectNumber<10>("0$", chars_format::general, 0, 0); |
| 190 | ExpectNumber<16>("0$", chars_format::general, 0, 0); |
| 191 | ExpectNumber<10>("000000000000000000000000000000000000000$", |
| 192 | chars_format::general, 0, 0); |
| 193 | ExpectNumber<16>("000000000000000000000000000000000000000$", |
| 194 | chars_format::general, 0, 0); |
| 195 | ExpectNumber<10>("0000000000000000000000.000000000000000000$", |
| 196 | chars_format::general, 0, 0); |
| 197 | ExpectNumber<16>("0000000000000000000000.000000000000000000$", |
| 198 | chars_format::general, 0, 0); |
| 199 | ExpectNumber<10>("0.00000000000000000000000000000000e123456$", |
| 200 | chars_format::general, 0, 0); |
| 201 | ExpectNumber<16>("0.00000000000000000000000000000000p123456$", |
| 202 | chars_format::general, 0, 0); |
| 203 | } |
| 204 | |
| 205 | TEST(ParseFloat, LargeDecimalMantissa) { |
| 206 | // After 19 significant decimal digits in the mantissa, ParsedFloat will |
| 207 | // truncate additional digits. We need to test that: |
| 208 | // 1) the truncation to 19 digits happens |
| 209 | // 2) the returned exponent reflects the dropped significant digits |
| 210 | // 3) a correct literal_exponent is set |
| 211 | // |
| 212 | // If and only if a significant digit is found after 19 digits, then the |
| 213 | // entirety of the mantissa in case the exact value is needed to make a |
| 214 | // rounding decision. The [ and ] characters below denote where such a |
| 215 | // subregion was marked by by ParseFloat. They are not part of the input. |
| 216 | |
| 217 | // Mark a capture group only if a dropped digit is significant (nonzero). |
| 218 | ExpectNumber<10>("100000000000000000000000000$", chars_format::general, |
| 219 | 1000000000000000000, |
| 220 | /* adjusted exponent */ 8); |
| 221 | |
| 222 | ExpectNumber<10>("123456789123456789100000000$", chars_format::general, |
| 223 | 1234567891234567891, |
| 224 | /* adjusted exponent */ 8); |
| 225 | |
| 226 | ExpectNumber<10>("[123456789123456789123456789]$", chars_format::general, |
| 227 | 1234567891234567891, |
| 228 | /* adjusted exponent */ 8, |
| 229 | /* literal exponent */ 0); |
| 230 | |
| 231 | ExpectNumber<10>("[123456789123456789100000009]$", chars_format::general, |
| 232 | 1234567891234567891, |
| 233 | /* adjusted exponent */ 8, |
| 234 | /* literal exponent */ 0); |
| 235 | |
| 236 | ExpectNumber<10>("[123456789123456789120000000]$", chars_format::general, |
| 237 | 1234567891234567891, |
| 238 | /* adjusted exponent */ 8, |
| 239 | /* literal exponent */ 0); |
| 240 | |
| 241 | // Leading zeroes should not count towards the 19 significant digit limit |
| 242 | ExpectNumber<10>("[00000000123456789123456789123456789]$", |
| 243 | chars_format::general, 1234567891234567891, |
| 244 | /* adjusted exponent */ 8, |
| 245 | /* literal exponent */ 0); |
| 246 | |
| 247 | ExpectNumber<10>("00000000123456789123456789100000000$", |
| 248 | chars_format::general, 1234567891234567891, |
| 249 | /* adjusted exponent */ 8); |
| 250 | |
| 251 | // Truncated digits after the decimal point should not cause a further |
| 252 | // exponent adjustment. |
| 253 | ExpectNumber<10>("1.234567891234567891e123$", chars_format::general, |
| 254 | 1234567891234567891, 105); |
| 255 | ExpectNumber<10>("[1.23456789123456789123456789]e123$", chars_format::general, |
| 256 | 1234567891234567891, |
| 257 | /* adjusted exponent */ 105, |
| 258 | /* literal exponent */ 123); |
| 259 | |
| 260 | // Ensure we truncate, and not round. (The from_chars algorithm we use |
| 261 | // depends on our guess missing low, if it misses, so we need the rounding |
| 262 | // error to be downward.) |
| 263 | ExpectNumber<10>("[1999999999999999999999]$", chars_format::general, |
| 264 | 1999999999999999999, |
| 265 | /* adjusted exponent */ 3, |
| 266 | /* literal exponent */ 0); |
| 267 | } |
| 268 | |
| 269 | TEST(ParseFloat, LargeHexadecimalMantissa) { |
| 270 | // After 15 significant hex digits in the mantissa, ParsedFloat will treat |
| 271 | // additional digits as sticky, We need to test that: |
| 272 | // 1) The truncation to 15 digits happens |
| 273 | // 2) The returned exponent reflects the dropped significant digits |
| 274 | // 3) If a nonzero digit is dropped, the low bit of mantissa is set. |
| 275 | |
| 276 | ExpectNumber<16>("123456789abcdef123456789abcdef$", chars_format::general, |
| 277 | 0x123456789abcdef, 60); |
| 278 | |
| 279 | // Leading zeroes should not count towards the 15 significant digit limit |
| 280 | ExpectNumber<16>("000000123456789abcdef123456789abcdef$", |
| 281 | chars_format::general, 0x123456789abcdef, 60); |
| 282 | |
| 283 | // Truncated digits after the radix point should not cause a further |
| 284 | // exponent adjustment. |
| 285 | ExpectNumber<16>("1.23456789abcdefp100$", chars_format::general, |
| 286 | 0x123456789abcdef, 44); |
| 287 | ExpectNumber<16>("1.23456789abcdef123456789abcdefp100$", |
| 288 | chars_format::general, 0x123456789abcdef, 44); |
| 289 | |
| 290 | // test sticky digit behavior. The low bit should be set iff any dropped |
| 291 | // digit is nonzero. |
| 292 | ExpectNumber<16>("123456789abcdee123456789abcdee$", chars_format::general, |
| 293 | 0x123456789abcdef, 60); |
| 294 | ExpectNumber<16>("123456789abcdee000000000000001$", chars_format::general, |
| 295 | 0x123456789abcdef, 60); |
| 296 | ExpectNumber<16>("123456789abcdee000000000000000$", chars_format::general, |
| 297 | 0x123456789abcdee, 60); |
| 298 | } |
| 299 | |
| 300 | TEST(ParseFloat, ScientificVsFixed) { |
| 301 | // In fixed mode, an exponent is never matched (but the remainder of the |
| 302 | // number will be matched.) |
| 303 | ExpectNumber<10>("1.23456789$e5", chars_format::fixed, 123456789, -8); |
| 304 | ExpectNumber<10>("123456.789$", chars_format::fixed, 123456789, -3); |
| 305 | ExpectNumber<16>("1.234abcdef$p28", chars_format::fixed, 0x1234abcdef, -36); |
| 306 | ExpectNumber<16>("1234abcd.ef$", chars_format::fixed, 0x1234abcdef, -8); |
| 307 | |
| 308 | // In scientific mode, numbers don't match *unless* they have an exponent. |
| 309 | ExpectNumber<10>("1.23456789e5$", chars_format::scientific, 123456789, -3); |
| 310 | ExpectFailedParse<10>("-123456.789$", chars_format::scientific); |
| 311 | ExpectNumber<16>("1.234abcdefp28$", chars_format::scientific, 0x1234abcdef, |
| 312 | -8); |
| 313 | ExpectFailedParse<16>("1234abcd.ef$", chars_format::scientific); |
| 314 | } |
| 315 | |
| 316 | TEST(ParseFloat, Infinity) { |
| 317 | ExpectFailedParse<10>("in", chars_format::general); |
| 318 | ExpectFailedParse<16>("in", chars_format::general); |
| 319 | ExpectFailedParse<10>("inx", chars_format::general); |
| 320 | ExpectFailedParse<16>("inx", chars_format::general); |
| 321 | ExpectSpecial("inf$", chars_format::general, FloatType::kInfinity); |
| 322 | ExpectSpecial("Inf$", chars_format::general, FloatType::kInfinity); |
| 323 | ExpectSpecial("INF$", chars_format::general, FloatType::kInfinity); |
| 324 | ExpectSpecial("inf$inite", chars_format::general, FloatType::kInfinity); |
| 325 | ExpectSpecial("iNfInItY$", chars_format::general, FloatType::kInfinity); |
| 326 | ExpectSpecial("infinity$!!!", chars_format::general, FloatType::kInfinity); |
| 327 | } |
| 328 | |
| 329 | TEST(ParseFloat, NaN) { |
| 330 | ExpectFailedParse<10>("na", chars_format::general); |
| 331 | ExpectFailedParse<16>("na", chars_format::general); |
| 332 | ExpectFailedParse<10>("nah", chars_format::general); |
| 333 | ExpectFailedParse<16>("nah", chars_format::general); |
| 334 | ExpectSpecial("nan$", chars_format::general, FloatType::kNan); |
| 335 | ExpectSpecial("NaN$", chars_format::general, FloatType::kNan); |
| 336 | ExpectSpecial("nAn$", chars_format::general, FloatType::kNan); |
| 337 | ExpectSpecial("NAN$", chars_format::general, FloatType::kNan); |
| 338 | ExpectSpecial("NaN$aNaNaNaNaBatman!", chars_format::general, FloatType::kNan); |
| 339 | |
| 340 | // A parenthesized sequence of the characters [a-zA-Z0-9_] is allowed to |
| 341 | // appear after an NaN. Check that this is allowed, and that the correct |
| 342 | // characters are grouped. |
| 343 | // |
| 344 | // (The characters [ and ] in the pattern below delimit the expected matched |
| 345 | // subgroup; they are not part of the input passed to ParseFloat.) |
| 346 | ExpectSpecial("nan([0xabcdef])$", chars_format::general, FloatType::kNan); |
| 347 | ExpectSpecial("nan([0xabcdef])$...", chars_format::general, FloatType::kNan); |
| 348 | ExpectSpecial("nan([0xabcdef])$)...", chars_format::general, FloatType::kNan); |
| 349 | ExpectSpecial("nan([])$", chars_format::general, FloatType::kNan); |
| 350 | ExpectSpecial("nan([aAzZ09_])$", chars_format::general, FloatType::kNan); |
| 351 | // If the subgroup contains illegal characters, don't match it at all. |
| 352 | ExpectSpecial("nan$(bad-char)", chars_format::general, FloatType::kNan); |
| 353 | // Also cope with a missing close paren. |
| 354 | ExpectSpecial("nan$(0xabcdef", chars_format::general, FloatType::kNan); |
| 355 | } |
| 356 | |
| 357 | } // namespace |