/* __gmp_doscan -- formatted input internals.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2001-2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of either:

  * the GNU Lesser General Public License as published by the Free
    Software Foundation; either version 3 of the License, or (at your
    option) any later version.

or

  * the GNU General Public License as published by the Free Software
    Foundation; either version 2 of the License, or (at your option) any
    later version.

or both in parallel, as here.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received copies of the GNU General Public License and the
GNU Lesser General Public License along with the GNU MP Library.  If not,
see https://www.gnu.org/licenses/.  */
| 34 | |
#define _GNU_SOURCE    /* for DECIMAL_POINT in langinfo.h */

#include "config.h"    /* needed for the HAVE_, could also move gmp incls */

#include <stdarg.h>
#include <ctype.h>
#include <limits.h>    /* for INT_MAX */
#include <stddef.h>    /* for ptrdiff_t */
#include <stdio.h>
#include <stdlib.h>    /* for strtol */
#include <string.h>

#if HAVE_LANGINFO_H
#include <langinfo.h>  /* for nl_langinfo */
#endif

#if HAVE_LOCALE_H
#include <locale.h>    /* for localeconv */
#endif

#if HAVE_INTTYPES_H
# include <inttypes.h> /* for intmax_t */
#else
# if HAVE_STDINT_H
#  include <stdint.h>
# endif
#endif

#if HAVE_SYS_TYPES_H
#include <sys/types.h> /* for quad_t */
#endif

#include "gmp-impl.h"
| 67 | |
| 68 | |
/* Debug tracing hook.  By default the argument is discarded entirely, so
   trace statements cost nothing.  Change this to "#define TRACE(x) x" for
   some traces.  */
#define TRACE(x)
| 71 | |
| 72 | |
/* General:

       It's necessary to parse up the format string to recognise the GMP
       extra types F, Q and Z.  Other types and conversions are passed
       across to the standard sscanf or fscanf via funs->scan, for ease of
       implementation.  This is essential in the case of something like glibc
       %p where the pointer format isn't actually documented.

       Because funs->scan doesn't get the whole input it can't put the right
       values in for %n, so that's handled in __gmp_doscan.  Neither sscanf
       nor fscanf directly indicate how many characters were read, so an
       extra %n is appended to each run for that.  For fscanf this merely
       supports our %n output, but for sscanf it lets funs->step move us
       along the input string.

       Whitespace and literal matches in the format string, including %%,
       are handled directly within __gmp_doscan.  This is reasonably
       efficient, and avoids some suspicious behaviour observed in various
       system libc's.  GLIBC 2.2.4 for instance returns 0 on

           sscanf(" ", " x")
       or
           sscanf(" ", " x%d",&n)

       whereas we think they should return EOF, since end-of-string is
       reached when a match of "x" is required.

       For standard % conversions, funs->scan is called once for each
       conversion.  If we had vfscanf and vsscanf and could rely on their
       fixed text matching behaviour then we could call them with multiple
       consecutive standard conversions.  But plain fscanf and sscanf work
       fine, and parsing one field at a time shouldn't be too much of a
       slowdown.
| 106 | |
   gmpscan:

       gmpscan reads a gmp type.  It's only used from one place, but is a
       separate subroutine to avoid a big chunk of complicated code in the
       middle of __gmp_doscan.  Within gmpscan a couple of loopbacks make it
       possible to share code for parsing integers, rationals and floats.

       In gmpscan normally one char of lookahead is maintained, but when width
       is reached that stops, on the principle that an fgetc/ungetc of a char
       past where we're told to stop would be undesirable.  "chars" is how many
       characters have been read so far, including the current c.  When
       chars==width and another character is desired then a jump is done to the
       "convert" stage.  c is invalid and mustn't be unget'ed in this case;
       chars is set to width+1 to indicate that.

       gmpscan normally returns the number of characters read.  -1 means an
       invalid field, -2 means EOF reached before any matching characters
       were read.

       For hex floats, the mantissa part is passed to mpf_set_str, then the
       exponent is applied with mpf_mul_2exp or mpf_div_2exp.  This is easier
       than teaching mpf_set_str about an exponent factor (ie. 2) differing
       from the mantissa radix point factor (ie. 16).  mpf_mul_2exp and
       mpf_div_2exp will preserve the application requested precision, so
       nothing in that respect is lost by making this a two-step process.
| 132 | |
   Matching and errors:

       C99 7.19.6.2 paras 9 and 10 say an input item is read as the longest
       string which is a match for the appropriate type, or a prefix of a
       match.  With that done, if it's only a prefix then the result is a
       matching failure, ie. invalid input.

       This rule seems fairly clear, but doesn't seem to be universally
       applied in system C libraries.  Even GLIBC doesn't seem to get it
       right, insofar as it seems to accept some apparently invalid forms.
       Eg. glibc 2.3.1 accepts "0x" for a "%i", where a reading of the
       standard would suggest a non-empty sequence of digits should be
       required after an "0x".

       A footnote to 7.19.6.2 para 17 notes how this input item reading can
       mean inputs acceptable to strtol are not acceptable to fscanf.  We
       think this confirms our reading of "0x" as invalid.

       Clearly gmp_sscanf could backtrack to a longest input which was a
       valid match for a given item, but this is not done, since C99 says
       sscanf is identical to fscanf, so we make gmp_sscanf identical to
       gmp_fscanf.
| 155 | |
   Types:

       C99 says "ll" is for long long, and "L" is for long double floats.
       Unfortunately in GMP 4.1.1 we documented the two as equivalent.  This
       doesn't affect us directly, since both are passed through to plain
       scanf.  It seems wisest not to try to enforce the C99 rule.  This is
       consistent with what we said before, though whether it actually
       worked was always up to the C library.
| 164 | |
   Alternatives:

       Consideration was given to using separate code for gmp_fscanf and
       gmp_sscanf.  The sscanf case could zip across a string doing literal
       matches or recognising digits in gmpscan, rather than making a
       function call funs->get per character.  The fscanf could use getc
       rather than fgetc too, which might help those systems where getc is a
       macro or otherwise inlined.  But none of this scanning and converting
       will be particularly fast, so the two are done together to keep it a
       little simpler for now.

       Various multibyte string issues are not addressed, for a start C99
       scanf says the format string is multibyte.  Since we pass %c, %s and
       %[ to the system scanf, they might do multibyte reads already, but
       it's another matter whether or not that can be used, since our digit
       and whitespace parsing is only unibyte.  The plan is to quietly
       ignore multibyte locales for now.  This is not as bad as it sounds,
       since GMP is presumably used mostly on numbers, which can be
       perfectly adequately treated in plain ASCII.

*/
| 186 | |
| 187 | |
/* Settings parsed from one "%" conversion in the format string, filled in
   by __gmp_doscan and read by gmpscan.  */
struct gmp_doscan_params_t {
  int   base;    /* radix implied by the conversion; 0 lets gmpscan pick it
                    from a leading "0" or "0x" in the input */
  int   ignore;  /* non-zero when "*" was given: parse but don't store */
  char  type;    /* type modifier character, or '\0' if none was given */
  int   width;   /* field width from the format; 0 if none was given */
};
| 194 | |
| 195 | |
/* Fetch the next input character into c, for use inside gmpscan.

   The character is counted in "chars" before it is read.  If that takes
   chars past "width" nothing is read and control jumps to the "convert"
   label, leaving chars == width+1 and c untouched.  Otherwise c receives
   whatever funs->get returns.

   This macro relies on the enclosing function providing the variables
   chars, width, funs and data, and the label convert.  */
#define GET(c)                                  \
  do {                                          \
    ASSERT (chars <= width);                    \
    chars++;                                    \
    if (chars > width)                          \
      goto convert;                             \
    (c) = (*funs->get) (data);                  \
  } while (0)
| 204 | |
/* Number of bytes by which the "s" buffer is grown each time it fills.  */
#define S_ALLOC_STEP  512

/* Append character c to the buffer "s", extending it by S_ALLOC_STEP bytes
   first if it is already full.

   This macro relies on the enclosing function providing s (the buffer),
   s_upto (bytes used so far) and s_alloc (bytes allocated).  */
#define STORE(c)                                                        \
  do {                                                                  \
    ASSERT (s_upto <= s_alloc);                                         \
    if (s_upto >= s_alloc)                                              \
      {                                                                 \
        size_t  s_alloc_new = s_alloc + S_ALLOC_STEP;                   \
        s = __GMP_REALLOCATE_FUNC_TYPE (s, s_alloc, s_alloc_new, char); \
        s_alloc = s_alloc_new;                                          \
      }                                                                 \
    s[s_upto++] = (c);                                                  \
  } while (0)
| 219 | |
| 220 | static int |
| 221 | gmpscan (const struct gmp_doscan_funs_t *funs, void *data, |
| 222 | const struct gmp_doscan_params_t *p, void *dst) |
| 223 | { |
| 224 | int chars, c, base, first, width, seen_point, seen_digit, hexfloat; |
| 225 | size_t s_upto, s_alloc, hexexp; |
| 226 | char *s; |
| 227 | int invalid = 0; |
| 228 | |
| 229 | TRACE (printf ("gmpscan\n")); |
| 230 | |
| 231 | ASSERT (p->type == 'F' || p->type == 'Q' || p->type == 'Z'); |
| 232 | |
| 233 | c = (*funs->get) (data); |
| 234 | if (c == EOF) |
| 235 | return -2; |
| 236 | |
| 237 | chars = 1; |
| 238 | first = 1; |
| 239 | seen_point = 0; |
| 240 | width = (p->width == 0 ? INT_MAX-1 : p->width); |
| 241 | base = p->base; |
| 242 | s_alloc = S_ALLOC_STEP; |
| 243 | s = __GMP_ALLOCATE_FUNC_TYPE (s_alloc, char); |
| 244 | s_upto = 0; |
| 245 | hexfloat = 0; |
| 246 | hexexp = 0; |
| 247 | |
| 248 | another: |
| 249 | seen_digit = 0; |
| 250 | if (c == '-') |
| 251 | { |
| 252 | STORE (c); |
| 253 | goto get_for_sign; |
| 254 | } |
| 255 | else if (c == '+') |
| 256 | { |
| 257 | /* don't store '+', it's not accepted by mpz_set_str etc */ |
| 258 | get_for_sign: |
| 259 | GET (c); |
| 260 | } |
| 261 | |
| 262 | if (base == 0) |
| 263 | { |
| 264 | base = 10; /* decimal if no base indicator */ |
| 265 | if (c == '0') |
| 266 | { |
| 267 | seen_digit = 1; /* 0 alone is a valid number */ |
| 268 | if (p->type != 'F') |
| 269 | base = 8; /* leading 0 is octal, for non-floats */ |
| 270 | STORE (c); |
| 271 | GET (c); |
| 272 | if (c == 'x' || c == 'X') |
| 273 | { |
| 274 | base = 16; |
| 275 | seen_digit = 0; /* must have digits after an 0x */ |
| 276 | if (p->type == 'F') /* don't pass 'x' to mpf_set_str_point */ |
| 277 | hexfloat = 1; |
| 278 | else |
| 279 | STORE (c); |
| 280 | GET (c); |
| 281 | } |
| 282 | } |
| 283 | } |
| 284 | |
| 285 | digits: |
| 286 | for (;;) |
| 287 | { |
| 288 | if (base == 16) |
| 289 | { |
| 290 | if (! isxdigit (c)) |
| 291 | break; |
| 292 | } |
| 293 | else |
| 294 | { |
| 295 | if (! isdigit (c)) |
| 296 | break; |
| 297 | if (base == 8 && (c == '8' || c == '9')) |
| 298 | break; |
| 299 | } |
| 300 | |
| 301 | seen_digit = 1; |
| 302 | STORE (c); |
| 303 | GET (c); |
| 304 | } |
| 305 | |
| 306 | if (first) |
| 307 | { |
| 308 | /* decimal point */ |
| 309 | if (p->type == 'F' && ! seen_point) |
| 310 | { |
| 311 | /* For a multi-character decimal point, if the first character is |
| 312 | present then all of it must be, otherwise the input is |
| 313 | considered invalid. */ |
| 314 | const char *point = GMP_DECIMAL_POINT; |
| 315 | int pc = (unsigned char) *point++; |
| 316 | if (c == pc) |
| 317 | { |
| 318 | for (;;) |
| 319 | { |
| 320 | STORE (c); |
| 321 | GET (c); |
| 322 | pc = (unsigned char) *point++; |
| 323 | if (pc == '\0') |
| 324 | break; |
| 325 | if (c != pc) |
| 326 | goto set_invalid; |
| 327 | } |
| 328 | seen_point = 1; |
| 329 | goto digits; |
| 330 | } |
| 331 | } |
| 332 | |
| 333 | /* exponent */ |
| 334 | if (p->type == 'F') |
| 335 | { |
| 336 | if (hexfloat && (c == 'p' || c == 'P')) |
| 337 | { |
| 338 | hexexp = s_upto; /* exponent location */ |
| 339 | base = 10; /* exponent in decimal */ |
| 340 | goto exponent; |
| 341 | } |
| 342 | else if (! hexfloat && (c == 'e' || c == 'E')) |
| 343 | { |
| 344 | exponent: |
| 345 | /* must have at least one digit in the mantissa, just an exponent |
| 346 | is not good enough */ |
| 347 | if (! seen_digit) |
| 348 | goto set_invalid; |
| 349 | |
| 350 | do_second: |
| 351 | first = 0; |
| 352 | STORE (c); |
| 353 | GET (c); |
| 354 | goto another; |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | /* denominator */ |
| 359 | if (p->type == 'Q' && c == '/') |
| 360 | { |
| 361 | /* must have at least one digit in the numerator */ |
| 362 | if (! seen_digit) |
| 363 | goto set_invalid; |
| 364 | |
| 365 | /* now look for at least one digit in the denominator */ |
| 366 | seen_digit = 0; |
| 367 | |
| 368 | /* allow the base to be redetermined for "%i" */ |
| 369 | base = p->base; |
| 370 | goto do_second; |
| 371 | } |
| 372 | } |
| 373 | |
| 374 | convert: |
| 375 | if (! seen_digit) |
| 376 | { |
| 377 | set_invalid: |
| 378 | invalid = 1; |
| 379 | goto done; |
| 380 | } |
| 381 | |
| 382 | if (! p->ignore) |
| 383 | { |
| 384 | STORE ('\0'); |
| 385 | TRACE (printf (" convert \"%s\"\n", s)); |
| 386 | |
| 387 | /* We ought to have parsed out a valid string above, so just test |
| 388 | mpz_set_str etc with an ASSERT. */ |
| 389 | switch (p->type) { |
| 390 | case 'F': |
| 391 | { |
| 392 | mpf_ptr f = (mpf_ptr) dst; |
| 393 | if (hexexp != 0) |
| 394 | s[hexexp] = '\0'; |
| 395 | ASSERT_NOCARRY (mpf_set_str (f, s, hexfloat ? 16 : 10)); |
| 396 | if (hexexp != 0) |
| 397 | { |
| 398 | char *dummy; |
| 399 | long exp; |
| 400 | exp = strtol (s + hexexp + 1, &dummy, 10); |
| 401 | if (exp >= 0) |
| 402 | mpf_mul_2exp (f, f, (unsigned long) exp); |
| 403 | else |
| 404 | mpf_div_2exp (f, f, NEG_CAST (unsigned long, exp)); |
| 405 | } |
| 406 | } |
| 407 | break; |
| 408 | case 'Q': |
| 409 | ASSERT_NOCARRY (mpq_set_str ((mpq_ptr) dst, s, p->base)); |
| 410 | break; |
| 411 | case 'Z': |
| 412 | ASSERT_NOCARRY (mpz_set_str ((mpz_ptr) dst, s, p->base)); |
| 413 | break; |
| 414 | default: |
| 415 | ASSERT (0); |
| 416 | /*FALLTHRU*/ |
| 417 | break; |
| 418 | } |
| 419 | } |
| 420 | |
| 421 | done: |
| 422 | ASSERT (chars <= width+1); |
| 423 | if (chars != width+1) |
| 424 | { |
| 425 | (*funs->unget) (c, data); |
| 426 | TRACE (printf (" ungetc %d, to give %d chars\n", c, chars-1)); |
| 427 | } |
| 428 | chars--; |
| 429 | |
| 430 | (*__gmp_free_func) (s, s_alloc); |
| 431 | |
| 432 | if (invalid) |
| 433 | { |
| 434 | TRACE (printf (" invalid\n")); |
| 435 | return -1; |
| 436 | } |
| 437 | |
| 438 | TRACE (printf (" return %d chars (cf width %d)\n", chars, width)); |
| 439 | return chars; |
| 440 | } |
| 441 | |
| 442 | |
| 443 | /* Read and discard whitespace, if any. Return number of chars skipped. |
| 444 | Whitespace skipping never provokes the EOF return from __gmp_doscan, so |
| 445 | it's not necessary to watch for EOF from funs->get, */ |
| 446 | static int |
| 447 | skip_white (const struct gmp_doscan_funs_t *funs, void *data) |
| 448 | { |
| 449 | int c; |
| 450 | int ret = 0; |
| 451 | |
| 452 | do |
| 453 | { |
| 454 | c = (funs->get) (data); |
| 455 | ret++; |
| 456 | } |
| 457 | while (isspace (c)); |
| 458 | |
| 459 | (funs->unget) (c, data); |
| 460 | ret--; |
| 461 | |
| 462 | TRACE (printf (" skip white %d\n", ret)); |
| 463 | return ret; |
| 464 | } |
| 465 | |
| 466 | |
| 467 | int |
| 468 | __gmp_doscan (const struct gmp_doscan_funs_t *funs, void *data, |
| 469 | const char *orig_fmt, va_list orig_ap) |
| 470 | { |
| 471 | struct gmp_doscan_params_t param; |
| 472 | va_list ap; |
| 473 | char *alloc_fmt; |
| 474 | const char *fmt, *this_fmt, *end_fmt; |
| 475 | size_t orig_fmt_len, alloc_fmt_size, len; |
| 476 | int new_fields, new_chars; |
| 477 | char fchar; |
| 478 | int fields = 0; |
| 479 | int chars = 0; |
| 480 | |
| 481 | TRACE (printf ("__gmp_doscan \"%s\"\n", orig_fmt); |
| 482 | if (funs->scan == (gmp_doscan_scan_t) sscanf) |
| 483 | printf (" s=\"%s\"\n", * (const char **) data)); |
| 484 | |
| 485 | /* Don't modify orig_ap, if va_list is actually an array and hence call by |
| 486 | reference. It could be argued that it'd be more efficient to leave |
| 487 | callers to make a copy if they care, but doing so here is going to be a |
| 488 | very small part of the total work, and we may as well keep applications |
| 489 | out of trouble. */ |
| 490 | va_copy (ap, orig_ap); |
| 491 | |
| 492 | /* Parts of the format string are going to be copied so that a " %n" can |
| 493 | be appended. alloc_fmt is some space for that. orig_fmt_len+4 will be |
| 494 | needed if fmt consists of a single "%" specifier, but otherwise is an |
| 495 | overestimate. We're not going to be very fast here, so use |
| 496 | __gmp_allocate_func rather than TMP_ALLOC. */ |
| 497 | orig_fmt_len = strlen (orig_fmt); |
| 498 | alloc_fmt_size = orig_fmt_len + 4; |
| 499 | alloc_fmt = __GMP_ALLOCATE_FUNC_TYPE (alloc_fmt_size, char); |
| 500 | |
| 501 | fmt = orig_fmt; |
| 502 | end_fmt = orig_fmt + orig_fmt_len; |
| 503 | |
| 504 | for (;;) |
| 505 | { |
| 506 | next: |
| 507 | fchar = *fmt++; |
| 508 | |
| 509 | if (fchar == '\0') |
| 510 | break; |
| 511 | |
| 512 | if (isspace (fchar)) |
| 513 | { |
| 514 | chars += skip_white (funs, data); |
| 515 | continue; |
| 516 | } |
| 517 | |
| 518 | if (fchar != '%') |
| 519 | { |
| 520 | int c; |
| 521 | literal: |
| 522 | c = (funs->get) (data); |
| 523 | if (c != fchar) |
| 524 | { |
| 525 | (funs->unget) (c, data); |
| 526 | if (c == EOF) |
| 527 | { |
| 528 | eof_no_match: |
| 529 | if (fields == 0) |
| 530 | fields = EOF; |
| 531 | } |
| 532 | goto done; |
| 533 | } |
| 534 | chars++; |
| 535 | continue; |
| 536 | } |
| 537 | |
| 538 | param.type = '\0'; |
| 539 | param.base = 0; /* for e,f,g,i */ |
| 540 | param.ignore = 0; |
| 541 | param.width = 0; |
| 542 | |
| 543 | this_fmt = fmt-1; |
| 544 | TRACE (printf (" this_fmt \"%s\"\n", this_fmt)); |
| 545 | |
| 546 | for (;;) |
| 547 | { |
| 548 | ASSERT (fmt <= end_fmt); |
| 549 | |
| 550 | fchar = *fmt++; |
| 551 | switch (fchar) { |
| 552 | |
| 553 | case '\0': /* unterminated % sequence */ |
| 554 | ASSERT (0); |
| 555 | goto done; |
| 556 | |
| 557 | case '%': /* literal % */ |
| 558 | goto literal; |
| 559 | |
| 560 | case '[': /* character range */ |
| 561 | fchar = *fmt++; |
| 562 | if (fchar == '^') |
| 563 | fchar = *fmt++; |
| 564 | /* ']' allowed as the first char (possibly after '^') */ |
| 565 | if (fchar == ']') |
| 566 | fchar = *fmt++; |
| 567 | for (;;) |
| 568 | { |
| 569 | ASSERT (fmt <= end_fmt); |
| 570 | if (fchar == '\0') |
| 571 | { |
| 572 | /* unterminated % sequence */ |
| 573 | ASSERT (0); |
| 574 | goto done; |
| 575 | } |
| 576 | if (fchar == ']') |
| 577 | break; |
| 578 | fchar = *fmt++; |
| 579 | } |
| 580 | /*FALLTHRU*/ |
| 581 | case 'c': /* characters */ |
| 582 | case 's': /* string of non-whitespace */ |
| 583 | case 'p': /* pointer */ |
| 584 | libc_type: |
| 585 | len = fmt - this_fmt; |
| 586 | memcpy (alloc_fmt, this_fmt, len); |
| 587 | alloc_fmt[len++] = '%'; |
| 588 | alloc_fmt[len++] = 'n'; |
| 589 | alloc_fmt[len] = '\0'; |
| 590 | |
| 591 | TRACE (printf (" scan \"%s\"\n", alloc_fmt); |
| 592 | if (funs->scan == (gmp_doscan_scan_t) sscanf) |
| 593 | printf (" s=\"%s\"\n", * (const char **) data)); |
| 594 | |
| 595 | new_chars = -1; |
| 596 | if (param.ignore) |
| 597 | { |
| 598 | new_fields = (*funs->scan) (data, alloc_fmt, &new_chars, NULL); |
| 599 | ASSERT (new_fields == 0 || new_fields == EOF); |
| 600 | } |
| 601 | else |
| 602 | { |
| 603 | void *arg = va_arg (ap, void *); |
| 604 | new_fields = (*funs->scan) (data, alloc_fmt, arg, &new_chars); |
| 605 | ASSERT (new_fields==0 || new_fields==1 || new_fields==EOF); |
| 606 | |
| 607 | if (new_fields == 0) |
| 608 | goto done; /* invalid input */ |
| 609 | |
| 610 | if (new_fields == 1) |
| 611 | ASSERT (new_chars != -1); |
| 612 | } |
| 613 | TRACE (printf (" new_fields %d new_chars %d\n", |
| 614 | new_fields, new_chars)); |
| 615 | |
| 616 | if (new_fields == -1) |
| 617 | goto eof_no_match; /* EOF before anything matched */ |
| 618 | |
| 619 | /* Under param.ignore, when new_fields==0 we don't know if |
| 620 | it's a successful match or an invalid field. new_chars |
| 621 | won't have been assigned if it was an invalid field. */ |
| 622 | if (new_chars == -1) |
| 623 | goto done; /* invalid input */ |
| 624 | |
| 625 | chars += new_chars; |
| 626 | (*funs->step) (data, new_chars); |
| 627 | |
| 628 | increment_fields: |
| 629 | if (! param.ignore) |
| 630 | fields++; |
| 631 | goto next; |
| 632 | |
| 633 | case 'd': /* decimal */ |
| 634 | case 'u': /* decimal */ |
| 635 | param.base = 10; |
| 636 | goto numeric; |
| 637 | |
| 638 | case 'e': /* float */ |
| 639 | case 'E': /* float */ |
| 640 | case 'f': /* float */ |
| 641 | case 'g': /* float */ |
| 642 | case 'G': /* float */ |
| 643 | case 'i': /* integer with base marker */ |
| 644 | numeric: |
| 645 | if (param.type != 'F' && param.type != 'Q' && param.type != 'Z') |
| 646 | goto libc_type; |
| 647 | |
| 648 | chars += skip_white (funs, data); |
| 649 | |
| 650 | new_chars = gmpscan (funs, data, ¶m, |
| 651 | param.ignore ? NULL : va_arg (ap, void*)); |
| 652 | if (new_chars == -2) |
| 653 | goto eof_no_match; |
| 654 | if (new_chars == -1) |
| 655 | goto done; |
| 656 | |
| 657 | ASSERT (new_chars >= 0); |
| 658 | chars += new_chars; |
| 659 | goto increment_fields; |
| 660 | |
| 661 | case 'a': /* glibc allocate string */ |
| 662 | case '\'': /* glibc digit groupings */ |
| 663 | break; |
| 664 | |
| 665 | case 'F': /* mpf_t */ |
| 666 | case 'j': /* intmax_t */ |
| 667 | case 'L': /* long long */ |
| 668 | case 'q': /* quad_t */ |
| 669 | case 'Q': /* mpq_t */ |
| 670 | case 't': /* ptrdiff_t */ |
| 671 | case 'z': /* size_t */ |
| 672 | case 'Z': /* mpz_t */ |
| 673 | set_type: |
| 674 | param.type = fchar; |
| 675 | break; |
| 676 | |
| 677 | case 'h': /* short or char */ |
| 678 | if (param.type != 'h') |
| 679 | goto set_type; |
| 680 | param.type = 'H'; /* internal code for "hh" */ |
| 681 | break; |
| 682 | |
| 683 | goto numeric; |
| 684 | |
| 685 | case 'l': /* long, long long, double or long double */ |
| 686 | if (param.type != 'l') |
| 687 | goto set_type; |
| 688 | param.type = 'L'; /* "ll" means "L" */ |
| 689 | break; |
| 690 | |
| 691 | case 'n': |
| 692 | if (! param.ignore) |
| 693 | { |
| 694 | void *p; |
| 695 | p = va_arg (ap, void *); |
| 696 | TRACE (printf (" store %%n to %p\n", p)); |
| 697 | switch (param.type) { |
| 698 | case '\0': * (int *) p = chars; break; |
| 699 | case 'F': mpf_set_si ((mpf_ptr) p, (long) chars); break; |
| 700 | case 'H': * (char *) p = chars; break; |
| 701 | case 'h': * (short *) p = chars; break; |
| 702 | #if HAVE_INTMAX_T |
| 703 | case 'j': * (intmax_t *) p = chars; break; |
| 704 | #else |
| 705 | case 'j': ASSERT_FAIL (intmax_t not available); break; |
| 706 | #endif |
| 707 | case 'l': * (long *) p = chars; break; |
| 708 | #if HAVE_QUAD_T && HAVE_LONG_LONG |
| 709 | case 'q': |
| 710 | ASSERT_ALWAYS (sizeof (quad_t) == sizeof (long long)); |
| 711 | /*FALLTHRU*/ |
| 712 | #else |
| 713 | case 'q': ASSERT_FAIL (quad_t not available); break; |
| 714 | #endif |
| 715 | #if HAVE_LONG_LONG |
| 716 | case 'L': * (long long *) p = chars; break; |
| 717 | #else |
| 718 | case 'L': ASSERT_FAIL (long long not available); break; |
| 719 | #endif |
| 720 | case 'Q': mpq_set_si ((mpq_ptr) p, (long) chars, 1L); break; |
| 721 | #if HAVE_PTRDIFF_T |
| 722 | case 't': * (ptrdiff_t *) p = chars; break; |
| 723 | #else |
| 724 | case 't': ASSERT_FAIL (ptrdiff_t not available); break; |
| 725 | #endif |
| 726 | case 'z': * (size_t *) p = chars; break; |
| 727 | case 'Z': mpz_set_si ((mpz_ptr) p, (long) chars); break; |
| 728 | default: ASSERT (0); break; |
| 729 | } |
| 730 | } |
| 731 | goto next; |
| 732 | |
| 733 | case 'o': |
| 734 | param.base = 8; |
| 735 | goto numeric; |
| 736 | |
| 737 | case 'x': |
| 738 | case 'X': |
| 739 | param.base = 16; |
| 740 | goto numeric; |
| 741 | |
| 742 | case '0': case '1': case '2': case '3': case '4': |
| 743 | case '5': case '6': case '7': case '8': case '9': |
| 744 | param.width = 0; |
| 745 | do { |
| 746 | param.width = param.width * 10 + (fchar-'0'); |
| 747 | fchar = *fmt++; |
| 748 | } while (isdigit (fchar)); |
| 749 | fmt--; /* unget the non-digit */ |
| 750 | break; |
| 751 | |
| 752 | case '*': |
| 753 | param.ignore = 1; |
| 754 | break; |
| 755 | |
| 756 | default: |
| 757 | /* something invalid in a % sequence */ |
| 758 | ASSERT (0); |
| 759 | goto next; |
| 760 | } |
| 761 | } |
| 762 | } |
| 763 | |
| 764 | done: |
| 765 | (*__gmp_free_func) (alloc_fmt, alloc_fmt_size); |
| 766 | return fields; |
| 767 | } |