blob: c7bfe0b22a89e209592edd0bcef4b9c20047976f [file] [log] [blame]
/* mpf_set_str (dest, string, base) -- Convert the string STRING
   in base BASE to a float in dest.  If BASE is zero, the number is taken
   to be in base 10.
4
5Copyright 1993-1997, 2000-2003, 2005, 2007, 2008, 2011, 2013, 2019 Free
6Software Foundation, Inc.
7
8This file is part of the GNU MP Library.
9
10The GNU MP Library is free software; you can redistribute it and/or modify
11it under the terms of either:
12
13 * the GNU Lesser General Public License as published by the Free
14 Software Foundation; either version 3 of the License, or (at your
15 option) any later version.
16
17or
18
19 * the GNU General Public License as published by the Free Software
20 Foundation; either version 2 of the License, or (at your option) any
21 later version.
22
23or both in parallel, as here.
24
25The GNU MP Library is distributed in the hope that it will be useful, but
26WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
27or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
28for more details.
29
30You should have received copies of the GNU General Public License and the
31GNU Lesser General Public License along with the GNU MP Library. If not,
32see https://www.gnu.org/licenses/. */
33
34/*
35 This still needs work, as suggested by some FIXME comments.
36 1. Don't depend on superfluous mantissa digits.
37 2. Allocate temp space more cleverly.
38 3. Use mpn_div_q instead of mpn_lshift+mpn_divrem.
39*/
40
41#define _GNU_SOURCE /* for DECIMAL_POINT in langinfo.h */
42
43#include "config.h"
44
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <limits.h>	/* for LONG_MAX */
48
49#if HAVE_LANGINFO_H
50#include <langinfo.h> /* for nl_langinfo */
51#endif
52
53#if HAVE_LOCALE_H
54#include <locale.h> /* for localeconv */
55#endif
56
57#include "gmp-impl.h"
58#include "longlong.h"
59
60
61#define digit_value_tab __gmp_digit_value_tab
62
63/* Compute base^exp and return the most significant prec limbs in rp[].
64 Put the count of omitted low limbs in *ign.
65 Return the actual size (which might be less than prec). */
66static mp_size_t
67mpn_pow_1_highpart (mp_ptr rp, mp_size_t *ignp,
68 mp_limb_t base, mp_exp_t exp,
69 mp_size_t prec, mp_ptr tp)
70{
71 mp_size_t ign; /* counts number of ignored low limbs in r */
72 mp_size_t off; /* keeps track of offset where value starts */
73 mp_ptr passed_rp = rp;
74 mp_size_t rn;
75 int cnt;
76 int i;
77
78 rp[0] = base;
79 rn = 1;
80 off = 0;
81 ign = 0;
82 count_leading_zeros (cnt, exp);
83 for (i = GMP_LIMB_BITS - cnt - 2; i >= 0; i--)
84 {
85 mpn_sqr (tp, rp + off, rn);
86 rn = 2 * rn;
87 rn -= tp[rn - 1] == 0;
88 ign <<= 1;
89
90 off = 0;
91 if (rn > prec)
92 {
93 ign += rn - prec;
94 off = rn - prec;
95 rn = prec;
96 }
97 MP_PTR_SWAP (rp, tp);
98
99 if (((exp >> i) & 1) != 0)
100 {
101 mp_limb_t cy;
102 cy = mpn_mul_1 (rp, rp + off, rn, base);
103 rp[rn] = cy;
104 rn += cy != 0;
105 off = 0;
106 }
107 }
108
109 if (rn > prec)
110 {
111 ign += rn - prec;
112 rp += rn - prec;
113 rn = prec;
114 }
115
116 MPN_COPY_INCR (passed_rp, rp + off, rn);
117 *ignp = ign;
118 return rn;
119}
120
121int
122mpf_set_str (mpf_ptr x, const char *str, int base)
123{
124 size_t str_size;
125 char *s, *begs;
126 size_t i, j;
127 int c;
128 int negative;
129 char *dotpos;
130 const char *expptr;
131 int exp_base;
132 const char *point = GMP_DECIMAL_POINT;
133 size_t pointlen = strlen (point);
134 const unsigned char *digit_value;
135 int incr;
136 size_t n_zeros_skipped;
137
138 TMP_DECL;
139
140 c = (unsigned char) *str;
141
142 /* Skip whitespace. */
143 while (isspace (c))
144 c = (unsigned char) *++str;
145
146 negative = 0;
147 if (c == '-')
148 {
149 negative = 1;
150 c = (unsigned char) *++str;
151 }
152
153 /* Default base to decimal. */
154 if (base == 0)
155 base = 10;
156
157 exp_base = base;
158
159 if (base < 0)
160 {
161 exp_base = 10;
162 base = -base;
163 }
164
165 digit_value = digit_value_tab;
166 if (base > 36)
167 {
168 /* For bases > 36, use the collating sequence
169 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz. */
170 digit_value += 208;
171 if (base > 62)
172 return -1; /* too large base */
173 }
174
175 /* Require at least one digit, possibly after an initial decimal point. */
176 if (digit_value[c] >= base)
177 {
178 /* not a digit, must be a decimal point */
179 for (i = 0; i < pointlen; i++)
180 if (str[i] != point[i])
181 return -1;
182 if (digit_value[(unsigned char) str[pointlen]] >= base)
183 return -1;
184 }
185
186 /* Locate exponent part of the input. Look from the right of the string,
187 since the exponent is usually a lot shorter than the mantissa. */
188 expptr = NULL;
189 str_size = strlen (str);
190 for (i = str_size - 1; i > 0; i--)
191 {
192 c = (unsigned char) str[i];
193 if (c == '@' || (base <= 10 && (c == 'e' || c == 'E')))
194 {
195 expptr = str + i + 1;
196 str_size = i;
197 break;
198 }
199 }
200
201 TMP_MARK;
202 s = begs = (char *) TMP_ALLOC (str_size + 1);
203
204 incr = 0;
205 n_zeros_skipped = 0;
206 dotpos = NULL;
207
208 /* Loop through mantissa, converting it from ASCII to raw byte values. */
209 for (i = 0; i < str_size; i++)
210 {
211 c = (unsigned char) *str;
212 if (!isspace (c))
213 {
214 int dig;
215
216 for (j = 0; j < pointlen; j++)
217 if (str[j] != point[j])
218 goto not_point;
219 if (1)
220 {
221 if (dotpos != 0)
222 {
223 /* already saw a decimal point, another is invalid */
224 TMP_FREE;
225 return -1;
226 }
227 dotpos = s;
228 str += pointlen - 1;
229 i += pointlen - 1;
230 }
231 else
232 {
233 not_point:
234 dig = digit_value[c];
235 if (dig >= base)
236 {
237 TMP_FREE;
238 return -1;
239 }
240 *s = dig;
241 incr |= dig != 0;
242 s += incr; /* Increment after first non-0 digit seen. */
243 if (dotpos != NULL)
244 /* Count skipped zeros between radix point and first non-0
245 digit. */
246 n_zeros_skipped += 1 - incr;
247 }
248 }
249 c = (unsigned char) *++str;
250 }
251
252 str_size = s - begs;
253
254 {
255 long exp_in_base;
256 mp_size_t ra, ma, rn, mn;
257 int cnt;
258 mp_ptr mp, tp, rp;
259 mp_exp_t exp_in_limbs;
260 mp_size_t prec = PREC(x) + 1;
261 int divflag;
262 mp_size_t madj, radj;
263
264#if 0
265 size_t n_chars_needed;
266
267 /* This needs careful testing. Leave disabled for now. */
268 /* Just consider the relevant leading digits of the mantissa. */
269 LIMBS_PER_DIGIT_IN_BASE (n_chars_needed, prec, base);
270 if (str_size > n_chars_needed)
271 str_size = n_chars_needed;
272#endif
273
274 if (str_size == 0)
275 {
276 SIZ(x) = 0;
277 EXP(x) = 0;
278 TMP_FREE;
279 return 0;
280 }
281
282 LIMBS_PER_DIGIT_IN_BASE (ma, str_size, base);
283 mp = TMP_ALLOC_LIMBS (ma);
284 mn = mpn_set_str (mp, (unsigned char *) begs, str_size, base);
285
286 madj = 0;
287 /* Ignore excess limbs in MP,MSIZE. */
288 if (mn > prec)
289 {
290 madj = mn - prec;
291 mp += mn - prec;
292 mn = prec;
293 }
294
295 if (expptr != 0)
296 {
297 /* Scan and convert the exponent, in base exp_base. */
298 long dig, minus, plusminus;
299 c = (unsigned char) *expptr;
300 minus = -(long) (c == '-');
301 plusminus = minus | -(long) (c == '+');
302 expptr -= plusminus; /* conditional increment */
303 c = (unsigned char) *expptr++;
304 dig = digit_value[c];
305 if (dig >= exp_base)
306 {
307 TMP_FREE;
308 return -1;
309 }
310 exp_in_base = dig;
311 c = (unsigned char) *expptr++;
312 dig = digit_value[c];
313 while (dig < exp_base)
314 {
315 exp_in_base = exp_in_base * exp_base;
316 exp_in_base += dig;
317 c = (unsigned char) *expptr++;
318 dig = digit_value[c];
319 }
320 exp_in_base = (exp_in_base ^ minus) - minus; /* conditional negation */
321 }
322 else
323 exp_in_base = 0;
324 if (dotpos != 0)
325 exp_in_base -= s - dotpos + n_zeros_skipped;
326 divflag = exp_in_base < 0;
327 exp_in_base = ABS (exp_in_base);
328
329 if (exp_in_base == 0)
330 {
331 MPN_COPY (PTR(x), mp, mn);
332 SIZ(x) = negative ? -mn : mn;
333 EXP(x) = mn + madj;
334 TMP_FREE;
335 return 0;
336 }
337
338 ra = 2 * (prec + 1);
339 TMP_ALLOC_LIMBS_2 (rp, ra, tp, ra);
340 rn = mpn_pow_1_highpart (rp, &radj, (mp_limb_t) base, exp_in_base, prec, tp);
341
342 if (divflag)
343 {
344#if 0
345 /* FIXME: Should use mpn_div_q here. */
346 ...
347 mpn_div_q (tp, mp, mn, rp, rn, scratch);
348 ...
349#else
350 mp_ptr qp;
351 mp_limb_t qlimb;
352 if (mn < rn)
353 {
354 /* Pad out MP,MSIZE for current divrem semantics. */
355 mp_ptr tmp = TMP_ALLOC_LIMBS (rn + 1);
356 MPN_ZERO (tmp, rn - mn);
357 MPN_COPY (tmp + rn - mn, mp, mn);
358 mp = tmp;
359 madj -= rn - mn;
360 mn = rn;
361 }
362 if ((rp[rn - 1] & GMP_NUMB_HIGHBIT) == 0)
363 {
364 mp_limb_t cy;
365 count_leading_zeros (cnt, rp[rn - 1]);
366 cnt -= GMP_NAIL_BITS;
367 mpn_lshift (rp, rp, rn, cnt);
368 cy = mpn_lshift (mp, mp, mn, cnt);
369 if (cy)
370 mp[mn++] = cy;
371 }
372
373 qp = TMP_ALLOC_LIMBS (prec + 1);
374 qlimb = mpn_divrem (qp, prec - (mn - rn), mp, mn, rp, rn);
375 tp = qp;
376 exp_in_limbs = qlimb + (mn - rn) + (madj - radj);
377 rn = prec;
378 if (qlimb != 0)
379 {
380 tp[prec] = qlimb;
381 /* Skip the least significant limb not to overrun the destination
382 variable. */
383 tp++;
384 }
385#endif
386 }
387 else
388 {
389 tp = TMP_ALLOC_LIMBS (rn + mn);
390 if (rn > mn)
391 mpn_mul (tp, rp, rn, mp, mn);
392 else
393 mpn_mul (tp, mp, mn, rp, rn);
394 rn += mn;
395 rn -= tp[rn - 1] == 0;
396 exp_in_limbs = rn + madj + radj;
397
398 if (rn > prec)
399 {
400 tp += rn - prec;
401 rn = prec;
402 exp_in_limbs += 0;
403 }
404 }
405
406 MPN_COPY (PTR(x), tp, rn);
407 SIZ(x) = negative ? -rn : rn;
408 EXP(x) = exp_in_limbs;
409 TMP_FREE;
410 return 0;
411 }
412}