1 /* $OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3 * Copyright (c) 1990, 1993
4 * The Regents of the University of California. All rights reserved.
5 *
6 * This code is derived from software contributed to Berkeley by
7 * Chris Torek.
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 */
33
34 #include "scanf_common.h"
35
// Interpreted scanset membership test (the wide-character counterpart of the
// table-driven __sccl in vfscanf.c). A lookup table covering every wchar_t
// value would be too expensive, and a compressed table isn't worth the
// trouble, so the set text is simply re-walked for each character.
//
// `ccl` points just past the '[' of a "%[...]" directive; scanning stops at
// the closing ']' or at the end of the string, whichever comes first.
// Returns true if `wc` is accepted by the set (taking a leading '^' into
// account), false otherwise.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  const wchar_t* cursor = ccl;

  // A leading '^' inverts the sense of the whole set.
  const bool negated = (*cursor == '^');
  if (negated) cursor++;
  const bool on_hit = !negated;

  // In first position, ']' and '-' are ordinary members rather than syntax.
  if (*cursor == ']' || *cursor == '-') {
    if (wc == *cursor) return on_hit;
    cursor++;
  }

  for (; *cursor != '\0' && *cursor != ']'; cursor++) {
    // A '-' only introduces a range when something other than the end of the
    // set follows it; a trailing '-' is an ordinary member.
    const bool maybe_range = (*cursor == '-') && cursor[1] != '\0' && cursor[1] != ']';
    if (maybe_range) {
      const wchar_t lo = cursor[-1];
      const wchar_t hi = cursor[1];
      if (lo <= hi) {
        if (lo <= wc && wc <= hi) return on_hit;
        // Skip the upper bound here; the loop increment skips the '-'.
        cursor++;
        continue;
      }
      // Reversed bounds do not form a range: treat the '-' as a literal below.
    }
    if (wc == *cursor) return on_hit;
  }

  // Ran off the end of the set without finding `wc`.
  return !on_hit;
}
73
74 #pragma GCC diagnostic push
75 #pragma GCC diagnostic ignored "-Wframe-larger-than="
76
77 /*
78 * vfwscanf
79 */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)80 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
81 wint_t c; /* character from format, or conversion */
82 size_t width; /* field width, or 0 */
83 wchar_t* p; /* points into all kinds of strings */
84 int n; /* handy integer */
85 int flags; /* flags as defined above */
86 wchar_t* p0; /* saves original value of p when necessary */
87 int nassigned; /* number of fields assigned */
88 int nconversions; /* number of conversions */
89 int nread; /* number of characters consumed from fp */
90 int base; /* base argument to strtoimax/strtouimax */
91 wchar_t buf[BUF]; /* buffer for numeric conversions */
92 const wchar_t* ccl;
93 wint_t wi; /* handy wint_t */
94 char* mbp; /* multibyte string pointer for %c %s %[ */
95 size_t nconv; /* number of bytes in mb. conversion */
96 char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
97 mbstate_t mbs;
98
99 _SET_ORIENTATION(fp, ORIENT_CHARS);
100
101 nassigned = 0;
102 nconversions = 0;
103 nread = 0;
104 base = 0; /* XXX just to keep gcc happy */
105 for (;;) {
106 c = *fmt++;
107 if (c == 0) {
108 return (nassigned);
109 }
110 if (iswspace(c)) {
111 while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
112 ;
113 if (c != WEOF) __ungetwc(c, fp);
114 continue;
115 }
116 if (c != '%') goto literal;
117 width = 0;
118 flags = 0;
119 /*
120 * switch on the format. continue if done;
121 * break once format type is derived.
122 */
123 again:
124 c = *fmt++;
125 reswitch:
126 switch (c) {
127 case '%':
128 literal:
129 if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
130 if (wi != c) {
131 __ungetwc(wi, fp);
132 goto match_failure;
133 }
134 nread++;
135 continue;
136
137 case '*':
138 flags |= SUPPRESS;
139 goto again;
140 case 'j':
141 flags |= MAXINT;
142 goto again;
143 case 'L':
144 flags |= LONGDBL;
145 goto again;
146 case 'h':
147 if (*fmt == 'h') {
148 fmt++;
149 flags |= SHORTSHORT;
150 } else {
151 flags |= SHORT;
152 }
153 goto again;
154 case 'l':
155 if (*fmt == 'l') {
156 fmt++;
157 flags |= LLONG;
158 } else {
159 flags |= LONG;
160 }
161 goto again;
162 case 'q':
163 flags |= LLONG; /* deprecated */
164 goto again;
165 case 't':
166 flags |= PTRINT;
167 goto again;
168 case 'z':
169 flags |= SIZEINT;
170 goto again;
171
172 case '0':
173 case '1':
174 case '2':
175 case '3':
176 case '4':
177 case '5':
178 case '6':
179 case '7':
180 case '8':
181 case '9':
182 width = width * 10 + c - '0';
183 goto again;
184
185 /*
186 * Conversions.
187 * Those marked `compat' are for 4.[123]BSD compatibility.
188 */
189 case 'b':
190 c = CT_INT;
191 base = 2;
192 flags |= PFBOK; /* enable 0b prefixing */
193 break;
194
195 case 'D': /* compat */
196 flags |= LONG;
197 __BIONIC_FALLTHROUGH;
198 case 'd':
199 c = CT_INT;
200 base = 10;
201 break;
202
203 case 'i':
204 c = CT_INT;
205 base = 0;
206 break;
207
208 case 'O': /* compat */
209 flags |= LONG;
210 __BIONIC_FALLTHROUGH;
211 case 'o':
212 c = CT_INT;
213 flags |= UNSIGNED;
214 base = 8;
215 break;
216
217 case 'u':
218 c = CT_INT;
219 flags |= UNSIGNED;
220 base = 10;
221 break;
222
223 case 'w': {
224 int size = 0;
225 bool fast = false;
226 c = *fmt++;
227 if (c == 'f') {
228 fast = true;
229 c = *fmt++;
230 }
231 while (is_digit(c)) {
232 APPEND_DIGIT(size, c);
233 c = *fmt++;
234 }
235 flags |= w_to_flag(size, fast);
236 goto reswitch;
237 }
238
239 case 'X':
240 case 'x':
241 flags |= PFXOK; /* enable 0x prefixing */
242 c = CT_INT;
243 flags |= UNSIGNED;
244 base = 16;
245 break;
246
247 case 'e':
248 case 'E':
249 case 'f':
250 case 'F':
251 case 'g':
252 case 'G':
253 case 'a':
254 case 'A':
255 c = CT_FLOAT;
256 break;
257
258 case 's':
259 c = CT_STRING;
260 break;
261
262 case '[':
263 ccl = fmt;
264 if (*fmt == '^') fmt++;
265 if (*fmt == ']') fmt++;
266 while (*fmt != '\0' && *fmt != ']') fmt++;
267 fmt++;
268 flags |= NOSKIP;
269 c = CT_CCL;
270 break;
271
272 case 'c':
273 flags |= NOSKIP;
274 c = CT_CHAR;
275 break;
276
277 case 'p': /* pointer format is like hex */
278 flags |= POINTER | PFXOK;
279 c = CT_INT;
280 flags |= UNSIGNED;
281 base = 16;
282 break;
283
284 case 'n':
285 nconversions++;
286 if (flags & SUPPRESS) continue;
287 if (flags & SHORTSHORT)
288 *va_arg(ap, signed char*) = nread;
289 else if (flags & SHORT)
290 *va_arg(ap, short*) = nread;
291 else if (flags & LONG)
292 *va_arg(ap, long*) = nread;
293 else if (flags & SIZEINT)
294 *va_arg(ap, ssize_t*) = nread;
295 else if (flags & PTRINT)
296 *va_arg(ap, ptrdiff_t*) = nread;
297 else if (flags & LLONG)
298 *va_arg(ap, long long*) = nread;
299 else if (flags & MAXINT)
300 *va_arg(ap, intmax_t*) = nread;
301 else
302 *va_arg(ap, int*) = nread;
303 continue;
304
305 /*
306 * Disgusting backwards compatibility hacks. XXX
307 */
308 case '\0': /* compat */
309 return (EOF);
310
311 default: /* compat */
312 if (iswupper(c)) flags |= LONG;
313 c = CT_INT;
314 base = 10;
315 break;
316 }
317
318 /*
319 * Consume leading white space, except for formats
320 * that suppress this.
321 */
322 if ((flags & NOSKIP) == 0) {
323 while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
324 if (wi == WEOF) goto input_failure;
325 __ungetwc(wi, fp);
326 }
327
328 /*
329 * Do the conversion.
330 */
331 switch (c) {
332 case CT_CHAR:
333 /* scan arbitrary characters (sets NOSKIP) */
334 if (width == 0) width = 1;
335 if (flags & LONG) {
336 if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
337 n = 0;
338 while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
339 if (!(flags & SUPPRESS)) *p++ = static_cast<wchar_t>(wi);
340 n++;
341 }
342 if (n == 0) goto input_failure;
343 nread += n;
344 if (!(flags & SUPPRESS)) nassigned++;
345 } else {
346 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
347 n = 0;
348 memset(&mbs, 0, sizeof(mbs));
349 while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
350 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
351 nconv = wcrtomb(mbp, wi, &mbs);
352 if (nconv == static_cast<size_t>(-1)) goto input_failure;
353 } else {
354 nconv = wcrtomb(mbbuf, wi, &mbs);
355 if (nconv == static_cast<size_t>(-1)) goto input_failure;
356 if (nconv > width) {
357 __ungetwc(wi, fp);
358 break;
359 }
360 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
361 }
362 if (!(flags & SUPPRESS)) mbp += nconv;
363 width -= nconv;
364 n++;
365 }
366 if (n == 0) goto input_failure;
367 nread += n;
368 if (!(flags & SUPPRESS)) nassigned++;
369 }
370 nconversions++;
371 break;
372
373 case CT_CCL:
374 case CT_STRING:
375 // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
376 // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
377 if (width == 0) width = SIZE_MAX; // 'infinity'.
378 if ((flags & SUPPRESS) && (flags & LONG)) {
379 n = 0;
380 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
381 if (wi != WEOF) __ungetwc(wi, fp);
382 } else if (flags & LONG) {
383 p0 = p = va_arg(ap, wchar_t*);
384 while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
385 *p++ = static_cast<wchar_t>(wi);
386 }
387 if (wi != WEOF) __ungetwc(wi, fp);
388 n = p - p0;
389 } else {
390 if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
391 n = 0;
392 memset(&mbs, 0, sizeof(mbs));
393 while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
394 if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
395 nconv = wcrtomb(mbp, wi, &mbs);
396 if (nconv == static_cast<size_t>(-1)) goto input_failure;
397 } else {
398 nconv = wcrtomb(mbbuf, wi, &mbs);
399 if (nconv == static_cast<size_t>(-1)) goto input_failure;
400 if (nconv > width) break;
401 if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
402 }
403 if (!(flags & SUPPRESS)) mbp += nconv;
404 width -= nconv;
405 n++;
406 }
407 if (wi != WEOF) __ungetwc(wi, fp);
408 }
409 if (c == CT_CCL && n == 0) goto match_failure;
410 if (!(flags & SUPPRESS)) {
411 if (flags & LONG) {
412 *p = L'\0';
413 } else {
414 *mbp = '\0';
415 }
416 ++nassigned;
417 }
418 nread += n;
419 nconversions++;
420 break;
421
422 case CT_INT:
423 /* scan an integer as if by strtoimax/strtoumax */
424 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
425 width = sizeof(buf) / sizeof(*buf) - 1;
426 flags |= SIGNOK | NDIGITS | NZDIGITS;
427 for (p = buf; width; width--) {
428 c = __fgetwc_unlock(fp);
429 /*
430 * Switch on the character; `goto ok'
431 * if we accept it as a part of number.
432 */
433 switch (c) {
434 /*
435 * The digit 0 is always legal, but is
436 * special. For %i conversions, if no
437 * digits (zero or nonzero) have been
438 * scanned (only signs), we will have
439 * base==0. In that case, we should set
440 * it to 8 and enable 0b/0x prefixing.
441 * Also, if we have not scanned zero digits
442 * before this, do not turn off prefixing
443 * (someone else will turn it off if we
444 * have scanned any nonzero digits).
445 */
446 case '0':
447 if (base == 0) {
448 base = 8;
449 flags |= PFBOK | PFXOK;
450 }
451 if (flags & NZDIGITS) {
452 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
453 } else {
454 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
455 }
456 goto ok;
457
458 /* 1 through 7 always legal */
459 case 'B':
460 case 'b':
461 // Is this 'b' potentially part of an "0b" prefix?
462 if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
463 base = 2;
464 flags &= ~PFBOK;
465 goto ok;
466 }
467 // No? Fall through and see if it's a hex digit instead then...
468 __BIONIC_FALLTHROUGH;
469 case '1':
470 case '2':
471 case '3':
472 case '4':
473 case '5':
474 case '6':
475 case '7':
476 case '8':
477 case '9':
478 case 'A':
479 case 'C':
480 case 'D':
481 case 'E':
482 case 'F':
483 case 'a':
484 case 'c':
485 case 'd':
486 case 'e':
487 case 'f':
488 if (base == 0) base = 10;
489 if (base != 16 && static_cast<int>(c - '0') >= base) break; /* not legal here */
490 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
491 goto ok;
492
493 /* sign ok only as first character */
494 case '+':
495 case '-':
496 if (flags & SIGNOK) {
497 flags &= ~SIGNOK;
498 flags |= HAVESIGN;
499 goto ok;
500 }
501 break;
502
503 /*
504 * x ok iff flag still set and 2nd char (or
505 * 3rd char if we have a sign).
506 */
507 case 'x':
508 case 'X':
509 if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
510 base = 16; /* if %i */
511 flags &= ~PFXOK;
512 goto ok;
513 }
514 break;
515 }
516
517 /*
518 * If we got here, c is not a legal character
519 * for a number. Stop accumulating digits.
520 */
521 if (c != WEOF) __ungetwc(c, fp);
522 break;
523 ok:
524 /*
525 * c is legal: store it and look at the next.
526 */
527 *p++ = static_cast<wchar_t>(c);
528 }
529 /*
530 * If we had only a sign, it is no good; push back the sign.
531 * If the number was `[-+]0[BbXx]`, push back and treat it
532 * as `[-+]0`.
533 */
534 if (flags & NDIGITS) {
535 if (p > buf) __ungetwc(*--p, fp);
536 goto match_failure;
537 }
538 c = p[-1];
539 if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
540 --p;
541 __ungetwc(c, fp);
542 }
543 if ((flags & SUPPRESS) == 0) {
544 uintmax_t res;
545
546 *p = '\0';
547 if (flags & UNSIGNED)
548 res = wcstoimax(buf, NULL, base);
549 else
550 res = wcstoumax(buf, NULL, base);
551 if (flags & POINTER)
552 *va_arg(ap, void**) = reinterpret_cast<void*>(res);
553 else if (flags & MAXINT)
554 *va_arg(ap, intmax_t*) = res;
555 else if (flags & LLONG)
556 *va_arg(ap, long long*) = res;
557 else if (flags & SIZEINT)
558 *va_arg(ap, ssize_t*) = res;
559 else if (flags & PTRINT)
560 *va_arg(ap, ptrdiff_t*) = res;
561 else if (flags & LONG)
562 *va_arg(ap, long*) = res;
563 else if (flags & SHORT)
564 *va_arg(ap, short*) = res;
565 else if (flags & SHORTSHORT)
566 *va_arg(ap, signed char*) = res;
567 else
568 *va_arg(ap, int*) = res;
569 nassigned++;
570 }
571 nread += p - buf;
572 nconversions++;
573 break;
574
575 case CT_FLOAT:
576 /* scan a floating point number as if by strtod */
577 if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
578 width = sizeof(buf) / sizeof(*buf) - 1;
579 if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
580 if ((flags & SUPPRESS) == 0) {
581 if (flags & LONGDBL) {
582 long double res = wcstold(buf, &p);
583 *va_arg(ap, long double*) = res;
584 } else if (flags & LONG) {
585 double res = wcstod(buf, &p);
586 *va_arg(ap, double*) = res;
587 } else {
588 float res = wcstof(buf, &p);
589 *va_arg(ap, float*) = res;
590 }
591 if (static_cast<size_t>(p - buf) != width) abort();
592 nassigned++;
593 }
594 nread += width;
595 nconversions++;
596 break;
597 }
598 }
599 input_failure:
600 return (nconversions != 0 ? nassigned : EOF);
601 match_failure:
602 return (nassigned);
603 }
604 #pragma GCC diagnostic pop
605