1 /*	$OpenBSD: vfwscanf.c,v 1.4 2014/03/19 05:17:01 guenther Exp $ */
2 /*-
3  * Copyright (c) 1990, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * Chris Torek.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 
34 #include "scanf_common.h"
35 
// Interpretive counterpart of __sccl from vfscanf.c: rather than expanding the
// set into a lookup table (far too large for the full wchar_t range), the
// scanset text is re-walked for every character tested.
//
// `ccl` points just past the '[' of a "%[...]" directive; the walk stops at
// the closing ']' or at the end of the string, whichever comes first.
// Returns true if `wc` is accepted by the scanset, taking a leading '^'
// (negation) into account.
static inline bool in_ccl(wchar_t wc, const wchar_t* ccl) {
  const wchar_t* cursor = ccl;

  // A leading '^' inverts the sense of the whole set.
  const bool negated = (*cursor == '^');
  if (negated) ++cursor;
  const bool on_hit = !negated;

  // In the very first position ']' and '-' are ordinary members.
  if (*cursor == ']' || *cursor == '-') {
    if (wc == *cursor) return on_hit;
    ++cursor;
  }

  for (; *cursor != '\0' && *cursor != ']'; ++cursor) {
    // A '-' only introduces a range when something other than the end of the
    // set follows it; a trailing '-' is an ordinary member.
    const bool range_candidate = (*cursor == '-') && cursor[1] != '\0' && cursor[1] != ']';
    if (range_candidate) {
      const wchar_t lo = cursor[-1];
      const wchar_t hi = cursor[1];
      if (lo <= hi) {
        if (lo <= wc && wc <= hi) return on_hit;
        // Together with the loop increment this steps over both the '-' and
        // the upper bound.
        ++cursor;
        continue;
      }
      // Bounds are in descending order: not a range, so the '-' is tested as
      // a literal below.
    }
    if (wc == *cursor) return on_hit;
  }

  // Ran off the end of the set without a hit.
  return !on_hit;
}
73 
74 #pragma GCC diagnostic push
75 #pragma GCC diagnostic ignored "-Wframe-larger-than="
76 
77 /*
78  * vfwscanf
79  */
__vfwscanf(FILE * __restrict fp,const wchar_t * __restrict fmt,__va_list ap)80 int __vfwscanf(FILE* __restrict fp, const wchar_t* __restrict fmt, __va_list ap) {
81   wint_t c;               /* character from format, or conversion */
82   size_t width;           /* field width, or 0 */
83   wchar_t* p;             /* points into all kinds of strings */
84   int n;                  /* handy integer */
85   int flags;              /* flags as defined above */
86   wchar_t* p0;            /* saves original value of p when necessary */
87   int nassigned;          /* number of fields assigned */
88   int nconversions;       /* number of conversions */
89   int nread;              /* number of characters consumed from fp */
90   int base;               /* base argument to strtoimax/strtouimax */
91   wchar_t buf[BUF];       /* buffer for numeric conversions */
92   const wchar_t* ccl;
93   wint_t wi;              /* handy wint_t */
94   char* mbp;              /* multibyte string pointer for %c %s %[ */
95   size_t nconv;           /* number of bytes in mb. conversion */
96   char mbbuf[MB_LEN_MAX]; /* temporary mb. character buffer */
97   mbstate_t mbs;
98 
99   _SET_ORIENTATION(fp, ORIENT_CHARS);
100 
101   nassigned = 0;
102   nconversions = 0;
103   nread = 0;
104   base = 0; /* XXX just to keep gcc happy */
105   for (;;) {
106     c = *fmt++;
107     if (c == 0) {
108       return (nassigned);
109     }
110     if (iswspace(c)) {
111       while ((c = __fgetwc_unlock(fp)) != WEOF && iswspace(c))
112         ;
113       if (c != WEOF) __ungetwc(c, fp);
114       continue;
115     }
116     if (c != '%') goto literal;
117     width = 0;
118     flags = 0;
119     /*
120      * switch on the format.  continue if done;
121      * break once format type is derived.
122      */
123   again:
124     c = *fmt++;
125   reswitch:
126     switch (c) {
127       case '%':
128       literal:
129         if ((wi = __fgetwc_unlock(fp)) == WEOF) goto input_failure;
130         if (wi != c) {
131           __ungetwc(wi, fp);
132           goto match_failure;
133         }
134         nread++;
135         continue;
136 
137       case '*':
138         flags |= SUPPRESS;
139         goto again;
140       case 'j':
141         flags |= MAXINT;
142         goto again;
143       case 'L':
144         flags |= LONGDBL;
145         goto again;
146       case 'h':
147         if (*fmt == 'h') {
148           fmt++;
149           flags |= SHORTSHORT;
150         } else {
151           flags |= SHORT;
152         }
153         goto again;
154       case 'l':
155         if (*fmt == 'l') {
156           fmt++;
157           flags |= LLONG;
158         } else {
159           flags |= LONG;
160         }
161         goto again;
162       case 'q':
163         flags |= LLONG; /* deprecated */
164         goto again;
165       case 't':
166         flags |= PTRINT;
167         goto again;
168       case 'z':
169         flags |= SIZEINT;
170         goto again;
171 
172       case '0':
173       case '1':
174       case '2':
175       case '3':
176       case '4':
177       case '5':
178       case '6':
179       case '7':
180       case '8':
181       case '9':
182         width = width * 10 + c - '0';
183         goto again;
184 
185       /*
186        * Conversions.
187        * Those marked `compat' are for 4.[123]BSD compatibility.
188        */
189       case 'b':
190         c = CT_INT;
191         base = 2;
192         flags |= PFBOK; /* enable 0b prefixing */
193         break;
194 
195       case 'D': /* compat */
196         flags |= LONG;
197         __BIONIC_FALLTHROUGH;
198       case 'd':
199         c = CT_INT;
200         base = 10;
201         break;
202 
203       case 'i':
204         c = CT_INT;
205         base = 0;
206         break;
207 
208       case 'O': /* compat */
209         flags |= LONG;
210         __BIONIC_FALLTHROUGH;
211       case 'o':
212         c = CT_INT;
213         flags |= UNSIGNED;
214         base = 8;
215         break;
216 
217       case 'u':
218         c = CT_INT;
219         flags |= UNSIGNED;
220         base = 10;
221         break;
222 
223       case 'w': {
224         int size = 0;
225         bool fast = false;
226         c = *fmt++;
227         if (c == 'f') {
228           fast = true;
229           c = *fmt++;
230         }
231         while (is_digit(c)) {
232           APPEND_DIGIT(size, c);
233           c = *fmt++;
234         }
235         flags |= w_to_flag(size, fast);
236         goto reswitch;
237       }
238 
239       case 'X':
240       case 'x':
241         flags |= PFXOK; /* enable 0x prefixing */
242         c = CT_INT;
243         flags |= UNSIGNED;
244         base = 16;
245         break;
246 
247       case 'e':
248       case 'E':
249       case 'f':
250       case 'F':
251       case 'g':
252       case 'G':
253       case 'a':
254       case 'A':
255         c = CT_FLOAT;
256         break;
257 
258       case 's':
259         c = CT_STRING;
260         break;
261 
262       case '[':
263         ccl = fmt;
264         if (*fmt == '^') fmt++;
265         if (*fmt == ']') fmt++;
266         while (*fmt != '\0' && *fmt != ']') fmt++;
267         fmt++;
268         flags |= NOSKIP;
269         c = CT_CCL;
270         break;
271 
272       case 'c':
273         flags |= NOSKIP;
274         c = CT_CHAR;
275         break;
276 
277       case 'p': /* pointer format is like hex */
278         flags |= POINTER | PFXOK;
279         c = CT_INT;
280         flags |= UNSIGNED;
281         base = 16;
282         break;
283 
284       case 'n':
285         nconversions++;
286         if (flags & SUPPRESS) continue;
287         if (flags & SHORTSHORT)
288           *va_arg(ap, signed char*) = nread;
289         else if (flags & SHORT)
290           *va_arg(ap, short*) = nread;
291         else if (flags & LONG)
292           *va_arg(ap, long*) = nread;
293         else if (flags & SIZEINT)
294           *va_arg(ap, ssize_t*) = nread;
295         else if (flags & PTRINT)
296           *va_arg(ap, ptrdiff_t*) = nread;
297         else if (flags & LLONG)
298           *va_arg(ap, long long*) = nread;
299         else if (flags & MAXINT)
300           *va_arg(ap, intmax_t*) = nread;
301         else
302           *va_arg(ap, int*) = nread;
303         continue;
304 
305       /*
306        * Disgusting backwards compatibility hacks.	XXX
307        */
308       case '\0': /* compat */
309         return (EOF);
310 
311       default: /* compat */
312         if (iswupper(c)) flags |= LONG;
313         c = CT_INT;
314         base = 10;
315         break;
316     }
317 
318     /*
319      * Consume leading white space, except for formats
320      * that suppress this.
321      */
322     if ((flags & NOSKIP) == 0) {
323       while ((wi = __fgetwc_unlock(fp)) != WEOF && iswspace(wi)) nread++;
324       if (wi == WEOF) goto input_failure;
325       __ungetwc(wi, fp);
326     }
327 
328     /*
329      * Do the conversion.
330      */
331     switch (c) {
332       case CT_CHAR:
333         /* scan arbitrary characters (sets NOSKIP) */
334         if (width == 0) width = 1;
335         if (flags & LONG) {
336           if (!(flags & SUPPRESS)) p = va_arg(ap, wchar_t*);
337           n = 0;
338           while (width-- != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
339             if (!(flags & SUPPRESS)) *p++ = static_cast<wchar_t>(wi);
340             n++;
341           }
342           if (n == 0) goto input_failure;
343           nread += n;
344           if (!(flags & SUPPRESS)) nassigned++;
345         } else {
346           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
347           n = 0;
348           memset(&mbs, 0, sizeof(mbs));
349           while (width != 0 && (wi = __fgetwc_unlock(fp)) != WEOF) {
350             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
351               nconv = wcrtomb(mbp, wi, &mbs);
352               if (nconv == static_cast<size_t>(-1)) goto input_failure;
353             } else {
354               nconv = wcrtomb(mbbuf, wi, &mbs);
355               if (nconv == static_cast<size_t>(-1)) goto input_failure;
356               if (nconv > width) {
357                 __ungetwc(wi, fp);
358                 break;
359               }
360               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
361             }
362             if (!(flags & SUPPRESS)) mbp += nconv;
363             width -= nconv;
364             n++;
365           }
366           if (n == 0) goto input_failure;
367           nread += n;
368           if (!(flags & SUPPRESS)) nassigned++;
369         }
370         nconversions++;
371         break;
372 
373       case CT_CCL:
374       case CT_STRING:
375         // CT_CCL: scan a (nonempty) character class (sets NOSKIP).
376         // CT_STRING: like CCL, but zero-length string OK, & no NOSKIP.
377         if (width == 0) width = SIZE_MAX; // 'infinity'.
378         if ((flags & SUPPRESS) && (flags & LONG)) {
379           n = 0;
380           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) n++;
381           if (wi != WEOF) __ungetwc(wi, fp);
382         } else if (flags & LONG) {
383           p0 = p = va_arg(ap, wchar_t*);
384           while ((wi = __fgetwc_unlock(fp)) != WEOF && width-- != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
385             *p++ = static_cast<wchar_t>(wi);
386           }
387           if (wi != WEOF) __ungetwc(wi, fp);
388           n = p - p0;
389         } else {
390           if (!(flags & SUPPRESS)) mbp = va_arg(ap, char*);
391           n = 0;
392           memset(&mbs, 0, sizeof(mbs));
393           while ((wi = __fgetwc_unlock(fp)) != WEOF && width != 0 && ((c == CT_CCL && in_ccl(wi, ccl)) || (c == CT_STRING && !iswspace(wi)))) {
394             if (width >= MB_CUR_MAX && !(flags & SUPPRESS)) {
395               nconv = wcrtomb(mbp, wi, &mbs);
396               if (nconv == static_cast<size_t>(-1)) goto input_failure;
397             } else {
398               nconv = wcrtomb(mbbuf, wi, &mbs);
399               if (nconv == static_cast<size_t>(-1)) goto input_failure;
400               if (nconv > width) break;
401               if (!(flags & SUPPRESS)) memcpy(mbp, mbbuf, nconv);
402             }
403             if (!(flags & SUPPRESS)) mbp += nconv;
404             width -= nconv;
405             n++;
406           }
407           if (wi != WEOF) __ungetwc(wi, fp);
408         }
409         if (c == CT_CCL && n == 0) goto match_failure;
410         if (!(flags & SUPPRESS)) {
411           if (flags & LONG) {
412             *p = L'\0';
413           } else {
414             *mbp = '\0';
415           }
416           ++nassigned;
417         }
418         nread += n;
419         nconversions++;
420         break;
421 
422       case CT_INT:
423         /* scan an integer as if by strtoimax/strtoumax */
424         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
425           width = sizeof(buf) / sizeof(*buf) - 1;
426         flags |= SIGNOK | NDIGITS | NZDIGITS;
427         for (p = buf; width; width--) {
428           c = __fgetwc_unlock(fp);
429           /*
430            * Switch on the character; `goto ok'
431            * if we accept it as a part of number.
432            */
433           switch (c) {
434             /*
435              * The digit 0 is always legal, but is
436              * special.  For %i conversions, if no
437              * digits (zero or nonzero) have been
438              * scanned (only signs), we will have
439              * base==0.  In that case, we should set
440              * it to 8 and enable 0b/0x prefixing.
441              * Also, if we have not scanned zero digits
442              * before this, do not turn off prefixing
443              * (someone else will turn it off if we
444              * have scanned any nonzero digits).
445              */
446             case '0':
447               if (base == 0) {
448                 base = 8;
449                 flags |= PFBOK | PFXOK;
450               }
451               if (flags & NZDIGITS) {
452                 flags &= ~(SIGNOK | NZDIGITS | NDIGITS);
453               } else {
454                 flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
455               }
456               goto ok;
457 
458             /* 1 through 7 always legal */
459             case 'B':
460             case 'b':
461               // Is this 'b' potentially part of an "0b" prefix?
462               if ((flags & PFBOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
463                 base = 2;
464                 flags &= ~PFBOK;
465                 goto ok;
466               }
467               // No? Fall through and see if it's a hex digit instead then...
468               __BIONIC_FALLTHROUGH;
469             case '1':
470             case '2':
471             case '3':
472             case '4':
473             case '5':
474             case '6':
475             case '7':
476             case '8':
477             case '9':
478             case 'A':
479             case 'C':
480             case 'D':
481             case 'E':
482             case 'F':
483             case 'a':
484             case 'c':
485             case 'd':
486             case 'e':
487             case 'f':
488               if (base == 0) base = 10;
489               if (base != 16 && static_cast<int>(c - '0') >= base) break; /* not legal here */
490               flags &= ~(SIGNOK | PFBOK | PFXOK | NDIGITS);
491               goto ok;
492 
493             /* sign ok only as first character */
494             case '+':
495             case '-':
496               if (flags & SIGNOK) {
497                 flags &= ~SIGNOK;
498                 flags |= HAVESIGN;
499                 goto ok;
500               }
501               break;
502 
503             /*
504              * x ok iff flag still set and 2nd char (or
505              * 3rd char if we have a sign).
506              */
507             case 'x':
508             case 'X':
509               if ((flags & PFXOK) && p == buf + 1 + !!(flags & HAVESIGN)) {
510                 base = 16; /* if %i */
511                 flags &= ~PFXOK;
512                 goto ok;
513               }
514               break;
515           }
516 
517           /*
518            * If we got here, c is not a legal character
519            * for a number.  Stop accumulating digits.
520            */
521           if (c != WEOF) __ungetwc(c, fp);
522           break;
523         ok:
524           /*
525            * c is legal: store it and look at the next.
526            */
527           *p++ = static_cast<wchar_t>(c);
528         }
529         /*
530          * If we had only a sign, it is no good; push back the sign.
531          * If the number was `[-+]0[BbXx]`, push back and treat it
532          * as `[-+]0`.
533          */
534         if (flags & NDIGITS) {
535           if (p > buf) __ungetwc(*--p, fp);
536           goto match_failure;
537         }
538         c = p[-1];
539         if ((base == 2 && (c == 'b' || c == 'B')) || c == 'x' || c == 'X') {
540           --p;
541           __ungetwc(c, fp);
542         }
543         if ((flags & SUPPRESS) == 0) {
544           uintmax_t res;
545 
546           *p = '\0';
547           if (flags & UNSIGNED)
548             res = wcstoimax(buf, NULL, base);
549           else
550             res = wcstoumax(buf, NULL, base);
551           if (flags & POINTER)
552             *va_arg(ap, void**) = reinterpret_cast<void*>(res);
553           else if (flags & MAXINT)
554             *va_arg(ap, intmax_t*) = res;
555           else if (flags & LLONG)
556             *va_arg(ap, long long*) = res;
557           else if (flags & SIZEINT)
558             *va_arg(ap, ssize_t*) = res;
559           else if (flags & PTRINT)
560             *va_arg(ap, ptrdiff_t*) = res;
561           else if (flags & LONG)
562             *va_arg(ap, long*) = res;
563           else if (flags & SHORT)
564             *va_arg(ap, short*) = res;
565           else if (flags & SHORTSHORT)
566             *va_arg(ap, signed char*) = res;
567           else
568             *va_arg(ap, int*) = res;
569           nassigned++;
570         }
571         nread += p - buf;
572         nconversions++;
573         break;
574 
575       case CT_FLOAT:
576         /* scan a floating point number as if by strtod */
577         if (width == 0 || width > sizeof(buf) / sizeof(*buf) - 1)
578           width = sizeof(buf) / sizeof(*buf) - 1;
579         if ((width = wparsefloat(fp, buf, buf + width)) == 0) goto match_failure;
580         if ((flags & SUPPRESS) == 0) {
581           if (flags & LONGDBL) {
582             long double res = wcstold(buf, &p);
583             *va_arg(ap, long double*) = res;
584           } else if (flags & LONG) {
585             double res = wcstod(buf, &p);
586             *va_arg(ap, double*) = res;
587           } else {
588             float res = wcstof(buf, &p);
589             *va_arg(ap, float*) = res;
590           }
591           if (static_cast<size_t>(p - buf) != width) abort();
592           nassigned++;
593         }
594         nread += width;
595         nconversions++;
596         break;
597     }
598   }
599 input_failure:
600   return (nconversions != 0 ? nassigned : EOF);
601 match_failure:
602   return (nassigned);
603 }
604 #pragma GCC diagnostic pop
605