os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclScan.c
Update contrib.
4 * This file contains the implementation of the "scan" command.
6 * Copyright (c) 1998 by Scriptics Corporation.
8 * See the file "license.terms" for information on usage and redistribution
9 * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
11 * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $
16 * For strtoll() and strtoull() declarations on some platforms...
21 * Flag values used by Tcl_ScanObjCmd.
24 #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
25 #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
26 #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
27 #define SCAN_WIDTH 0x8 /* A width value was supplied. */
29 #define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
30 #define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
31 #define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */
32 #define SCAN_XOK 0x80 /* An 'x' is allowed. */
33 #define SCAN_PTOK 0x100 /* Decimal point is allowed. */
34 #define SCAN_EXPOK 0x200 /* An exponent is allowed. */
36 #define SCAN_LONGER 0x400 /* Asked for a wide value. */
39 * The following structure contains the information associated with
43 typedef struct CharSet {
44 int exclude; /* 1 if this is an exclusion set. */
55 * Declarations for functions used only in this file.
58 static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format));
59 static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch));
60 static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
61 static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
62 int numVars, int *totalVars));
65 *----------------------------------------------------------------------
69 * This function examines a character set format specification
70 * and builds a CharSet containing the individual characters and
71 * character ranges specified.
74 * Returns the next format position.
77 * Initializes the charset.
79 *----------------------------------------------------------------------
83 BuildCharSet(cset, format)
85 char *format; /* Points to first char of set. */
87 Tcl_UniChar ch, start;
91 memset(cset, 0, sizeof(CharSet));
93 offset = Tcl_UtfToUniChar(format, &ch);
97 offset = Tcl_UtfToUniChar(format, &ch);
99 end = format + offset;
102 * Find the close bracket so we can overallocate the set.
106 end += Tcl_UtfToUniChar(end, &ch);
113 end += Tcl_UtfToUniChar(end, &ch);
116 cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar)
117 * (end - format - 1));
119 cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
125 * Now build the character set.
128 cset->nchars = cset->nranges = 0;
129 format += Tcl_UtfToUniChar(format, &ch);
131 if (ch == ']' || ch == '-') {
132 cset->chars[cset->nchars++] = ch;
133 format += Tcl_UtfToUniChar(format, &ch);
136 if (*format == '-') {
138 * This may be the first character of a range, so don't add
143 } else if (ch == '-') {
145 * Check to see if this is the last character in the set, in which
146 * case it is not a range and we should add the previous character
147 * as well as the dash.
150 if (*format == ']') {
151 cset->chars[cset->nchars++] = start;
152 cset->chars[cset->nchars++] = ch;
154 format += Tcl_UtfToUniChar(format, &ch);
157 * Check to see if the range is in reverse order.
161 cset->ranges[cset->nranges].start = start;
162 cset->ranges[cset->nranges].end = ch;
164 cset->ranges[cset->nranges].start = ch;
165 cset->ranges[cset->nranges].end = start;
170 cset->chars[cset->nchars++] = ch;
172 format += Tcl_UtfToUniChar(format, &ch);
178 *----------------------------------------------------------------------
182 * Check to see if a character matches the given set.
185 * Returns non-zero if the character matches the given set.
190 *----------------------------------------------------------------------
196 int c; /* Character to test, passed as int because
197 * of non-ANSI prototypes. */
199 Tcl_UniChar ch = (Tcl_UniChar) c;
201 for (i = 0; i < cset->nchars; i++) {
202 if (cset->chars[i] == ch) {
208 for (i = 0; i < cset->nranges; i++) {
209 if ((cset->ranges[i].start <= ch)
210 && (ch <= cset->ranges[i].end)) {
216 return (cset->exclude ? !match : match);
220 *----------------------------------------------------------------------
224 * Free the storage associated with a character set.
232 *----------------------------------------------------------------------
239 ckfree((char *)cset->chars);
241 ckfree((char *)cset->ranges);
246 *----------------------------------------------------------------------
250 * Parse the format string and verify that it is properly formed
251 * and that there are exactly enough variables on the command line.
254 * A standard Tcl result.
257 * May place an error in the interpreter result.
259 *----------------------------------------------------------------------
263 ValidateFormat(interp, format, numVars, totalSubs)
264 Tcl_Interp *interp; /* Current interpreter. */
265 char *format; /* The format string. */
266 int numVars; /* The number of variables passed to the
268 int *totalSubs; /* The number of variables that will be
271 #define STATIC_LIST_SIZE 16
272 int gotXpg, gotSequential, value, i, flags;
275 int staticAssign[STATIC_LIST_SIZE];
276 int *nassign = staticAssign;
277 int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
278 char buf[TCL_UTF_MAX+1];
281 * Initialize an array that records the number of times a variable
282 * is assigned to by the format string. We use this to detect if
283 * a variable is multiply assigned or left unassigned.
286 if (numVars > nspace) {
287 nassign = (int*)ckalloc(sizeof(int) * numVars);
290 for (i = 0; i < nspace; i++) {
294 xpgSize = objIndex = gotXpg = gotSequential = 0;
296 while (*format != '\0') {
297 format += Tcl_UtfToUniChar(format, &ch);
304 format += Tcl_UtfToUniChar(format, &ch);
309 flags |= SCAN_SUPPRESS;
310 format += Tcl_UtfToUniChar(format, &ch);
314 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
316 * Check for an XPG3-style %n$ specification. Note: there
317 * must not be a mixture of XPG3 specs and non-XPG3 specs
318 * in the same format string.
321 value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
326 format += Tcl_UtfToUniChar(format, &ch);
331 objIndex = value - 1;
332 if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
334 } else if (numVars == 0) {
336 * In the case where no vars are specified, the user can
337 * specify %9999$ legally, so we have to consider special
338 * rules for growing the assign array. 'value' is
339 * guaranteed to be > 0.
341 xpgSize = (xpgSize > value) ? xpgSize : value;
350 Tcl_SetResult(interp,
351 "cannot mix \"%\" and \"%n$\" conversion specifiers",
358 * Parse any width specifier.
361 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
362 value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
364 format += Tcl_UtfToUniChar(format, &ch);
368 * Handle any size specifier.
374 flags |= SCAN_LONGER;
376 format += Tcl_UtfToUniChar(format, &ch);
379 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
384 * Handle the various field types.
389 if (flags & SCAN_WIDTH) {
390 Tcl_SetResult(interp,
391 "field width may not be specified in %c conversion",
400 if (flags & SCAN_LONGER) {
402 buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
403 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
404 "'l' modifier may not be specified in %", buf,
405 " conversion", NULL);
421 * Bracket terms need special checking
424 if (flags & SCAN_LONGER) {
427 if (*format == '\0') {
430 format += Tcl_UtfToUniChar(format, &ch);
432 if (*format == '\0') {
435 format += Tcl_UtfToUniChar(format, &ch);
438 if (*format == '\0') {
441 format += Tcl_UtfToUniChar(format, &ch);
444 if (*format == '\0') {
447 format += Tcl_UtfToUniChar(format, &ch);
451 Tcl_SetResult(interp, "unmatched [ in format string",
456 char buf[TCL_UTF_MAX+1];
458 buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
459 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
460 "bad scan conversion character \"", buf, "\"", NULL);
464 if (!(flags & SCAN_SUPPRESS)) {
465 if (objIndex >= nspace) {
467 * Expand the nassign buffer. If we are using XPG specifiers,
468 * make sure that we grow to a large enough size. xpgSize is
469 * guaranteed to be at least one larger than objIndex.
475 nspace += STATIC_LIST_SIZE;
477 if (nassign == staticAssign) {
478 nassign = (void *)ckalloc(nspace * sizeof(int));
479 for (i = 0; i < STATIC_LIST_SIZE; ++i) {
480 nassign[i] = staticAssign[i];
483 nassign = (void *)ckrealloc((void *)nassign,
484 nspace * sizeof(int));
486 for (i = value; i < nspace; i++) {
496 * Verify that all of the variable were assigned exactly once.
507 *totalSubs = numVars;
509 for (i = 0; i < numVars; i++) {
510 if (nassign[i] > 1) {
511 Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
513 } else if (!xpgSize && (nassign[i] == 0)) {
515 * If the space is empty, and xpgSize is 0 (means XPG wasn't
516 * used, and/or numVars != 0), then too many vars were given
518 Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC);
523 if (nassign != staticAssign) {
524 ckfree((char *)nassign);
530 Tcl_SetResult(interp, "\"%n$\" argument index out of range",
533 Tcl_SetResult(interp,
534 "different numbers of variable names and field specifiers",
539 if (nassign != staticAssign) {
540 ckfree((char *)nassign);
543 #undef STATIC_LIST_SIZE
547 *----------------------------------------------------------------------
551 * This procedure is invoked to process the "scan" Tcl command.
552 * See the user documentation for details on what it does.
555 * A standard Tcl result.
558 * See the user documentation.
560 *----------------------------------------------------------------------
565 Tcl_ScanObjCmd(dummy, interp, objc, objv)
566 ClientData dummy; /* Not used. */
567 Tcl_Interp *interp; /* Current interpreter. */
568 int objc; /* Number of arguments. */
569 Tcl_Obj *CONST objv[]; /* Argument objects. */
572 int numVars, nconversions, totalVars = -1;
573 int objIndex, offset, i, result, code;
575 char *string, *end, *baseString;
581 #ifndef TCL_WIDE_INT_IS_LONG
582 Tcl_WideInt (*lfn)() = NULL;
583 Tcl_WideInt wideValue;
586 Tcl_Obj **objs = NULL, *objPtr = NULL;
588 char buf[513]; /* Temporary buffer to hold scanned
589 * number strings before they are
590 * passed to strtoul. */
593 Tcl_WrongNumArgs(interp, 1, objv,
594 "string format ?varName varName ...?");
598 format = Tcl_GetStringFromObj(objv[2], NULL);
602 * Check for errors in the format string.
605 if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
610 * Allocate space for the result objects.
614 objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
615 for (i = 0; i < totalVars; i++) {
620 string = Tcl_GetStringFromObj(objv[1], NULL);
624 * Iterate over the format string filling in the result objects until
625 * we reach the end of input, the end of the format string, or there
631 while (*format != '\0') {
632 format += Tcl_UtfToUniChar(format, &ch);
637 * If we see whitespace in the format, skip whitespace in the string.
640 if (Tcl_UniCharIsSpace(ch)) {
641 offset = Tcl_UtfToUniChar(string, &sch);
642 while (Tcl_UniCharIsSpace(sch)) {
643 if (*string == '\0') {
647 offset = Tcl_UtfToUniChar(string, &sch);
654 if (*string == '\0') {
658 string += Tcl_UtfToUniChar(string, &sch);
665 format += Tcl_UtfToUniChar(format, &ch);
671 * Check for assignment suppression ('*') or an XPG3-style
672 * assignment ('%n$').
676 flags |= SCAN_SUPPRESS;
677 format += Tcl_UtfToUniChar(format, &ch);
678 } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
679 value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
682 format += Tcl_UtfToUniChar(format, &ch);
683 objIndex = (int) value - 1;
688 * Parse any width specifier.
691 if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
692 width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
693 format += Tcl_UtfToUniChar(format, &ch);
699 * Handle any size specifier.
705 flags |= SCAN_LONGER;
707 * Fall through so we skip to the next character.
710 format += Tcl_UtfToUniChar(format, &ch);
714 * Handle the various field types.
719 if (!(flags & SCAN_SUPPRESS)) {
720 objPtr = Tcl_NewIntObj(string - baseString);
721 Tcl_IncrRefCount(objPtr);
722 objs[objIndex++] = objPtr;
730 fn = (long (*)())strtol;
731 #ifndef TCL_WIDE_INT_IS_LONG
732 lfn = (Tcl_WideInt (*)())strtoll;
738 fn = (long (*)())strtol;
739 #ifndef TCL_WIDE_INT_IS_LONG
740 lfn = (Tcl_WideInt (*)())strtoll;
746 fn = (long (*)())strtoul;
747 #ifndef TCL_WIDE_INT_IS_LONG
748 lfn = (Tcl_WideInt (*)())strtoull;
754 fn = (long (*)())strtoul;
755 #ifndef TCL_WIDE_INT_IS_LONG
756 lfn = (Tcl_WideInt (*)())strtoull;
762 flags |= SCAN_UNSIGNED;
763 fn = (long (*)())strtoul;
764 #ifndef TCL_WIDE_INT_IS_LONG
765 lfn = (Tcl_WideInt (*)())strtoull;
781 flags |= SCAN_NOSKIP;
785 flags |= SCAN_NOSKIP;
790 * At this point, we will need additional characters from the
794 if (*string == '\0') {
800 * Skip any leading whitespace at the beginning of a field unless
801 * the format suppresses this behavior.
804 if (!(flags & SCAN_NOSKIP)) {
805 while (*string != '\0') {
806 offset = Tcl_UtfToUniChar(string, &sch);
807 if (!Tcl_UniCharIsSpace(sch)) {
812 if (*string == '\0') {
819 * Perform the requested scanning operation.
825 * Scan a string up to width characters or whitespace.
832 while (*end != '\0') {
833 offset = Tcl_UtfToUniChar(end, &sch);
834 if (Tcl_UniCharIsSpace(sch)) {
842 if (!(flags & SCAN_SUPPRESS)) {
843 objPtr = Tcl_NewStringObj(string, end-string);
844 Tcl_IncrRefCount(objPtr);
845 objs[objIndex++] = objPtr;
858 format = BuildCharSet(&cset, format);
859 while (*end != '\0') {
860 offset = Tcl_UtfToUniChar(end, &sch);
861 if (!CharInSet(&cset, (int)sch)) {
869 ReleaseCharSet(&cset);
873 * Nothing matched the range, stop processing
877 if (!(flags & SCAN_SUPPRESS)) {
878 objPtr = Tcl_NewStringObj(string, end-string);
879 Tcl_IncrRefCount(objPtr);
880 objs[objIndex++] = objPtr;
888 * Scan a single Unicode character.
891 string += Tcl_UtfToUniChar(string, &sch);
892 if (!(flags & SCAN_SUPPRESS)) {
893 objPtr = Tcl_NewIntObj((int)sch);
894 Tcl_IncrRefCount(objPtr);
895 objs[objIndex++] = objPtr;
901 * Scan an unsigned or signed integer.
904 if ((width == 0) || (width > sizeof(buf) - 1)) {
905 width = sizeof(buf) - 1;
907 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
908 for (end = buf; width > 0; width--) {
911 * The 0 digit has special meaning at the beginning of
912 * a number. If we are unsure of the base, it
913 * indicates that we are in base 8 or base 16 (if it is
914 * followed by an 'x').
916 * 8.1 - 8.3.4 incorrectly handled 0x... base-16
917 * cases for %x by not reading the 0x as the
918 * auto-prelude for base-16. [Bug #495213]
928 if (flags & SCAN_NOZERO) {
929 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
932 flags &= ~(SCAN_SIGNOK | SCAN_XOK
937 case '1': case '2': case '3': case '4':
938 case '5': case '6': case '7':
942 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
952 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
955 case 'A': case 'B': case 'C':
956 case 'D': case 'E': case 'F':
957 case 'a': case 'b': case 'c':
958 case 'd': case 'e': case 'f':
962 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
966 if (flags & SCAN_SIGNOK) {
967 flags &= ~SCAN_SIGNOK;
973 if ((flags & SCAN_XOK) && (end == buf+1)) {
982 * We got an illegal character so we are done accumulating.
989 * Add the character to the temporary buffer.
993 if (*string == '\0') {
999 * Check to see if we need to back up because we only got a
1000 * sign or a trailing x after a 0.
1003 if (flags & SCAN_NODIGITS) {
1004 if (*string == '\0') {
1008 } else if (end[-1] == 'x' || end[-1] == 'X') {
1015 * Scan the value from the temporary buffer. If we are
1016 * returning a large unsigned value, we have to convert it back
1017 * to a string since Tcl only supports signed values.
1020 if (!(flags & SCAN_SUPPRESS)) {
1022 #ifndef TCL_WIDE_INT_IS_LONG
1023 if (flags & SCAN_LONGER) {
1024 wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base);
1025 if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
1026 /* INTL: ISO digit */
1027 sprintf(buf, "%" TCL_LL_MODIFIER "u",
1028 (Tcl_WideUInt)wideValue);
1029 objPtr = Tcl_NewStringObj(buf, -1);
1031 objPtr = Tcl_NewWideIntObj(wideValue);
1034 #endif /* !TCL_WIDE_INT_IS_LONG */
1035 value = (long) (*fn)(buf, NULL, base);
1036 if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1037 sprintf(buf, "%lu", value); /* INTL: ISO digit */
1038 objPtr = Tcl_NewStringObj(buf, -1);
1039 } else if ((flags & SCAN_LONGER)
1040 || (unsigned long) value > UINT_MAX) {
1041 objPtr = Tcl_NewLongObj(value);
1043 objPtr = Tcl_NewIntObj(value);
1045 #ifndef TCL_WIDE_INT_IS_LONG
1048 Tcl_IncrRefCount(objPtr);
1049 objs[objIndex++] = objPtr;
1056 * Scan a floating point number
1059 if ((width == 0) || (width > sizeof(buf) - 1)) {
1060 width = sizeof(buf) - 1;
1062 flags &= ~SCAN_LONGER;
1063 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1064 for (end = buf; width > 0; width--) {
1066 case '0': case '1': case '2': case '3':
1067 case '4': case '5': case '6': case '7':
1069 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1072 if (flags & SCAN_SIGNOK) {
1073 flags &= ~SCAN_SIGNOK;
1078 if (flags & SCAN_PTOK) {
1079 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1085 * An exponent is not allowed until there has
1086 * been at least one digit.
1089 if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
1091 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1092 | SCAN_SIGNOK | SCAN_NODIGITS;
1099 * We got an illegal character so we are done accumulating.
1106 * Add the character to the temporary buffer.
1110 if (*string == '\0') {
1116 * Check to see if we need to back up because we saw a
1117 * trailing 'e' or sign.
1120 if (flags & SCAN_NODIGITS) {
1121 if (flags & SCAN_EXPOK) {
1123 * There were no digits at all so scanning has
1124 * failed and we are done.
1126 if (*string == '\0') {
1133 * We got a bad exponent ('e' and maybe a sign).
1138 if (*end != 'e' && *end != 'E') {
1145 * Scan the value from the temporary buffer.
1148 if (!(flags & SCAN_SUPPRESS)) {
1151 dvalue = strtod(buf, NULL);
1152 objPtr = Tcl_NewDoubleObj(dvalue);
1153 Tcl_IncrRefCount(objPtr);
1154 objs[objIndex++] = objPtr;
1167 * In this case, variables were specified (classic scan)
1169 for (i = 0; i < totalVars; i++) {
1170 if (objs[i] != NULL) {
1174 tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0);
1175 Tcl_DecrRefCount(objs[i]);
1176 if (tmpPtr == NULL) {
1177 Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
1178 "couldn't set variable \"",
1179 Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
1186 * Here no vars were specified, we want a list returned (inline scan)
1188 objPtr = Tcl_NewObj();
1189 for (i = 0; i < totalVars; i++) {
1190 if (objs[i] != NULL) {
1191 Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
1192 Tcl_DecrRefCount(objs[i]);
1195 * More %-specifiers than matching chars, so we
1196 * just spit out empty strings for these
1198 Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
1203 ckfree((char*) objs);
1205 if (code == TCL_OK) {
1206 if (underflow && (nconversions == 0)) {
1208 objPtr = Tcl_NewIntObj(-1);
1211 Tcl_SetListObj(objPtr, 0, NULL);
1213 objPtr = Tcl_NewObj();
1216 } else if (numVars) {
1217 objPtr = Tcl_NewIntObj(result);
1219 Tcl_SetObjResult(interp, objPtr);