sl@0: /* sl@0: * tclScan.c -- sl@0: * sl@0: * This file contains the implementation of the "scan" command. sl@0: * sl@0: * Copyright (c) 1998 by Scriptics Corporation. sl@0: * sl@0: * See the file "license.terms" for information on usage and redistribution sl@0: * of this file, and for a DISCLAIMER OF ALL WARRANTIES. sl@0: * sl@0: * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $ sl@0: */ sl@0: sl@0: #include "tclInt.h" sl@0: /* sl@0: * For strtoll() and strtoull() declarations on some platforms... sl@0: */ sl@0: #include "tclPort.h" sl@0: sl@0: /* sl@0: * Flag values used by Tcl_ScanObjCmd. sl@0: */ sl@0: sl@0: #define SCAN_NOSKIP 0x1 /* Don't skip blanks. */ sl@0: #define SCAN_SUPPRESS 0x2 /* Suppress assignment. */ sl@0: #define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */ sl@0: #define SCAN_WIDTH 0x8 /* A width value was supplied. */ sl@0: sl@0: #define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */ sl@0: #define SCAN_NODIGITS 0x20 /* No digits have been scanned. */ sl@0: #define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */ sl@0: #define SCAN_XOK 0x80 /* An 'x' is allowed. */ sl@0: #define SCAN_PTOK 0x100 /* Decimal point is allowed. */ sl@0: #define SCAN_EXPOK 0x200 /* An exponent is allowed. */ sl@0: sl@0: #define SCAN_LONGER 0x400 /* Asked for a wide value. */ sl@0: sl@0: /* sl@0: * The following structure contains the information associated with sl@0: * a character set. sl@0: */ sl@0: sl@0: typedef struct CharSet { sl@0: int exclude; /* 1 if this is an exclusion set. */ sl@0: int nchars; sl@0: Tcl_UniChar *chars; sl@0: int nranges; sl@0: struct Range { sl@0: Tcl_UniChar start; sl@0: Tcl_UniChar end; sl@0: } *ranges; sl@0: } CharSet; sl@0: sl@0: /* sl@0: * Declarations for functions used only in this file. sl@0: */ sl@0: sl@0: static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format)); sl@0: static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch)); sl@0: static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset)); sl@0: static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format, sl@0: int numVars, int *totalVars)); sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * BuildCharSet -- sl@0: * sl@0: * This function examines a character set format specification sl@0: * and builds a CharSet containing the individual characters and sl@0: * character ranges specified. sl@0: * sl@0: * Results: sl@0: * Returns the next format position. sl@0: * sl@0: * Side effects: sl@0: * Initializes the charset. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static char * sl@0: BuildCharSet(cset, format) sl@0: CharSet *cset; sl@0: char *format; /* Points to first char of set. */ sl@0: { sl@0: Tcl_UniChar ch, start; sl@0: int offset, nranges; sl@0: char *end; sl@0: sl@0: memset(cset, 0, sizeof(CharSet)); sl@0: sl@0: offset = Tcl_UtfToUniChar(format, &ch); sl@0: if (ch == '^') { sl@0: cset->exclude = 1; sl@0: format += offset; sl@0: offset = Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: end = format + offset; sl@0: sl@0: /* sl@0: * Find the close bracket so we can overallocate the set. sl@0: */ sl@0: sl@0: if (ch == ']') { sl@0: end += Tcl_UtfToUniChar(end, &ch); sl@0: } sl@0: nranges = 0; sl@0: while (ch != ']') { sl@0: if (ch == '-') { sl@0: nranges++; sl@0: } sl@0: end += Tcl_UtfToUniChar(end, &ch); sl@0: } sl@0: sl@0: cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar) sl@0: * (end - format - 1)); sl@0: if (nranges > 0) { sl@0: cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges); sl@0: } else { sl@0: cset->ranges = NULL; sl@0: } sl@0: sl@0: /* sl@0: * Now build the character set. sl@0: */ sl@0: sl@0: cset->nchars = cset->nranges = 0; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: start = ch; sl@0: if (ch == ']' || ch == '-') { sl@0: cset->chars[cset->nchars++] = ch; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: while (ch != ']') { sl@0: if (*format == '-') { sl@0: /* sl@0: * This may be the first character of a range, so don't add sl@0: * it yet. sl@0: */ sl@0: sl@0: start = ch; sl@0: } else if (ch == '-') { sl@0: /* sl@0: * Check to see if this is the last character in the set, in which sl@0: * case it is not a range and we should add the previous character sl@0: * as well as the dash. sl@0: */ sl@0: sl@0: if (*format == ']') { sl@0: cset->chars[cset->nchars++] = start; sl@0: cset->chars[cset->nchars++] = ch; sl@0: } else { sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: sl@0: /* sl@0: * Check to see if the range is in reverse order. sl@0: */ sl@0: sl@0: if (start < ch) { sl@0: cset->ranges[cset->nranges].start = start; sl@0: cset->ranges[cset->nranges].end = ch; sl@0: } else { sl@0: cset->ranges[cset->nranges].start = ch; sl@0: cset->ranges[cset->nranges].end = start; sl@0: } sl@0: cset->nranges++; sl@0: } sl@0: } else { sl@0: cset->chars[cset->nchars++] = ch; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: return format; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * CharInSet -- sl@0: * sl@0: * Check to see if a character matches the given set. sl@0: * sl@0: * Results: sl@0: * Returns non-zero if the character matches the given set. sl@0: * sl@0: * Side effects: sl@0: * None. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static int sl@0: CharInSet(cset, c) sl@0: CharSet *cset; sl@0: int c; /* Character to test, passed as int because sl@0: * of non-ANSI prototypes. */ sl@0: { sl@0: Tcl_UniChar ch = (Tcl_UniChar) c; sl@0: int i, match = 0; sl@0: for (i = 0; i < cset->nchars; i++) { sl@0: if (cset->chars[i] == ch) { sl@0: match = 1; sl@0: break; sl@0: } sl@0: } sl@0: if (!match) { sl@0: for (i = 0; i < cset->nranges; i++) { sl@0: if ((cset->ranges[i].start <= ch) sl@0: && (ch <= cset->ranges[i].end)) { sl@0: match = 1; sl@0: break; sl@0: } sl@0: } sl@0: } sl@0: return (cset->exclude ? !match : match); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * ReleaseCharSet -- sl@0: * sl@0: * Free the storage associated with a character set. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * None. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: ReleaseCharSet(cset) sl@0: CharSet *cset; sl@0: { sl@0: ckfree((char *)cset->chars); sl@0: if (cset->ranges) { sl@0: ckfree((char *)cset->ranges); sl@0: } sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * ValidateFormat -- sl@0: * sl@0: * Parse the format string and verify that it is properly formed sl@0: * and that there are exactly enough variables on the command line. sl@0: * sl@0: * Results: sl@0: * A standard Tcl result. sl@0: * sl@0: * Side effects: sl@0: * May place an error in the interpreter result. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static int sl@0: ValidateFormat(interp, format, numVars, totalSubs) sl@0: Tcl_Interp *interp; /* Current interpreter. */ sl@0: char *format; /* The format string. */ sl@0: int numVars; /* The number of variables passed to the sl@0: * scan command. */ sl@0: int *totalSubs; /* The number of variables that will be sl@0: * required. */ sl@0: { sl@0: #define STATIC_LIST_SIZE 16 sl@0: int gotXpg, gotSequential, value, i, flags; sl@0: char *end; sl@0: Tcl_UniChar ch; sl@0: int staticAssign[STATIC_LIST_SIZE]; sl@0: int *nassign = staticAssign; sl@0: int objIndex, xpgSize, nspace = STATIC_LIST_SIZE; sl@0: char buf[TCL_UTF_MAX+1]; sl@0: sl@0: /* sl@0: * Initialize an array that records the number of times a variable sl@0: * is assigned to by the format string. We use this to detect if sl@0: * a variable is multiply assigned or left unassigned. sl@0: */ sl@0: sl@0: if (numVars > nspace) { sl@0: nassign = (int*)ckalloc(sizeof(int) * numVars); sl@0: nspace = numVars; sl@0: } sl@0: for (i = 0; i < nspace; i++) { sl@0: nassign[i] = 0; sl@0: } sl@0: sl@0: xpgSize = objIndex = gotXpg = gotSequential = 0; sl@0: sl@0: while (*format != '\0') { sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: sl@0: flags = 0; sl@0: sl@0: if (ch != '%') { sl@0: continue; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: if (ch == '%') { sl@0: continue; sl@0: } sl@0: if (ch == '*') { sl@0: flags |= SCAN_SUPPRESS; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: goto xpgCheckDone; sl@0: } sl@0: sl@0: if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ sl@0: /* sl@0: * Check for an XPG3-style %n$ specification. Note: there sl@0: * must not be a mixture of XPG3 specs and non-XPG3 specs sl@0: * in the same format string. sl@0: */ sl@0: sl@0: value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ sl@0: if (*end != '$') { sl@0: goto notXpg; sl@0: } sl@0: format = end+1; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: gotXpg = 1; sl@0: if (gotSequential) { sl@0: goto mixedXPG; sl@0: } sl@0: objIndex = value - 1; sl@0: if ((objIndex < 0) || (numVars && (objIndex >= numVars))) { sl@0: goto badIndex; sl@0: } else if (numVars == 0) { sl@0: /* sl@0: * In the case where no vars are specified, the user can sl@0: * specify %9999$ legally, so we have to consider special sl@0: * rules for growing the assign array. 'value' is sl@0: * guaranteed to be > 0. sl@0: */ sl@0: xpgSize = (xpgSize > value) ? xpgSize : value; sl@0: } sl@0: goto xpgCheckDone; sl@0: } sl@0: sl@0: notXpg: sl@0: gotSequential = 1; sl@0: if (gotXpg) { sl@0: mixedXPG: sl@0: Tcl_SetResult(interp, sl@0: "cannot mix \"%\" and \"%n$\" conversion specifiers", sl@0: TCL_STATIC); sl@0: goto error; sl@0: } sl@0: sl@0: xpgCheckDone: sl@0: /* sl@0: * Parse any width specifier. sl@0: */ sl@0: sl@0: if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ sl@0: value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ sl@0: flags |= SCAN_WIDTH; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: sl@0: /* sl@0: * Handle any size specifier. sl@0: */ sl@0: sl@0: switch (ch) { sl@0: case 'l': sl@0: case 'L': sl@0: flags |= SCAN_LONGER; sl@0: case 'h': sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: sl@0: if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) { sl@0: goto badIndex; sl@0: } sl@0: sl@0: /* sl@0: * Handle the various field types. sl@0: */ sl@0: sl@0: switch (ch) { sl@0: case 'c': sl@0: if (flags & SCAN_WIDTH) { sl@0: Tcl_SetResult(interp, sl@0: "field width may not be specified in %c conversion", sl@0: TCL_STATIC); sl@0: goto error; sl@0: } sl@0: /* sl@0: * Fall through! sl@0: */ sl@0: case 'n': sl@0: case 's': sl@0: if (flags & SCAN_LONGER) { sl@0: invalidLonger: sl@0: buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; sl@0: Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), sl@0: "'l' modifier may not be specified in %", buf, sl@0: " conversion", NULL); sl@0: goto error; sl@0: } sl@0: /* sl@0: * Fall through! sl@0: */ sl@0: case 'd': sl@0: case 'e': sl@0: case 'f': sl@0: case 'g': sl@0: case 'i': sl@0: case 'o': sl@0: case 'u': sl@0: case 'x': sl@0: break; sl@0: /* sl@0: * Bracket terms need special checking sl@0: */ sl@0: case '[': sl@0: if (flags & SCAN_LONGER) { sl@0: goto invalidLonger; sl@0: } sl@0: if (*format == '\0') { sl@0: goto badSet; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: if (ch == '^') { sl@0: if (*format == '\0') { sl@0: goto badSet; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: if (ch == ']') { sl@0: if (*format == '\0') { sl@0: goto badSet; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: while (ch != ']') { sl@0: if (*format == '\0') { sl@0: goto badSet; sl@0: } sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: break; sl@0: badSet: sl@0: Tcl_SetResult(interp, "unmatched [ in format string", sl@0: TCL_STATIC); sl@0: goto error; sl@0: default: sl@0: { sl@0: char buf[TCL_UTF_MAX+1]; sl@0: sl@0: buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; sl@0: Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), sl@0: "bad scan conversion character \"", buf, "\"", NULL); sl@0: goto error; sl@0: } sl@0: } sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: if (objIndex >= nspace) { sl@0: /* sl@0: * Expand the nassign buffer. If we are using XPG specifiers, sl@0: * make sure that we grow to a large enough size. xpgSize is sl@0: * guaranteed to be at least one larger than objIndex. sl@0: */ sl@0: value = nspace; sl@0: if (xpgSize) { sl@0: nspace = xpgSize; sl@0: } else { sl@0: nspace += STATIC_LIST_SIZE; sl@0: } sl@0: if (nassign == staticAssign) { sl@0: nassign = (void *)ckalloc(nspace * sizeof(int)); sl@0: for (i = 0; i < STATIC_LIST_SIZE; ++i) { sl@0: nassign[i] = staticAssign[i]; sl@0: } sl@0: } else { sl@0: nassign = (void *)ckrealloc((void *)nassign, sl@0: nspace * sizeof(int)); sl@0: } sl@0: for (i = value; i < nspace; i++) { sl@0: nassign[i] = 0; sl@0: } sl@0: } sl@0: nassign[objIndex]++; sl@0: objIndex++; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Verify that all of the variable were assigned exactly once. sl@0: */ sl@0: sl@0: if (numVars == 0) { sl@0: if (xpgSize) { sl@0: numVars = xpgSize; sl@0: } else { sl@0: numVars = objIndex; sl@0: } sl@0: } sl@0: if (totalSubs) { sl@0: *totalSubs = numVars; sl@0: } sl@0: for (i = 0; i < numVars; i++) { sl@0: if (nassign[i] > 1) { sl@0: Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC); sl@0: goto error; sl@0: } else if (!xpgSize && (nassign[i] == 0)) { sl@0: /* sl@0: * If the space is empty, and xpgSize is 0 (means XPG wasn't sl@0: * used, and/or numVars != 0), then too many vars were given sl@0: */ sl@0: Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC); sl@0: goto error; sl@0: } sl@0: } sl@0: sl@0: if (nassign != staticAssign) { sl@0: ckfree((char *)nassign); sl@0: } sl@0: return TCL_OK; sl@0: sl@0: badIndex: sl@0: if (gotXpg) { sl@0: Tcl_SetResult(interp, "\"%n$\" argument index out of range", sl@0: TCL_STATIC); sl@0: } else { sl@0: Tcl_SetResult(interp, sl@0: "different numbers of variable names and field specifiers", sl@0: TCL_STATIC); sl@0: } sl@0: sl@0: error: sl@0: if (nassign != staticAssign) { sl@0: ckfree((char *)nassign); sl@0: } sl@0: return TCL_ERROR; sl@0: #undef STATIC_LIST_SIZE sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_ScanObjCmd -- sl@0: * sl@0: * This procedure is invoked to process the "scan" Tcl command. sl@0: * See the user documentation for details on what it does. sl@0: * sl@0: * Results: sl@0: * A standard Tcl result. sl@0: * sl@0: * Side effects: sl@0: * See the user documentation. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: /* ARGSUSED */ sl@0: int sl@0: Tcl_ScanObjCmd(dummy, interp, objc, objv) sl@0: ClientData dummy; /* Not used. */ sl@0: Tcl_Interp *interp; /* Current interpreter. */ sl@0: int objc; /* Number of arguments. */ sl@0: Tcl_Obj *CONST objv[]; /* Argument objects. */ sl@0: { sl@0: char *format; sl@0: int numVars, nconversions, totalVars = -1; sl@0: int objIndex, offset, i, result, code; sl@0: long value; sl@0: char *string, *end, *baseString; sl@0: char op = 0; sl@0: int base = 0; sl@0: int underflow = 0; sl@0: size_t width; sl@0: long (*fn)() = NULL; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: Tcl_WideInt (*lfn)() = NULL; sl@0: Tcl_WideInt wideValue; sl@0: #endif sl@0: Tcl_UniChar ch, sch; sl@0: Tcl_Obj **objs = NULL, *objPtr = NULL; sl@0: int flags; sl@0: char buf[513]; /* Temporary buffer to hold scanned sl@0: * number strings before they are sl@0: * passed to strtoul. */ sl@0: sl@0: if (objc < 3) { sl@0: Tcl_WrongNumArgs(interp, 1, objv, sl@0: "string format ?varName varName ...?"); sl@0: return TCL_ERROR; sl@0: } sl@0: sl@0: format = Tcl_GetStringFromObj(objv[2], NULL); sl@0: numVars = objc-3; sl@0: sl@0: /* sl@0: * Check for errors in the format string. sl@0: */ sl@0: sl@0: if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) { sl@0: return TCL_ERROR; sl@0: } sl@0: sl@0: /* sl@0: * Allocate space for the result objects. sl@0: */ sl@0: sl@0: if (totalVars > 0) { sl@0: objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars); sl@0: for (i = 0; i < totalVars; i++) { sl@0: objs[i] = NULL; sl@0: } sl@0: } sl@0: sl@0: string = Tcl_GetStringFromObj(objv[1], NULL); sl@0: baseString = string; sl@0: sl@0: /* sl@0: * Iterate over the format string filling in the result objects until sl@0: * we reach the end of input, the end of the format string, or there sl@0: * is a mismatch. sl@0: */ sl@0: sl@0: objIndex = 0; sl@0: nconversions = 0; sl@0: while (*format != '\0') { sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: sl@0: flags = 0; sl@0: sl@0: /* sl@0: * If we see whitespace in the format, skip whitespace in the string. sl@0: */ sl@0: sl@0: if (Tcl_UniCharIsSpace(ch)) { sl@0: offset = Tcl_UtfToUniChar(string, &sch); sl@0: while (Tcl_UniCharIsSpace(sch)) { sl@0: if (*string == '\0') { sl@0: goto done; sl@0: } sl@0: string += offset; sl@0: offset = Tcl_UtfToUniChar(string, &sch); sl@0: } sl@0: continue; sl@0: } sl@0: sl@0: if (ch != '%') { sl@0: literal: sl@0: if (*string == '\0') { sl@0: underflow = 1; sl@0: goto done; sl@0: } sl@0: string += Tcl_UtfToUniChar(string, &sch); sl@0: if (ch != sch) { sl@0: goto done; sl@0: } sl@0: continue; sl@0: } sl@0: sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: if (ch == '%') { sl@0: goto literal; sl@0: } sl@0: sl@0: /* sl@0: * Check for assignment suppression ('*') or an XPG3-style sl@0: * assignment ('%n$'). sl@0: */ sl@0: sl@0: if (ch == '*') { sl@0: flags |= SCAN_SUPPRESS; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ sl@0: value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */ sl@0: if (*end == '$') { sl@0: format = end+1; sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: objIndex = (int) value - 1; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Parse any width specifier. sl@0: */ sl@0: sl@0: if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */ sl@0: width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */ sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } else { sl@0: width = 0; sl@0: } sl@0: sl@0: /* sl@0: * Handle any size specifier. sl@0: */ sl@0: sl@0: switch (ch) { sl@0: case 'l': sl@0: case 'L': sl@0: flags |= SCAN_LONGER; sl@0: /* sl@0: * Fall through so we skip to the next character. sl@0: */ sl@0: case 'h': sl@0: format += Tcl_UtfToUniChar(format, &ch); sl@0: } sl@0: sl@0: /* sl@0: * Handle the various field types. sl@0: */ sl@0: sl@0: switch (ch) { sl@0: case 'n': sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: objPtr = Tcl_NewIntObj(string - baseString); sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: nconversions++; sl@0: continue; sl@0: sl@0: case 'd': sl@0: op = 'i'; sl@0: base = 10; sl@0: fn = (long (*)())strtol; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: lfn = (Tcl_WideInt (*)())strtoll; sl@0: #endif sl@0: break; sl@0: case 'i': sl@0: op = 'i'; sl@0: base = 0; sl@0: fn = (long (*)())strtol; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: lfn = (Tcl_WideInt (*)())strtoll; sl@0: #endif sl@0: break; sl@0: case 'o': sl@0: op = 'i'; sl@0: base = 8; sl@0: fn = (long (*)())strtoul; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: lfn = (Tcl_WideInt (*)())strtoull; sl@0: #endif sl@0: break; sl@0: case 'x': sl@0: op = 'i'; sl@0: base = 16; sl@0: fn = (long (*)())strtoul; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: lfn = (Tcl_WideInt (*)())strtoull; sl@0: #endif sl@0: break; sl@0: case 'u': sl@0: op = 'i'; sl@0: base = 10; sl@0: flags |= SCAN_UNSIGNED; sl@0: fn = (long (*)())strtoul; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: lfn = (Tcl_WideInt (*)())strtoull; sl@0: #endif sl@0: break; sl@0: sl@0: case 'f': sl@0: case 'e': sl@0: case 'g': sl@0: op = 'f'; sl@0: break; sl@0: sl@0: case 's': sl@0: op = 's'; sl@0: break; sl@0: sl@0: case 'c': sl@0: op = 'c'; sl@0: flags |= SCAN_NOSKIP; sl@0: break; sl@0: case '[': sl@0: op = '['; sl@0: flags |= SCAN_NOSKIP; sl@0: break; sl@0: } sl@0: sl@0: /* sl@0: * At this point, we will need additional characters from the sl@0: * string to proceed. sl@0: */ sl@0: sl@0: if (*string == '\0') { sl@0: underflow = 1; sl@0: goto done; sl@0: } sl@0: sl@0: /* sl@0: * Skip any leading whitespace at the beginning of a field unless sl@0: * the format suppresses this behavior. sl@0: */ sl@0: sl@0: if (!(flags & SCAN_NOSKIP)) { sl@0: while (*string != '\0') { sl@0: offset = Tcl_UtfToUniChar(string, &sch); sl@0: if (!Tcl_UniCharIsSpace(sch)) { sl@0: break; sl@0: } sl@0: string += offset; sl@0: } sl@0: if (*string == '\0') { sl@0: underflow = 1; sl@0: goto done; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Perform the requested scanning operation. sl@0: */ sl@0: sl@0: switch (op) { sl@0: case 's': sl@0: /* sl@0: * Scan a string up to width characters or whitespace. sl@0: */ sl@0: sl@0: if (width == 0) { sl@0: width = (size_t) ~0; sl@0: } sl@0: end = string; sl@0: while (*end != '\0') { sl@0: offset = Tcl_UtfToUniChar(end, &sch); sl@0: if (Tcl_UniCharIsSpace(sch)) { sl@0: break; sl@0: } sl@0: end += offset; sl@0: if (--width == 0) { sl@0: break; sl@0: } sl@0: } sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: objPtr = Tcl_NewStringObj(string, end-string); sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: string = end; sl@0: break; sl@0: sl@0: case '[': { sl@0: CharSet cset; sl@0: sl@0: if (width == 0) { sl@0: width = (size_t) ~0; sl@0: } sl@0: end = string; sl@0: sl@0: format = BuildCharSet(&cset, format); sl@0: while (*end != '\0') { sl@0: offset = Tcl_UtfToUniChar(end, &sch); sl@0: if (!CharInSet(&cset, (int)sch)) { sl@0: break; sl@0: } sl@0: end += offset; sl@0: if (--width == 0) { sl@0: break; sl@0: } sl@0: } sl@0: ReleaseCharSet(&cset); sl@0: sl@0: if (string == end) { sl@0: /* sl@0: * Nothing matched the range, stop processing sl@0: */ sl@0: goto done; sl@0: } sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: objPtr = Tcl_NewStringObj(string, end-string); sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: string = end; sl@0: sl@0: break; sl@0: } sl@0: case 'c': sl@0: /* sl@0: * Scan a single Unicode character. sl@0: */ sl@0: sl@0: string += Tcl_UtfToUniChar(string, &sch); sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: objPtr = Tcl_NewIntObj((int)sch); sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: break; sl@0: sl@0: case 'i': sl@0: /* sl@0: * Scan an unsigned or signed integer. sl@0: */ sl@0: sl@0: if ((width == 0) || (width > sizeof(buf) - 1)) { sl@0: width = sizeof(buf) - 1; sl@0: } sl@0: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO; sl@0: for (end = buf; width > 0; width--) { sl@0: switch (*string) { sl@0: /* sl@0: * The 0 digit has special meaning at the beginning of sl@0: * a number. If we are unsure of the base, it sl@0: * indicates that we are in base 8 or base 16 (if it is sl@0: * followed by an 'x'). sl@0: * sl@0: * 8.1 - 8.3.4 incorrectly handled 0x... base-16 sl@0: * cases for %x by not reading the 0x as the sl@0: * auto-prelude for base-16. [Bug #495213] sl@0: */ sl@0: case '0': sl@0: if (base == 0) { sl@0: base = 8; sl@0: flags |= SCAN_XOK; sl@0: } sl@0: if (base == 16) { sl@0: flags |= SCAN_XOK; sl@0: } sl@0: if (flags & SCAN_NOZERO) { sl@0: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS sl@0: | SCAN_NOZERO); sl@0: } else { sl@0: flags &= ~(SCAN_SIGNOK | SCAN_XOK sl@0: | SCAN_NODIGITS); sl@0: } sl@0: goto addToInt; sl@0: sl@0: case '1': case '2': case '3': case '4': sl@0: case '5': case '6': case '7': sl@0: if (base == 0) { sl@0: base = 10; sl@0: } sl@0: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); sl@0: goto addToInt; sl@0: sl@0: case '8': case '9': sl@0: if (base == 0) { sl@0: base = 10; sl@0: } sl@0: if (base <= 8) { sl@0: break; sl@0: } sl@0: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); sl@0: goto addToInt; sl@0: sl@0: case 'A': case 'B': case 'C': sl@0: case 'D': case 'E': case 'F': sl@0: case 'a': case 'b': case 'c': sl@0: case 'd': case 'e': case 'f': sl@0: if (base <= 10) { sl@0: break; sl@0: } sl@0: flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS); sl@0: goto addToInt; sl@0: sl@0: case '+': case '-': sl@0: if (flags & SCAN_SIGNOK) { sl@0: flags &= ~SCAN_SIGNOK; sl@0: goto addToInt; sl@0: } sl@0: break; sl@0: sl@0: case 'x': case 'X': sl@0: if ((flags & SCAN_XOK) && (end == buf+1)) { sl@0: base = 16; sl@0: flags &= ~SCAN_XOK; sl@0: goto addToInt; sl@0: } sl@0: break; sl@0: } sl@0: sl@0: /* sl@0: * We got an illegal character so we are done accumulating. sl@0: */ sl@0: sl@0: break; sl@0: sl@0: addToInt: sl@0: /* sl@0: * Add the character to the temporary buffer. sl@0: */ sl@0: sl@0: *end++ = *string++; sl@0: if (*string == '\0') { sl@0: break; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Check to see if we need to back up because we only got a sl@0: * sign or a trailing x after a 0. sl@0: */ sl@0: sl@0: if (flags & SCAN_NODIGITS) { sl@0: if (*string == '\0') { sl@0: underflow = 1; sl@0: } sl@0: goto done; sl@0: } else if (end[-1] == 'x' || end[-1] == 'X') { sl@0: end--; sl@0: string--; sl@0: } sl@0: sl@0: sl@0: /* sl@0: * Scan the value from the temporary buffer. If we are sl@0: * returning a large unsigned value, we have to convert it back sl@0: * to a string since Tcl only supports signed values. sl@0: */ sl@0: sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: *end = '\0'; sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: if (flags & SCAN_LONGER) { sl@0: wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base); sl@0: if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) { sl@0: /* INTL: ISO digit */ sl@0: sprintf(buf, "%" TCL_LL_MODIFIER "u", sl@0: (Tcl_WideUInt)wideValue); sl@0: objPtr = Tcl_NewStringObj(buf, -1); sl@0: } else { sl@0: objPtr = Tcl_NewWideIntObj(wideValue); sl@0: } sl@0: } else { sl@0: #endif /* !TCL_WIDE_INT_IS_LONG */ sl@0: value = (long) (*fn)(buf, NULL, base); sl@0: if ((flags & SCAN_UNSIGNED) && (value < 0)) { sl@0: sprintf(buf, "%lu", value); /* INTL: ISO digit */ sl@0: objPtr = Tcl_NewStringObj(buf, -1); sl@0: } else if ((flags & SCAN_LONGER) sl@0: || (unsigned long) value > UINT_MAX) { sl@0: objPtr = Tcl_NewLongObj(value); sl@0: } else { sl@0: objPtr = Tcl_NewIntObj(value); sl@0: } sl@0: #ifndef TCL_WIDE_INT_IS_LONG sl@0: } sl@0: #endif sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: sl@0: break; sl@0: sl@0: case 'f': sl@0: /* sl@0: * Scan a floating point number sl@0: */ sl@0: sl@0: if ((width == 0) || (width > sizeof(buf) - 1)) { sl@0: width = sizeof(buf) - 1; sl@0: } sl@0: flags &= ~SCAN_LONGER; sl@0: flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK; sl@0: for (end = buf; width > 0; width--) { sl@0: switch (*string) { sl@0: case '0': case '1': case '2': case '3': sl@0: case '4': case '5': case '6': case '7': sl@0: case '8': case '9': sl@0: flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS); sl@0: goto addToFloat; sl@0: case '+': case '-': sl@0: if (flags & SCAN_SIGNOK) { sl@0: flags &= ~SCAN_SIGNOK; sl@0: goto addToFloat; sl@0: } sl@0: break; sl@0: case '.': sl@0: if (flags & SCAN_PTOK) { sl@0: flags &= ~(SCAN_SIGNOK | SCAN_PTOK); sl@0: goto addToFloat; sl@0: } sl@0: break; sl@0: case 'e': case 'E': sl@0: /* sl@0: * An exponent is not allowed until there has sl@0: * been at least one digit. sl@0: */ sl@0: sl@0: if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) sl@0: == SCAN_EXPOK) { sl@0: flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK)) sl@0: | SCAN_SIGNOK | SCAN_NODIGITS; sl@0: goto addToFloat; sl@0: } sl@0: break; sl@0: } sl@0: sl@0: /* sl@0: * We got an illegal character so we are done accumulating. sl@0: */ sl@0: sl@0: break; sl@0: sl@0: addToFloat: sl@0: /* sl@0: * Add the character to the temporary buffer. sl@0: */ sl@0: sl@0: *end++ = *string++; sl@0: if (*string == '\0') { sl@0: break; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Check to see if we need to back up because we saw a sl@0: * trailing 'e' or sign. sl@0: */ sl@0: sl@0: if (flags & SCAN_NODIGITS) { sl@0: if (flags & SCAN_EXPOK) { sl@0: /* sl@0: * There were no digits at all so scanning has sl@0: * failed and we are done. sl@0: */ sl@0: if (*string == '\0') { sl@0: underflow = 1; sl@0: } sl@0: goto done; sl@0: } sl@0: sl@0: /* sl@0: * We got a bad exponent ('e' and maybe a sign). sl@0: */ sl@0: sl@0: end--; sl@0: string--; sl@0: if (*end != 'e' && *end != 'E') { sl@0: end--; sl@0: string--; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Scan the value from the temporary buffer. sl@0: */ sl@0: sl@0: if (!(flags & SCAN_SUPPRESS)) { sl@0: double dvalue; sl@0: *end = '\0'; sl@0: dvalue = strtod(buf, NULL); sl@0: objPtr = Tcl_NewDoubleObj(dvalue); sl@0: Tcl_IncrRefCount(objPtr); sl@0: objs[objIndex++] = objPtr; sl@0: } sl@0: break; sl@0: } sl@0: nconversions++; sl@0: } sl@0: sl@0: done: sl@0: result = 0; sl@0: code = TCL_OK; sl@0: sl@0: if (numVars) { sl@0: /* sl@0: * In this case, variables were specified (classic scan) sl@0: */ sl@0: for (i = 0; i < totalVars; i++) { sl@0: if (objs[i] != NULL) { sl@0: Tcl_Obj *tmpPtr; sl@0: sl@0: result++; sl@0: tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0); sl@0: Tcl_DecrRefCount(objs[i]); sl@0: if (tmpPtr == NULL) { sl@0: Tcl_AppendStringsToObj(Tcl_GetObjResult(interp), sl@0: "couldn't set variable \"", sl@0: Tcl_GetString(objv[i+3]), "\"", (char *) NULL); sl@0: code = TCL_ERROR; sl@0: } sl@0: } sl@0: } sl@0: } else { sl@0: /* sl@0: * Here no vars were specified, we want a list returned (inline scan) sl@0: */ sl@0: objPtr = Tcl_NewObj(); sl@0: for (i = 0; i < totalVars; i++) { sl@0: if (objs[i] != NULL) { sl@0: Tcl_ListObjAppendElement(NULL, objPtr, objs[i]); sl@0: Tcl_DecrRefCount(objs[i]); sl@0: } else { sl@0: /* sl@0: * More %-specifiers than matching chars, so we sl@0: * just spit out empty strings for these sl@0: */ sl@0: Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj()); sl@0: } sl@0: } sl@0: } sl@0: if (objs != NULL) { sl@0: ckfree((char*) objs); sl@0: } sl@0: if (code == TCL_OK) { sl@0: if (underflow && (nconversions == 0)) { sl@0: if (numVars) { sl@0: objPtr = Tcl_NewIntObj(-1); sl@0: } else { sl@0: if (objPtr) { sl@0: Tcl_SetListObj(objPtr, 0, NULL); sl@0: } else { sl@0: objPtr = Tcl_NewObj(); sl@0: } sl@0: } sl@0: } else if (numVars) { sl@0: objPtr = Tcl_NewIntObj(result); sl@0: } sl@0: Tcl_SetObjResult(interp, objPtr); sl@0: } sl@0: return code; sl@0: }