os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclScan.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /* 
     2  * tclScan.c --
     3  *
     4  *	This file contains the implementation of the "scan" command.
     5  *
     6  * Copyright (c) 1998 by Scriptics Corporation.
     7  *
     8  * See the file "license.terms" for information on usage and redistribution
     9  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    10  *
    11  * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $
    12  */
    13 
    14 #include "tclInt.h"
    15 /*
    16  * For strtoll() and strtoull() declarations on some platforms...
    17  */
    18 #include "tclPort.h"
    19 
    20 /*
    21  * Flag values used by Tcl_ScanObjCmd.
    22  */
    23 
    24 #define SCAN_NOSKIP	0x1		  /* Don't skip blanks. */
    25 #define SCAN_SUPPRESS	0x2		  /* Suppress assignment. */
    26 #define SCAN_UNSIGNED	0x4		  /* Read an unsigned value. */
    27 #define SCAN_WIDTH	0x8		  /* A width value was supplied. */
    28 
    29 #define SCAN_SIGNOK	0x10		  /* A +/- character is allowed. */
    30 #define SCAN_NODIGITS	0x20		  /* No digits have been scanned. */
    31 #define SCAN_NOZERO	0x40		  /* No zero digits have been scanned. */
    32 #define SCAN_XOK	0x80		  /* An 'x' is allowed. */
    33 #define SCAN_PTOK	0x100		  /* Decimal point is allowed. */
    34 #define SCAN_EXPOK	0x200		  /* An exponent is allowed. */
    35 
    36 #define SCAN_LONGER	0x400		  /* Asked for a wide value. */
    37 
    38 /*
    39  * The following structure contains the information associated with
    40  * a character set.
    41  */
    42 
    43 typedef struct CharSet {
    44     int exclude;		/* 1 if this is an exclusion set. */
    45     int nchars;
    46     Tcl_UniChar *chars;
    47     int nranges;
    48     struct Range {
    49 	Tcl_UniChar start;
    50 	Tcl_UniChar end;
    51     } *ranges;
    52 } CharSet;
    53 
    54 /*
    55  * Declarations for functions used only in this file.
    56  */
    57 
    58 static char *	BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format));
    59 static int	CharInSet _ANSI_ARGS_((CharSet *cset, int ch));
    60 static void	ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
    61 static int	ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
    62 		    int numVars, int *totalVars));
    63 
    64 /*
    65  *----------------------------------------------------------------------
    66  *
    67  * BuildCharSet --
    68  *
    69  *	This function examines a character set format specification
    70  *	and builds a CharSet containing the individual characters and
    71  *	character ranges specified.
    72  *
    73  * Results:
    74  *	Returns the next format position.
    75  *
    76  * Side effects:
    77  *	Initializes the charset.
    78  *
    79  *----------------------------------------------------------------------
    80  */
    81 
    82 static char *
    83 BuildCharSet(cset, format)
    84     CharSet *cset;
    85     char *format;		/* Points to first char of set. */
    86 {
    87     Tcl_UniChar ch, start;
    88     int offset, nranges;
    89     char *end;
    90 
    91     memset(cset, 0, sizeof(CharSet));
    92     
    93     offset = Tcl_UtfToUniChar(format, &ch);
    94     if (ch == '^') {
    95 	cset->exclude = 1;
    96 	format += offset;
    97 	offset = Tcl_UtfToUniChar(format, &ch);
    98     }
    99     end = format + offset;
   100 
   101     /*
   102      * Find the close bracket so we can overallocate the set.
   103      */
   104 
   105     if (ch == ']') {
   106 	end += Tcl_UtfToUniChar(end, &ch);
   107     }
   108     nranges = 0;
   109     while (ch != ']') {
   110 	if (ch == '-') {
   111 	    nranges++;
   112 	}
   113 	end += Tcl_UtfToUniChar(end, &ch);
   114     }
   115 
   116     cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar)
   117 	    * (end - format - 1));
   118     if (nranges > 0) {
   119 	cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
   120     } else {
   121 	cset->ranges = NULL;
   122     }
   123 
   124     /*
   125      * Now build the character set.
   126      */
   127 
   128     cset->nchars = cset->nranges = 0;
   129     format += Tcl_UtfToUniChar(format, &ch);
   130     start = ch;
   131     if (ch == ']' || ch == '-') {
   132 	cset->chars[cset->nchars++] = ch;
   133 	format += Tcl_UtfToUniChar(format, &ch);
   134     }
   135     while (ch != ']') {
   136 	if (*format == '-') {
   137 	    /*
   138 	     * This may be the first character of a range, so don't add
   139 	     * it yet.
   140 	     */
   141 
   142 	    start = ch;
   143 	} else if (ch == '-') {
   144 	    /*
   145 	     * Check to see if this is the last character in the set, in which
   146 	     * case it is not a range and we should add the previous character
   147 	     * as well as the dash.
   148 	     */
   149 
   150 	    if (*format == ']') {
   151 		cset->chars[cset->nchars++] = start;
   152 		cset->chars[cset->nchars++] = ch;
   153 	    } else {
   154 		format += Tcl_UtfToUniChar(format, &ch);
   155 
   156 		/*
   157 		 * Check to see if the range is in reverse order.
   158 		 */
   159 
   160 		if (start < ch) {
   161 		    cset->ranges[cset->nranges].start = start;
   162 		    cset->ranges[cset->nranges].end = ch;
   163 		} else {
   164 		    cset->ranges[cset->nranges].start = ch;
   165 		    cset->ranges[cset->nranges].end = start;
   166 		}		    
   167 		cset->nranges++;
   168 	    }
   169 	} else {
   170 	    cset->chars[cset->nchars++] = ch;
   171 	}
   172 	format += Tcl_UtfToUniChar(format, &ch);
   173     }
   174     return format;
   175 }
   176 
   177 /*
   178  *----------------------------------------------------------------------
   179  *
   180  * CharInSet --
   181  *
   182  *	Check to see if a character matches the given set.
   183  *
   184  * Results:
   185  *	Returns non-zero if the character matches the given set.
   186  *
   187  * Side effects:
   188  *	None.
   189  *
   190  *----------------------------------------------------------------------
   191  */
   192 
   193 static int
   194 CharInSet(cset, c)
   195     CharSet *cset;
   196     int c;			/* Character to test, passed as int because
   197 				 * of non-ANSI prototypes. */
   198 {
   199     Tcl_UniChar ch = (Tcl_UniChar) c;
   200     int i, match = 0;
   201     for (i = 0; i < cset->nchars; i++) {
   202 	if (cset->chars[i] == ch) {
   203 	    match = 1;
   204 	    break;
   205 	}
   206     }
   207     if (!match) {
   208 	for (i = 0; i < cset->nranges; i++) {
   209 	    if ((cset->ranges[i].start <= ch)
   210 		    && (ch <= cset->ranges[i].end)) {
   211 		match = 1;
   212 		break;
   213 	    }
   214 	}
   215     }
   216     return (cset->exclude ? !match : match);    
   217 }
   218 
   219 /*
   220  *----------------------------------------------------------------------
   221  *
   222  * ReleaseCharSet --
   223  *
   224  *	Free the storage associated with a character set.
   225  *
   226  * Results:
   227  *	None.
   228  *
   229  * Side effects:
   230  *	None.
   231  *
   232  *----------------------------------------------------------------------
   233  */
   234 
   235 static void
   236 ReleaseCharSet(cset)
   237     CharSet *cset;
   238 {
   239     ckfree((char *)cset->chars);
   240     if (cset->ranges) {
   241 	ckfree((char *)cset->ranges);
   242     }
   243 }
   244 
   245 /*
   246  *----------------------------------------------------------------------
   247  *
   248  * ValidateFormat --
   249  *
   250  *	Parse the format string and verify that it is properly formed
   251  *	and that there are exactly enough variables on the command line.
   252  *
   253  * Results:
   254  *	A standard Tcl result.
   255  *
   256  * Side effects:
   257  *	May place an error in the interpreter result.
   258  *
   259  *----------------------------------------------------------------------
   260  */
   261 
   262 static int
   263 ValidateFormat(interp, format, numVars, totalSubs)
   264     Tcl_Interp *interp;		/* Current interpreter. */
   265     char *format;		/* The format string. */
   266     int numVars;		/* The number of variables passed to the
   267 				 * scan command. */
   268     int *totalSubs;		/* The number of variables that will be
   269 				 * required. */
   270 {
   271 #define STATIC_LIST_SIZE 16
   272     int gotXpg, gotSequential, value, i, flags;
   273     char *end;
   274     Tcl_UniChar ch;
   275     int staticAssign[STATIC_LIST_SIZE];
   276     int *nassign = staticAssign;
   277     int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
   278     char buf[TCL_UTF_MAX+1];
   279 
   280     /*
   281      * Initialize an array that records the number of times a variable
   282      * is assigned to by the format string.  We use this to detect if
   283      * a variable is multiply assigned or left unassigned.
   284      */
   285 
   286     if (numVars > nspace) {
   287 	nassign = (int*)ckalloc(sizeof(int) * numVars);
   288 	nspace = numVars;
   289     }
   290     for (i = 0; i < nspace; i++) {
   291 	nassign[i] = 0;
   292     }
   293 
   294     xpgSize = objIndex = gotXpg = gotSequential = 0;
   295 
   296     while (*format != '\0') {
   297 	format += Tcl_UtfToUniChar(format, &ch);
   298 
   299 	flags = 0;
   300 
   301 	if (ch != '%') {
   302 	    continue;
   303 	}
   304 	format += Tcl_UtfToUniChar(format, &ch);
   305 	if (ch == '%') {
   306 	    continue;
   307 	}
   308 	if (ch == '*') {
   309 	    flags |= SCAN_SUPPRESS;
   310 	    format += Tcl_UtfToUniChar(format, &ch);
   311 	    goto xpgCheckDone;
   312 	}
   313 
   314 	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
   315 	    /*
   316 	     * Check for an XPG3-style %n$ specification.  Note: there
   317 	     * must not be a mixture of XPG3 specs and non-XPG3 specs
   318 	     * in the same format string.
   319 	     */
   320 
   321 	    value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
   322 	    if (*end != '$') {
   323 		goto notXpg;
   324 	    }
   325 	    format = end+1;
   326 	    format += Tcl_UtfToUniChar(format, &ch);
   327 	    gotXpg = 1;
   328 	    if (gotSequential) {
   329 		goto mixedXPG;
   330 	    }
   331 	    objIndex = value - 1;
   332 	    if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
   333 		goto badIndex;
   334 	    } else if (numVars == 0) {
   335 		/*
   336 		 * In the case where no vars are specified, the user can
   337 		 * specify %9999$ legally, so we have to consider special
   338 		 * rules for growing the assign array.  'value' is
   339 		 * guaranteed to be > 0.
   340 		 */
   341 		xpgSize = (xpgSize > value) ? xpgSize : value;
   342 	    }
   343 	    goto xpgCheckDone;
   344 	}
   345 
   346 	notXpg:
   347 	gotSequential = 1;
   348 	if (gotXpg) {
   349 	    mixedXPG:
   350 	    Tcl_SetResult(interp,
   351 		    "cannot mix \"%\" and \"%n$\" conversion specifiers",
   352 		    TCL_STATIC);
   353 	    goto error;
   354 	}
   355 
   356 	xpgCheckDone:
   357 	/*
   358 	 * Parse any width specifier.
   359 	 */
   360 
   361 	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
   362 	    value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
   363 	    flags |= SCAN_WIDTH;
   364 	    format += Tcl_UtfToUniChar(format, &ch);
   365 	}
   366 
   367 	/*
   368 	 * Handle any size specifier.
   369 	 */
   370 
   371 	switch (ch) {
   372 	case 'l':
   373 	case 'L':
   374 	    flags |= SCAN_LONGER;
   375 	case 'h':
   376 	    format += Tcl_UtfToUniChar(format, &ch);
   377 	}
   378 
   379 	if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
   380 	    goto badIndex;
   381 	}
   382 
   383 	/*
   384 	 * Handle the various field types.
   385 	 */
   386 
   387 	switch (ch) {
   388 	    case 'c':
   389                 if (flags & SCAN_WIDTH) {
   390 		    Tcl_SetResult(interp,
   391 			    "field width may not be specified in %c conversion",
   392 			    TCL_STATIC);
   393 		    goto error;
   394                 }
   395 		/*
   396 		 * Fall through!
   397 		 */
   398 	    case 'n':
   399 	    case 's':
   400 		if (flags & SCAN_LONGER) {
   401 		invalidLonger:
   402 		    buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
   403 		    Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
   404 			   "'l' modifier may not be specified in %", buf,
   405 			   " conversion", NULL);
   406 		    goto error;
   407 		}
   408 		/*
   409 		 * Fall through!
   410 		 */
   411 	    case 'd':
   412 	    case 'e':
   413 	    case 'f':
   414 	    case 'g':
   415 	    case 'i':
   416 	    case 'o':
   417 	    case 'u':
   418 	    case 'x':
   419  		break;
   420 		/*
   421 		 * Bracket terms need special checking
   422 		 */
   423 	    case '[':
   424 		if (flags & SCAN_LONGER) {
   425 		    goto invalidLonger;
   426 		}
   427 		if (*format == '\0') {
   428 		    goto badSet;
   429 		}
   430 		format += Tcl_UtfToUniChar(format, &ch);
   431 		if (ch == '^') {
   432 		    if (*format == '\0') {
   433 			goto badSet;
   434 		    }
   435 		    format += Tcl_UtfToUniChar(format, &ch);
   436 		}
   437 		if (ch == ']') {
   438 		    if (*format == '\0') {
   439 			goto badSet;
   440 		    }
   441 		    format += Tcl_UtfToUniChar(format, &ch);
   442 		}
   443 		while (ch != ']') {
   444 		    if (*format == '\0') {
   445 			goto badSet;
   446 		    }
   447 		    format += Tcl_UtfToUniChar(format, &ch);
   448 		}
   449 		break;
   450 	    badSet:
   451 		Tcl_SetResult(interp, "unmatched [ in format string",
   452 			TCL_STATIC);
   453 		goto error;
   454 	    default:
   455 	    {
   456 		char buf[TCL_UTF_MAX+1];
   457 
   458 		buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
   459 		Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
   460 			"bad scan conversion character \"", buf, "\"", NULL);
   461 		goto error;
   462 	    }
   463 	}
   464 	if (!(flags & SCAN_SUPPRESS)) {
   465 	    if (objIndex >= nspace) {
   466 		/*
   467 		 * Expand the nassign buffer.  If we are using XPG specifiers,
   468 		 * make sure that we grow to a large enough size.  xpgSize is
   469 		 * guaranteed to be at least one larger than objIndex.
   470 		 */
   471 		value = nspace;
   472 		if (xpgSize) {
   473 		    nspace = xpgSize;
   474 		} else {
   475 		    nspace += STATIC_LIST_SIZE;
   476 		}
   477 		if (nassign == staticAssign) {
   478 		    nassign = (void *)ckalloc(nspace * sizeof(int));
   479 		    for (i = 0; i < STATIC_LIST_SIZE; ++i) {
   480 			nassign[i] = staticAssign[i];
   481 		    }
   482 		} else {
   483 		    nassign = (void *)ckrealloc((void *)nassign,
   484 			    nspace * sizeof(int));
   485 		}
   486 		for (i = value; i < nspace; i++) {
   487 		    nassign[i] = 0;
   488 		}
   489 	    }
   490 	    nassign[objIndex]++;
   491 	    objIndex++;
   492 	}
   493     }
   494 
   495     /*
   496      * Verify that all of the variable were assigned exactly once.
   497      */
   498 
   499     if (numVars == 0) {
   500 	if (xpgSize) {
   501 	    numVars = xpgSize;
   502 	} else {
   503 	    numVars = objIndex;
   504 	}
   505     }
   506     if (totalSubs) {
   507 	*totalSubs = numVars;
   508     }
   509     for (i = 0; i < numVars; i++) {
   510 	if (nassign[i] > 1) {
   511 	    Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
   512 	    goto error;
   513 	} else if (!xpgSize && (nassign[i] == 0)) {
   514 	    /*
   515 	     * If the space is empty, and xpgSize is 0 (means XPG wasn't
   516 	     * used, and/or numVars != 0), then too many vars were given
   517 	     */
   518 	    Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC);
   519 	    goto error;
   520 	}
   521     }
   522 
   523     if (nassign != staticAssign) {
   524 	ckfree((char *)nassign);
   525     }
   526     return TCL_OK;
   527 
   528     badIndex:
   529     if (gotXpg) {
   530 	Tcl_SetResult(interp, "\"%n$\" argument index out of range",
   531 		TCL_STATIC);
   532     } else {
   533 	Tcl_SetResult(interp, 
   534 		"different numbers of variable names and field specifiers",
   535 		TCL_STATIC);
   536     }
   537 
   538     error:
   539     if (nassign != staticAssign) {
   540 	ckfree((char *)nassign);
   541     }
   542     return TCL_ERROR;
   543 #undef STATIC_LIST_SIZE
   544 }
   545 
   546 /*
   547  *----------------------------------------------------------------------
   548  *
   549  * Tcl_ScanObjCmd --
   550  *
   551  *	This procedure is invoked to process the "scan" Tcl command.
   552  *	See the user documentation for details on what it does.
   553  *
   554  * Results:
   555  *	A standard Tcl result.
   556  *
   557  * Side effects:
   558  *	See the user documentation.
   559  *
   560  *----------------------------------------------------------------------
   561  */
   562 
   563 	/* ARGSUSED */
   564 int
   565 Tcl_ScanObjCmd(dummy, interp, objc, objv)
   566     ClientData dummy;    	/* Not used. */
   567     Tcl_Interp *interp;		/* Current interpreter. */
   568     int objc;			/* Number of arguments. */
   569     Tcl_Obj *CONST objv[];	/* Argument objects. */
   570 {
   571     char *format;
   572     int numVars, nconversions, totalVars = -1;
   573     int objIndex, offset, i, result, code;
   574     long value;
   575     char *string, *end, *baseString;
   576     char op = 0;
   577     int base = 0;
   578     int underflow = 0;
   579     size_t width;
   580     long (*fn)() = NULL;
   581 #ifndef TCL_WIDE_INT_IS_LONG
   582     Tcl_WideInt (*lfn)() = NULL;
   583     Tcl_WideInt wideValue;
   584 #endif
   585     Tcl_UniChar ch, sch;
   586     Tcl_Obj **objs = NULL, *objPtr = NULL;
   587     int flags;
   588     char buf[513];			  /* Temporary buffer to hold scanned
   589 					   * number strings before they are
   590 					   * passed to strtoul. */
   591 
   592     if (objc < 3) {
   593         Tcl_WrongNumArgs(interp, 1, objv,
   594 		"string format ?varName varName ...?");
   595 	return TCL_ERROR;
   596     }
   597 
   598     format = Tcl_GetStringFromObj(objv[2], NULL);
   599     numVars = objc-3;
   600 
   601     /*
   602      * Check for errors in the format string.
   603      */
   604     
   605     if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
   606 	return TCL_ERROR;
   607     }
   608 
   609     /*
   610      * Allocate space for the result objects.
   611      */
   612 
   613     if (totalVars > 0) {
   614 	objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
   615 	for (i = 0; i < totalVars; i++) {
   616 	    objs[i] = NULL;
   617 	}
   618     }
   619 
   620     string = Tcl_GetStringFromObj(objv[1], NULL);
   621     baseString = string;
   622 
   623     /*
   624      * Iterate over the format string filling in the result objects until
   625      * we reach the end of input, the end of the format string, or there
   626      * is a mismatch.
   627      */
   628 
   629     objIndex = 0;
   630     nconversions = 0;
   631     while (*format != '\0') {
   632 	format += Tcl_UtfToUniChar(format, &ch);
   633 
   634 	flags = 0;
   635 
   636 	/*
   637 	 * If we see whitespace in the format, skip whitespace in the string.
   638 	 */
   639 
   640 	if (Tcl_UniCharIsSpace(ch)) {
   641 	    offset = Tcl_UtfToUniChar(string, &sch);
   642 	    while (Tcl_UniCharIsSpace(sch)) {
   643 		if (*string == '\0') {
   644 		    goto done;
   645 		}
   646 		string += offset;
   647 		offset = Tcl_UtfToUniChar(string, &sch);
   648 	    }
   649 	    continue;
   650 	}
   651 	    
   652 	if (ch != '%') {
   653 	    literal:
   654 	    if (*string == '\0') {
   655 		underflow = 1;
   656 		goto done;
   657 	    }
   658 	    string += Tcl_UtfToUniChar(string, &sch);
   659 	    if (ch != sch) {
   660 		goto done;
   661 	    }
   662 	    continue;
   663 	}
   664 
   665 	format += Tcl_UtfToUniChar(format, &ch);
   666 	if (ch == '%') {
   667 	    goto literal;
   668 	}
   669 
   670 	/*
   671 	 * Check for assignment suppression ('*') or an XPG3-style
   672 	 * assignment ('%n$').
   673 	 */
   674 
   675 	if (ch == '*') {
   676 	    flags |= SCAN_SUPPRESS;
   677 	    format += Tcl_UtfToUniChar(format, &ch);
   678 	} else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
   679 	    value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
   680 	    if (*end == '$') {
   681 		format = end+1;
   682 		format += Tcl_UtfToUniChar(format, &ch);
   683 		objIndex = (int) value - 1;
   684 	    }
   685 	}
   686 
   687 	/*
   688 	 * Parse any width specifier.
   689 	 */
   690 
   691 	if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
   692 	    width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
   693 	    format += Tcl_UtfToUniChar(format, &ch);
   694 	} else {
   695 	    width = 0;
   696 	}
   697 
   698 	/*
   699 	 * Handle any size specifier.
   700 	 */
   701 
   702 	switch (ch) {
   703 	case 'l':
   704 	case 'L':
   705 	    flags |= SCAN_LONGER;
   706 	    /*
   707 	     * Fall through so we skip to the next character.
   708 	     */
   709 	case 'h':
   710 	    format += Tcl_UtfToUniChar(format, &ch);
   711 	}
   712 
   713 	/*
   714 	 * Handle the various field types.
   715 	 */
   716 
   717 	switch (ch) {
   718 	    case 'n':
   719 		if (!(flags & SCAN_SUPPRESS)) {
   720 		    objPtr = Tcl_NewIntObj(string - baseString);
   721 		    Tcl_IncrRefCount(objPtr);
   722 		    objs[objIndex++] = objPtr;
   723 		}
   724 		nconversions++;
   725 		continue;
   726 
   727 	    case 'd':
   728 		op = 'i';
   729 		base = 10;
   730 		fn = (long (*)())strtol;
   731 #ifndef TCL_WIDE_INT_IS_LONG
   732 		lfn = (Tcl_WideInt (*)())strtoll;
   733 #endif
   734 		break;
   735 	    case 'i':
   736 		op = 'i';
   737 		base = 0;
   738 		fn = (long (*)())strtol;
   739 #ifndef TCL_WIDE_INT_IS_LONG
   740 		lfn = (Tcl_WideInt (*)())strtoll;
   741 #endif
   742 		break;
   743 	    case 'o':
   744 		op = 'i';
   745 		base = 8;
   746 		fn = (long (*)())strtoul;
   747 #ifndef TCL_WIDE_INT_IS_LONG
   748 		lfn = (Tcl_WideInt (*)())strtoull;
   749 #endif
   750 		break;
   751 	    case 'x':
   752 		op = 'i';
   753 		base = 16;
   754 		fn = (long (*)())strtoul;
   755 #ifndef TCL_WIDE_INT_IS_LONG
   756 		lfn = (Tcl_WideInt (*)())strtoull;
   757 #endif
   758 		break;
   759 	    case 'u':
   760 		op = 'i';
   761 		base = 10;
   762 		flags |= SCAN_UNSIGNED;
   763 		fn = (long (*)())strtoul;
   764 #ifndef TCL_WIDE_INT_IS_LONG
   765 		lfn = (Tcl_WideInt (*)())strtoull;
   766 #endif
   767 		break;
   768 
   769 	    case 'f':
   770 	    case 'e':
   771 	    case 'g':
   772 		op = 'f';
   773 		break;
   774 
   775 	    case 's':
   776 		op = 's';
   777 		break;
   778 
   779 	    case 'c':
   780 		op = 'c';
   781 		flags |= SCAN_NOSKIP;
   782 		break;
   783 	    case '[':
   784 		op = '[';
   785 		flags |= SCAN_NOSKIP;
   786 		break;
   787 	}
   788 
   789 	/*
   790 	 * At this point, we will need additional characters from the
   791 	 * string to proceed.
   792 	 */
   793 
   794 	if (*string == '\0') {
   795 	    underflow = 1;
   796 	    goto done;
   797 	}
   798 	
   799 	/*
   800 	 * Skip any leading whitespace at the beginning of a field unless
   801 	 * the format suppresses this behavior.
   802 	 */
   803 
   804 	if (!(flags & SCAN_NOSKIP)) {
   805 	    while (*string != '\0') {
   806 		offset = Tcl_UtfToUniChar(string, &sch);
   807 		if (!Tcl_UniCharIsSpace(sch)) {
   808 		    break;
   809 		}
   810 		string += offset;
   811 	    }
   812 	    if (*string == '\0') {
   813 		underflow = 1;
   814 		goto done;
   815 	    }
   816 	}
   817 
   818 	/*
   819 	 * Perform the requested scanning operation.
   820 	 */
   821 	
   822 	switch (op) {
   823 	    case 's':
   824 		/*
   825 		 * Scan a string up to width characters or whitespace.
   826 		 */
   827 
   828 		if (width == 0) {
   829 		    width = (size_t) ~0;
   830 		}
   831 		end = string;
   832 		while (*end != '\0') {
   833 		    offset = Tcl_UtfToUniChar(end, &sch);
   834 		    if (Tcl_UniCharIsSpace(sch)) {
   835 			break;
   836 		    }
   837 		    end += offset;
   838 		    if (--width == 0) {
   839 			break;
   840 		    }
   841 		}
   842 		if (!(flags & SCAN_SUPPRESS)) {
   843 		    objPtr = Tcl_NewStringObj(string, end-string);
   844 		    Tcl_IncrRefCount(objPtr);
   845 		    objs[objIndex++] = objPtr;
   846 		}
   847 		string = end;
   848 		break;
   849 
   850 	    case '[': {
   851 		CharSet cset;
   852 
   853 		if (width == 0) {
   854 		    width = (size_t) ~0;
   855 		}
   856 		end = string;
   857 
   858 		format = BuildCharSet(&cset, format);
   859 		while (*end != '\0') {
   860 		    offset = Tcl_UtfToUniChar(end, &sch);
   861 		    if (!CharInSet(&cset, (int)sch)) {
   862 			break;
   863 		    }
   864 		    end += offset;
   865 		    if (--width == 0) {
   866 			break;
   867 		    }
   868 		}
   869 		ReleaseCharSet(&cset);
   870 
   871 		if (string == end) {
   872 		    /*
   873 		     * Nothing matched the range, stop processing
   874 		     */
   875 		    goto done;
   876 		}
   877 		if (!(flags & SCAN_SUPPRESS)) {
   878 		    objPtr = Tcl_NewStringObj(string, end-string);
   879 		    Tcl_IncrRefCount(objPtr);
   880 		    objs[objIndex++] = objPtr;
   881 		}
   882 		string = end;
   883 		
   884 		break;
   885 	    }
   886 	    case 'c':
   887 		/*
   888 		 * Scan a single Unicode character.
   889 		 */
   890 
   891 		string += Tcl_UtfToUniChar(string, &sch);
   892 		if (!(flags & SCAN_SUPPRESS)) {
   893 		    objPtr = Tcl_NewIntObj((int)sch);
   894 		    Tcl_IncrRefCount(objPtr);
   895 		    objs[objIndex++] = objPtr;
   896 		}
   897 		break;
   898 
   899 	    case 'i':
   900 		/*
   901 		 * Scan an unsigned or signed integer.
   902 		 */
   903 
   904 		if ((width == 0) || (width > sizeof(buf) - 1)) {
   905 		    width = sizeof(buf) - 1;
   906 		}
   907 		flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
   908 		for (end = buf; width > 0; width--) {
   909 		    switch (*string) {
   910 			/*
   911 			 * The 0 digit has special meaning at the beginning of
   912 			 * a number.  If we are unsure of the base, it
   913 			 * indicates that we are in base 8 or base 16 (if it is
   914 			 * followed by an 'x').
   915 			 *
   916 			 * 8.1 - 8.3.4 incorrectly handled 0x... base-16
   917 			 * cases for %x by not reading the 0x as the
   918 			 * auto-prelude for base-16. [Bug #495213]
   919 			 */
   920 			case '0':
   921 			    if (base == 0) {
   922 				base = 8;
   923 				flags |= SCAN_XOK;
   924 			    }
   925 			    if (base == 16) {
   926 				flags |= SCAN_XOK;
   927 			    }
   928 			    if (flags & SCAN_NOZERO) {
   929 				flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
   930 					| SCAN_NOZERO);
   931 			    } else {
   932 				flags &= ~(SCAN_SIGNOK | SCAN_XOK
   933 					| SCAN_NODIGITS);
   934 			    }
   935 			    goto addToInt;
   936 
   937 			case '1': case '2': case '3': case '4':
   938 			case '5': case '6': case '7':
   939 			    if (base == 0) {
   940 				base = 10;
   941 			    }
   942 			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
   943 			    goto addToInt;
   944 
   945 			case '8': case '9':
   946 			    if (base == 0) {
   947 				base = 10;
   948 			    }
   949 			    if (base <= 8) {
   950 				break;
   951 			    }
   952 			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
   953 			    goto addToInt;
   954 
   955 			case 'A': case 'B': case 'C':
   956 			case 'D': case 'E': case 'F': 
   957 			case 'a': case 'b': case 'c':
   958 			case 'd': case 'e': case 'f':
   959 			    if (base <= 10) {
   960 				break;
   961 			    }
   962 			    flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
   963 			    goto addToInt;
   964 
   965 			case '+': case '-':
   966 			    if (flags & SCAN_SIGNOK) {
   967 				flags &= ~SCAN_SIGNOK;
   968 				goto addToInt;
   969 			    }
   970 			    break;
   971 
   972 			case 'x': case 'X':
   973 			    if ((flags & SCAN_XOK) && (end == buf+1)) {
   974 				base = 16;
   975 				flags &= ~SCAN_XOK;
   976 				goto addToInt;
   977 			    }
   978 			    break;
   979 		    }
   980 
   981 		    /*
   982 		     * We got an illegal character so we are done accumulating.
   983 		     */
   984 
   985 		    break;
   986 
   987 		    addToInt:
   988 		    /*
   989 		     * Add the character to the temporary buffer.
   990 		     */
   991 
   992 		    *end++ = *string++;
   993 		    if (*string == '\0') {
   994 			break;
   995 		    }
   996 		}
   997 
   998 		/*
   999 		 * Check to see if we need to back up because we only got a
  1000 		 * sign or a trailing x after a 0.
  1001 		 */
  1002 
  1003 		if (flags & SCAN_NODIGITS) {
  1004 		    if (*string == '\0') {
  1005 			underflow = 1;
  1006 		    }
  1007 		    goto done;
  1008 		} else if (end[-1] == 'x' || end[-1] == 'X') {
  1009 		    end--;
  1010 		    string--;
  1011 		}
  1012 
  1013 
  1014 		/*
  1015 		 * Scan the value from the temporary buffer.  If we are
  1016 		 * returning a large unsigned value, we have to convert it back
  1017 		 * to a string since Tcl only supports signed values.
  1018 		 */
  1019 
  1020 		if (!(flags & SCAN_SUPPRESS)) {
  1021 		    *end = '\0';
  1022 #ifndef TCL_WIDE_INT_IS_LONG
  1023 		    if (flags & SCAN_LONGER) {
  1024 			wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base);
  1025 			if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
  1026 			    /* INTL: ISO digit */
  1027 			    sprintf(buf, "%" TCL_LL_MODIFIER "u",
  1028 				    (Tcl_WideUInt)wideValue);
  1029 			    objPtr = Tcl_NewStringObj(buf, -1);
  1030 			} else {
  1031 			    objPtr = Tcl_NewWideIntObj(wideValue);
  1032 			}
  1033 		    } else {
  1034 #endif /* !TCL_WIDE_INT_IS_LONG */
  1035 			value = (long) (*fn)(buf, NULL, base);
  1036 			if ((flags & SCAN_UNSIGNED) && (value < 0)) {
  1037 			    sprintf(buf, "%lu", value); /* INTL: ISO digit */
  1038 			    objPtr = Tcl_NewStringObj(buf, -1);
  1039 			} else if ((flags & SCAN_LONGER)
  1040 				|| (unsigned long) value > UINT_MAX) {
  1041 			    objPtr = Tcl_NewLongObj(value);
  1042 			} else {
  1043 			    objPtr = Tcl_NewIntObj(value);
  1044 			}
  1045 #ifndef TCL_WIDE_INT_IS_LONG
  1046 		    }
  1047 #endif
  1048 		    Tcl_IncrRefCount(objPtr);
  1049 		    objs[objIndex++] = objPtr;
  1050 		}
  1051 
  1052 		break;
  1053 
  1054 	    case 'f':
  1055 		/*
  1056 		 * Scan a floating point number
  1057 		 */
  1058 
  1059 		if ((width == 0) || (width > sizeof(buf) - 1)) {
  1060 		    width = sizeof(buf) - 1;
  1061 		}
  1062 		flags &= ~SCAN_LONGER;
  1063 		flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
  1064 		for (end = buf; width > 0; width--) {
  1065 		    switch (*string) {
  1066 			case '0': case '1': case '2': case '3':
  1067 			case '4': case '5': case '6': case '7':
  1068 			case '8': case '9':
  1069 			    flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
  1070 			    goto addToFloat;
  1071 			case '+': case '-':
  1072 			    if (flags & SCAN_SIGNOK) {
  1073 				flags &= ~SCAN_SIGNOK;
  1074 				goto addToFloat;
  1075 			    }
  1076 			    break;
  1077 			case '.':
  1078 			    if (flags & SCAN_PTOK) {
  1079 				flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
  1080 				goto addToFloat;
  1081 			    }
  1082 			    break;
  1083 			case 'e': case 'E':
  1084 			    /*
  1085 			     * An exponent is not allowed until there has
  1086 			     * been at least one digit.
  1087 			     */
  1088 
  1089 			    if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
  1090 				    == SCAN_EXPOK) {
  1091 				flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
  1092 				    | SCAN_SIGNOK | SCAN_NODIGITS;
  1093 				goto addToFloat;
  1094 			    }
  1095 			    break;
  1096 		    }
  1097 
  1098 		    /*
  1099 		     * We got an illegal character so we are done accumulating.
  1100 		     */
  1101 
  1102 		    break;
  1103 
  1104 		    addToFloat:
  1105 		    /*
  1106 		     * Add the character to the temporary buffer.
  1107 		     */
  1108 
  1109 		    *end++ = *string++;
  1110 		    if (*string == '\0') {
  1111 			break;
  1112 		    }
  1113 		}
  1114 
  1115 		/*
  1116 		 * Check to see if we need to back up because we saw a
  1117 		 * trailing 'e' or sign.
  1118 		 */
  1119 
  1120 		if (flags & SCAN_NODIGITS) {
  1121 		    if (flags & SCAN_EXPOK) {
  1122 			/*
  1123 			 * There were no digits at all so scanning has
  1124 			 * failed and we are done.
  1125 			 */
  1126 			if (*string == '\0') {
  1127 			    underflow = 1;
  1128 			}
  1129 			goto done;
  1130 		    }
  1131 
  1132 		    /*
  1133 		     * We got a bad exponent ('e' and maybe a sign).
  1134 		     */
  1135 
  1136 		    end--;
  1137 		    string--;
  1138 		    if (*end != 'e' && *end != 'E') {
  1139 			end--;
  1140 			string--;
  1141 		    }
  1142 		}
  1143 
  1144 		/*
  1145 		 * Scan the value from the temporary buffer.
  1146 		 */
  1147 
  1148 		if (!(flags & SCAN_SUPPRESS)) {
  1149 		    double dvalue;
  1150 		    *end = '\0';
  1151 		    dvalue = strtod(buf, NULL);
  1152 		    objPtr = Tcl_NewDoubleObj(dvalue);
  1153 		    Tcl_IncrRefCount(objPtr);
  1154 		    objs[objIndex++] = objPtr;
  1155 		}
  1156 		break;
  1157 	}
  1158 	nconversions++;
  1159     }
  1160 
  1161     done:
  1162     result = 0;
  1163     code = TCL_OK;
  1164 
  1165     if (numVars) {
  1166 	/*
  1167 	 * In this case, variables were specified (classic scan)
  1168 	 */
  1169 	for (i = 0; i < totalVars; i++) {
  1170 	    if (objs[i] != NULL) {
  1171 		Tcl_Obj *tmpPtr;
  1172 		
  1173 		result++;
  1174 		tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0);
  1175 		Tcl_DecrRefCount(objs[i]);
  1176 		if (tmpPtr == NULL) {
  1177 		    Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
  1178 			    "couldn't set variable \"",
  1179 			    Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
  1180 		    code = TCL_ERROR;
  1181 		}
  1182 	    }
  1183 	}
  1184     } else {
  1185 	/*
  1186 	 * Here no vars were specified, we want a list returned (inline scan)
  1187 	 */
  1188 	objPtr = Tcl_NewObj();
  1189 	for (i = 0; i < totalVars; i++) {
  1190 	    if (objs[i] != NULL) {
  1191 		Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
  1192 		Tcl_DecrRefCount(objs[i]);
  1193 	    } else {
  1194 		/*
  1195 		 * More %-specifiers than matching chars, so we
  1196 		 * just spit out empty strings for these
  1197 		 */
  1198 		Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
  1199 	    }
  1200 	}
  1201     }
  1202     if (objs != NULL) {
  1203 	ckfree((char*) objs);
  1204     }
  1205     if (code == TCL_OK) {
  1206 	if (underflow && (nconversions == 0)) {
  1207 	    if (numVars) {
  1208 		objPtr = Tcl_NewIntObj(-1);
  1209 	    } else {
  1210 		if (objPtr) {
  1211 		    Tcl_SetListObj(objPtr, 0, NULL);
  1212 		} else {
  1213 		    objPtr = Tcl_NewObj();
  1214 		}
  1215 	    }
  1216 	} else if (numVars) {
  1217 	    objPtr = Tcl_NewIntObj(result);
  1218 	}
  1219 	Tcl_SetObjResult(interp, objPtr);
  1220     }
  1221     return code;
  1222 }