os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclScan.c
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclScan.c Fri Jun 15 03:10:57 2012 +0200
1.3 @@ -0,0 +1,1222 @@
1.4 +/*
1.5 + * tclScan.c --
1.6 + *
1.7 + * This file contains the implementation of the "scan" command.
1.8 + *
1.9 + * Copyright (c) 1998 by Scriptics Corporation.
1.10 + *
1.11 + * See the file "license.terms" for information on usage and redistribution
1.12 + * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
1.13 + *
1.14 + * RCS: @(#) $Id: tclScan.c,v 1.12.2.2 2005/10/23 22:01:30 msofer Exp $
1.15 + */
1.16 +
1.17 +#include "tclInt.h"
1.18 +/*
1.19 + * For strtoll() and strtoull() declarations on some platforms...
1.20 + */
1.21 +#include "tclPort.h"
1.22 +
1.23 +/*
1.24 + * Flag values used by Tcl_ScanObjCmd.
1.25 + */
1.26 +
1.27 +#define SCAN_NOSKIP 0x1 /* Don't skip blanks. */
1.28 +#define SCAN_SUPPRESS 0x2 /* Suppress assignment. */
1.29 +#define SCAN_UNSIGNED 0x4 /* Read an unsigned value. */
1.30 +#define SCAN_WIDTH 0x8 /* A width value was supplied. */
1.31 +
1.32 +#define SCAN_SIGNOK 0x10 /* A +/- character is allowed. */
1.33 +#define SCAN_NODIGITS 0x20 /* No digits have been scanned. */
1.34 +#define SCAN_NOZERO 0x40 /* No zero digits have been scanned. */
1.35 +#define SCAN_XOK 0x80 /* An 'x' is allowed. */
1.36 +#define SCAN_PTOK 0x100 /* Decimal point is allowed. */
1.37 +#define SCAN_EXPOK 0x200 /* An exponent is allowed. */
1.38 +
1.39 +#define SCAN_LONGER 0x400 /* Asked for a wide value. */
1.40 +
1.41 +/*
1.42 + * The following structure contains the information associated with
1.43 + * a character set.
1.44 + */
1.45 +
1.46 +typedef struct CharSet {
1.47 + int exclude; /* 1 if this is an exclusion set. */
1.48 + int nchars;
1.49 + Tcl_UniChar *chars;
1.50 + int nranges;
1.51 + struct Range {
1.52 + Tcl_UniChar start;
1.53 + Tcl_UniChar end;
1.54 + } *ranges;
1.55 +} CharSet;
1.56 +
1.57 +/*
1.58 + * Declarations for functions used only in this file.
1.59 + */
1.60 +
1.61 +static char * BuildCharSet _ANSI_ARGS_((CharSet *cset, char *format));
1.62 +static int CharInSet _ANSI_ARGS_((CharSet *cset, int ch));
1.63 +static void ReleaseCharSet _ANSI_ARGS_((CharSet *cset));
1.64 +static int ValidateFormat _ANSI_ARGS_((Tcl_Interp *interp, char *format,
1.65 + int numVars, int *totalVars));
1.66 +
1.67 +/*
1.68 + *----------------------------------------------------------------------
1.69 + *
1.70 + * BuildCharSet --
1.71 + *
1.72 + * This function examines a character set format specification
1.73 + * and builds a CharSet containing the individual characters and
1.74 + * character ranges specified.
1.75 + *
1.76 + * Results:
1.77 + * Returns the next format position.
1.78 + *
1.79 + * Side effects:
1.80 + * Initializes the charset.
1.81 + *
1.82 + *----------------------------------------------------------------------
1.83 + */
1.84 +
1.85 +static char *
1.86 +BuildCharSet(cset, format)
1.87 + CharSet *cset;
1.88 + char *format; /* Points to first char of set. */
1.89 +{
1.90 + Tcl_UniChar ch, start;
1.91 + int offset, nranges;
1.92 + char *end;
1.93 +
1.94 + memset(cset, 0, sizeof(CharSet));
1.95 +
1.96 + offset = Tcl_UtfToUniChar(format, &ch);
1.97 + if (ch == '^') {
1.98 + cset->exclude = 1;
1.99 + format += offset;
1.100 + offset = Tcl_UtfToUniChar(format, &ch);
1.101 + }
1.102 + end = format + offset;
1.103 +
1.104 + /*
1.105 + * Find the close bracket so we can overallocate the set.
1.106 + */
1.107 +
1.108 + if (ch == ']') {
1.109 + end += Tcl_UtfToUniChar(end, &ch);
1.110 + }
1.111 + nranges = 0;
1.112 + while (ch != ']') {
1.113 + if (ch == '-') {
1.114 + nranges++;
1.115 + }
1.116 + end += Tcl_UtfToUniChar(end, &ch);
1.117 + }
1.118 +
1.119 + cset->chars = (Tcl_UniChar *) ckalloc(sizeof(Tcl_UniChar)
1.120 + * (end - format - 1));
1.121 + if (nranges > 0) {
1.122 + cset->ranges = (struct Range *) ckalloc(sizeof(struct Range)*nranges);
1.123 + } else {
1.124 + cset->ranges = NULL;
1.125 + }
1.126 +
1.127 + /*
1.128 + * Now build the character set.
1.129 + */
1.130 +
1.131 + cset->nchars = cset->nranges = 0;
1.132 + format += Tcl_UtfToUniChar(format, &ch);
1.133 + start = ch;
1.134 + if (ch == ']' || ch == '-') {
1.135 + cset->chars[cset->nchars++] = ch;
1.136 + format += Tcl_UtfToUniChar(format, &ch);
1.137 + }
1.138 + while (ch != ']') {
1.139 + if (*format == '-') {
1.140 + /*
1.141 + * This may be the first character of a range, so don't add
1.142 + * it yet.
1.143 + */
1.144 +
1.145 + start = ch;
1.146 + } else if (ch == '-') {
1.147 + /*
1.148 + * Check to see if this is the last character in the set, in which
1.149 + * case it is not a range and we should add the previous character
1.150 + * as well as the dash.
1.151 + */
1.152 +
1.153 + if (*format == ']') {
1.154 + cset->chars[cset->nchars++] = start;
1.155 + cset->chars[cset->nchars++] = ch;
1.156 + } else {
1.157 + format += Tcl_UtfToUniChar(format, &ch);
1.158 +
1.159 + /*
1.160 + * Check to see if the range is in reverse order.
1.161 + */
1.162 +
1.163 + if (start < ch) {
1.164 + cset->ranges[cset->nranges].start = start;
1.165 + cset->ranges[cset->nranges].end = ch;
1.166 + } else {
1.167 + cset->ranges[cset->nranges].start = ch;
1.168 + cset->ranges[cset->nranges].end = start;
1.169 + }
1.170 + cset->nranges++;
1.171 + }
1.172 + } else {
1.173 + cset->chars[cset->nchars++] = ch;
1.174 + }
1.175 + format += Tcl_UtfToUniChar(format, &ch);
1.176 + }
1.177 + return format;
1.178 +}
1.179 +
1.180 +/*
1.181 + *----------------------------------------------------------------------
1.182 + *
1.183 + * CharInSet --
1.184 + *
1.185 + * Check to see if a character matches the given set.
1.186 + *
1.187 + * Results:
1.188 + * Returns non-zero if the character matches the given set.
1.189 + *
1.190 + * Side effects:
1.191 + * None.
1.192 + *
1.193 + *----------------------------------------------------------------------
1.194 + */
1.195 +
1.196 +static int
1.197 +CharInSet(cset, c)
1.198 + CharSet *cset;
1.199 + int c; /* Character to test, passed as int because
1.200 + * of non-ANSI prototypes. */
1.201 +{
1.202 + Tcl_UniChar ch = (Tcl_UniChar) c;
1.203 + int i, match = 0;
1.204 + for (i = 0; i < cset->nchars; i++) {
1.205 + if (cset->chars[i] == ch) {
1.206 + match = 1;
1.207 + break;
1.208 + }
1.209 + }
1.210 + if (!match) {
1.211 + for (i = 0; i < cset->nranges; i++) {
1.212 + if ((cset->ranges[i].start <= ch)
1.213 + && (ch <= cset->ranges[i].end)) {
1.214 + match = 1;
1.215 + break;
1.216 + }
1.217 + }
1.218 + }
1.219 + return (cset->exclude ? !match : match);
1.220 +}
1.221 +
1.222 +/*
1.223 + *----------------------------------------------------------------------
1.224 + *
1.225 + * ReleaseCharSet --
1.226 + *
1.227 + * Free the storage associated with a character set.
1.228 + *
1.229 + * Results:
1.230 + * None.
1.231 + *
1.232 + * Side effects:
1.233 + * None.
1.234 + *
1.235 + *----------------------------------------------------------------------
1.236 + */
1.237 +
1.238 +static void
1.239 +ReleaseCharSet(cset)
1.240 + CharSet *cset;
1.241 +{
1.242 + ckfree((char *)cset->chars);
1.243 + if (cset->ranges) {
1.244 + ckfree((char *)cset->ranges);
1.245 + }
1.246 +}
1.247 +
1.248 +/*
1.249 + *----------------------------------------------------------------------
1.250 + *
1.251 + * ValidateFormat --
1.252 + *
1.253 + * Parse the format string and verify that it is properly formed
1.254 + * and that there are exactly enough variables on the command line.
1.255 + *
1.256 + * Results:
1.257 + * A standard Tcl result.
1.258 + *
1.259 + * Side effects:
1.260 + * May place an error in the interpreter result.
1.261 + *
1.262 + *----------------------------------------------------------------------
1.263 + */
1.264 +
1.265 +static int
1.266 +ValidateFormat(interp, format, numVars, totalSubs)
1.267 + Tcl_Interp *interp; /* Current interpreter. */
1.268 + char *format; /* The format string. */
1.269 + int numVars; /* The number of variables passed to the
1.270 + * scan command. */
1.271 + int *totalSubs; /* The number of variables that will be
1.272 + * required. */
1.273 +{
1.274 +#define STATIC_LIST_SIZE 16
1.275 + int gotXpg, gotSequential, value, i, flags;
1.276 + char *end;
1.277 + Tcl_UniChar ch;
1.278 + int staticAssign[STATIC_LIST_SIZE];
1.279 + int *nassign = staticAssign;
1.280 + int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
1.281 + char buf[TCL_UTF_MAX+1];
1.282 +
1.283 + /*
1.284 + * Initialize an array that records the number of times a variable
1.285 + * is assigned to by the format string. We use this to detect if
1.286 + * a variable is multiply assigned or left unassigned.
1.287 + */
1.288 +
1.289 + if (numVars > nspace) {
1.290 + nassign = (int*)ckalloc(sizeof(int) * numVars);
1.291 + nspace = numVars;
1.292 + }
1.293 + for (i = 0; i < nspace; i++) {
1.294 + nassign[i] = 0;
1.295 + }
1.296 +
1.297 + xpgSize = objIndex = gotXpg = gotSequential = 0;
1.298 +
1.299 + while (*format != '\0') {
1.300 + format += Tcl_UtfToUniChar(format, &ch);
1.301 +
1.302 + flags = 0;
1.303 +
1.304 + if (ch != '%') {
1.305 + continue;
1.306 + }
1.307 + format += Tcl_UtfToUniChar(format, &ch);
1.308 + if (ch == '%') {
1.309 + continue;
1.310 + }
1.311 + if (ch == '*') {
1.312 + flags |= SCAN_SUPPRESS;
1.313 + format += Tcl_UtfToUniChar(format, &ch);
1.314 + goto xpgCheckDone;
1.315 + }
1.316 +
1.317 + if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
1.318 + /*
1.319 + * Check for an XPG3-style %n$ specification. Note: there
1.320 + * must not be a mixture of XPG3 specs and non-XPG3 specs
1.321 + * in the same format string.
1.322 + */
1.323 +
1.324 + value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
1.325 + if (*end != '$') {
1.326 + goto notXpg;
1.327 + }
1.328 + format = end+1;
1.329 + format += Tcl_UtfToUniChar(format, &ch);
1.330 + gotXpg = 1;
1.331 + if (gotSequential) {
1.332 + goto mixedXPG;
1.333 + }
1.334 + objIndex = value - 1;
1.335 + if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
1.336 + goto badIndex;
1.337 + } else if (numVars == 0) {
1.338 + /*
1.339 + * In the case where no vars are specified, the user can
1.340 + * specify %9999$ legally, so we have to consider special
1.341 + * rules for growing the assign array. 'value' is
1.342 + * guaranteed to be > 0.
1.343 + */
1.344 + xpgSize = (xpgSize > value) ? xpgSize : value;
1.345 + }
1.346 + goto xpgCheckDone;
1.347 + }
1.348 +
1.349 + notXpg:
1.350 + gotSequential = 1;
1.351 + if (gotXpg) {
1.352 + mixedXPG:
1.353 + Tcl_SetResult(interp,
1.354 + "cannot mix \"%\" and \"%n$\" conversion specifiers",
1.355 + TCL_STATIC);
1.356 + goto error;
1.357 + }
1.358 +
1.359 + xpgCheckDone:
1.360 + /*
1.361 + * Parse any width specifier.
1.362 + */
1.363 +
1.364 + if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
1.365 + value = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
1.366 + flags |= SCAN_WIDTH;
1.367 + format += Tcl_UtfToUniChar(format, &ch);
1.368 + }
1.369 +
1.370 + /*
1.371 + * Handle any size specifier.
1.372 + */
1.373 +
1.374 + switch (ch) {
1.375 + case 'l':
1.376 + case 'L':
1.377 + flags |= SCAN_LONGER;
1.378 + case 'h':
1.379 + format += Tcl_UtfToUniChar(format, &ch);
1.380 + }
1.381 +
1.382 + if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
1.383 + goto badIndex;
1.384 + }
1.385 +
1.386 + /*
1.387 + * Handle the various field types.
1.388 + */
1.389 +
1.390 + switch (ch) {
1.391 + case 'c':
1.392 + if (flags & SCAN_WIDTH) {
1.393 + Tcl_SetResult(interp,
1.394 + "field width may not be specified in %c conversion",
1.395 + TCL_STATIC);
1.396 + goto error;
1.397 + }
1.398 + /*
1.399 + * Fall through!
1.400 + */
1.401 + case 'n':
1.402 + case 's':
1.403 + if (flags & SCAN_LONGER) {
1.404 + invalidLonger:
1.405 + buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
1.406 + Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
1.407 + "'l' modifier may not be specified in %", buf,
1.408 + " conversion", NULL);
1.409 + goto error;
1.410 + }
1.411 + /*
1.412 + * Fall through!
1.413 + */
1.414 + case 'd':
1.415 + case 'e':
1.416 + case 'f':
1.417 + case 'g':
1.418 + case 'i':
1.419 + case 'o':
1.420 + case 'u':
1.421 + case 'x':
1.422 + break;
1.423 + /*
1.424 + * Bracket terms need special checking
1.425 + */
1.426 + case '[':
1.427 + if (flags & SCAN_LONGER) {
1.428 + goto invalidLonger;
1.429 + }
1.430 + if (*format == '\0') {
1.431 + goto badSet;
1.432 + }
1.433 + format += Tcl_UtfToUniChar(format, &ch);
1.434 + if (ch == '^') {
1.435 + if (*format == '\0') {
1.436 + goto badSet;
1.437 + }
1.438 + format += Tcl_UtfToUniChar(format, &ch);
1.439 + }
1.440 + if (ch == ']') {
1.441 + if (*format == '\0') {
1.442 + goto badSet;
1.443 + }
1.444 + format += Tcl_UtfToUniChar(format, &ch);
1.445 + }
1.446 + while (ch != ']') {
1.447 + if (*format == '\0') {
1.448 + goto badSet;
1.449 + }
1.450 + format += Tcl_UtfToUniChar(format, &ch);
1.451 + }
1.452 + break;
1.453 + badSet:
1.454 + Tcl_SetResult(interp, "unmatched [ in format string",
1.455 + TCL_STATIC);
1.456 + goto error;
1.457 + default:
1.458 + {
1.459 + char buf[TCL_UTF_MAX+1];
1.460 +
1.461 + buf[Tcl_UniCharToUtf(ch, buf)] = '\0';
1.462 + Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
1.463 + "bad scan conversion character \"", buf, "\"", NULL);
1.464 + goto error;
1.465 + }
1.466 + }
1.467 + if (!(flags & SCAN_SUPPRESS)) {
1.468 + if (objIndex >= nspace) {
1.469 + /*
1.470 + * Expand the nassign buffer. If we are using XPG specifiers,
1.471 + * make sure that we grow to a large enough size. xpgSize is
1.472 + * guaranteed to be at least one larger than objIndex.
1.473 + */
1.474 + value = nspace;
1.475 + if (xpgSize) {
1.476 + nspace = xpgSize;
1.477 + } else {
1.478 + nspace += STATIC_LIST_SIZE;
1.479 + }
1.480 + if (nassign == staticAssign) {
1.481 + nassign = (void *)ckalloc(nspace * sizeof(int));
1.482 + for (i = 0; i < STATIC_LIST_SIZE; ++i) {
1.483 + nassign[i] = staticAssign[i];
1.484 + }
1.485 + } else {
1.486 + nassign = (void *)ckrealloc((void *)nassign,
1.487 + nspace * sizeof(int));
1.488 + }
1.489 + for (i = value; i < nspace; i++) {
1.490 + nassign[i] = 0;
1.491 + }
1.492 + }
1.493 + nassign[objIndex]++;
1.494 + objIndex++;
1.495 + }
1.496 + }
1.497 +
1.498 + /*
1.499 + * Verify that all of the variable were assigned exactly once.
1.500 + */
1.501 +
1.502 + if (numVars == 0) {
1.503 + if (xpgSize) {
1.504 + numVars = xpgSize;
1.505 + } else {
1.506 + numVars = objIndex;
1.507 + }
1.508 + }
1.509 + if (totalSubs) {
1.510 + *totalSubs = numVars;
1.511 + }
1.512 + for (i = 0; i < numVars; i++) {
1.513 + if (nassign[i] > 1) {
1.514 + Tcl_SetResult(interp, "variable is assigned by multiple \"%n$\" conversion specifiers", TCL_STATIC);
1.515 + goto error;
1.516 + } else if (!xpgSize && (nassign[i] == 0)) {
1.517 + /*
1.518 + * If the space is empty, and xpgSize is 0 (means XPG wasn't
1.519 + * used, and/or numVars != 0), then too many vars were given
1.520 + */
1.521 + Tcl_SetResult(interp, "variable is not assigned by any conversion specifiers", TCL_STATIC);
1.522 + goto error;
1.523 + }
1.524 + }
1.525 +
1.526 + if (nassign != staticAssign) {
1.527 + ckfree((char *)nassign);
1.528 + }
1.529 + return TCL_OK;
1.530 +
1.531 + badIndex:
1.532 + if (gotXpg) {
1.533 + Tcl_SetResult(interp, "\"%n$\" argument index out of range",
1.534 + TCL_STATIC);
1.535 + } else {
1.536 + Tcl_SetResult(interp,
1.537 + "different numbers of variable names and field specifiers",
1.538 + TCL_STATIC);
1.539 + }
1.540 +
1.541 + error:
1.542 + if (nassign != staticAssign) {
1.543 + ckfree((char *)nassign);
1.544 + }
1.545 + return TCL_ERROR;
1.546 +#undef STATIC_LIST_SIZE
1.547 +}
1.548 +
1.549 +/*
1.550 + *----------------------------------------------------------------------
1.551 + *
1.552 + * Tcl_ScanObjCmd --
1.553 + *
1.554 + * This procedure is invoked to process the "scan" Tcl command.
1.555 + * See the user documentation for details on what it does.
1.556 + *
1.557 + * Results:
1.558 + * A standard Tcl result.
1.559 + *
1.560 + * Side effects:
1.561 + * See the user documentation.
1.562 + *
1.563 + *----------------------------------------------------------------------
1.564 + */
1.565 +
1.566 + /* ARGSUSED */
1.567 +int
1.568 +Tcl_ScanObjCmd(dummy, interp, objc, objv)
1.569 + ClientData dummy; /* Not used. */
1.570 + Tcl_Interp *interp; /* Current interpreter. */
1.571 + int objc; /* Number of arguments. */
1.572 + Tcl_Obj *CONST objv[]; /* Argument objects. */
1.573 +{
1.574 + char *format;
1.575 + int numVars, nconversions, totalVars = -1;
1.576 + int objIndex, offset, i, result, code;
1.577 + long value;
1.578 + char *string, *end, *baseString;
1.579 + char op = 0;
1.580 + int base = 0;
1.581 + int underflow = 0;
1.582 + size_t width;
1.583 + long (*fn)() = NULL;
1.584 +#ifndef TCL_WIDE_INT_IS_LONG
1.585 + Tcl_WideInt (*lfn)() = NULL;
1.586 + Tcl_WideInt wideValue;
1.587 +#endif
1.588 + Tcl_UniChar ch, sch;
1.589 + Tcl_Obj **objs = NULL, *objPtr = NULL;
1.590 + int flags;
1.591 + char buf[513]; /* Temporary buffer to hold scanned
1.592 + * number strings before they are
1.593 + * passed to strtoul. */
1.594 +
1.595 + if (objc < 3) {
1.596 + Tcl_WrongNumArgs(interp, 1, objv,
1.597 + "string format ?varName varName ...?");
1.598 + return TCL_ERROR;
1.599 + }
1.600 +
1.601 + format = Tcl_GetStringFromObj(objv[2], NULL);
1.602 + numVars = objc-3;
1.603 +
1.604 + /*
1.605 + * Check for errors in the format string.
1.606 + */
1.607 +
1.608 + if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
1.609 + return TCL_ERROR;
1.610 + }
1.611 +
1.612 + /*
1.613 + * Allocate space for the result objects.
1.614 + */
1.615 +
1.616 + if (totalVars > 0) {
1.617 + objs = (Tcl_Obj **) ckalloc(sizeof(Tcl_Obj*) * totalVars);
1.618 + for (i = 0; i < totalVars; i++) {
1.619 + objs[i] = NULL;
1.620 + }
1.621 + }
1.622 +
1.623 + string = Tcl_GetStringFromObj(objv[1], NULL);
1.624 + baseString = string;
1.625 +
1.626 + /*
1.627 + * Iterate over the format string filling in the result objects until
1.628 + * we reach the end of input, the end of the format string, or there
1.629 + * is a mismatch.
1.630 + */
1.631 +
1.632 + objIndex = 0;
1.633 + nconversions = 0;
1.634 + while (*format != '\0') {
1.635 + format += Tcl_UtfToUniChar(format, &ch);
1.636 +
1.637 + flags = 0;
1.638 +
1.639 + /*
1.640 + * If we see whitespace in the format, skip whitespace in the string.
1.641 + */
1.642 +
1.643 + if (Tcl_UniCharIsSpace(ch)) {
1.644 + offset = Tcl_UtfToUniChar(string, &sch);
1.645 + while (Tcl_UniCharIsSpace(sch)) {
1.646 + if (*string == '\0') {
1.647 + goto done;
1.648 + }
1.649 + string += offset;
1.650 + offset = Tcl_UtfToUniChar(string, &sch);
1.651 + }
1.652 + continue;
1.653 + }
1.654 +
1.655 + if (ch != '%') {
1.656 + literal:
1.657 + if (*string == '\0') {
1.658 + underflow = 1;
1.659 + goto done;
1.660 + }
1.661 + string += Tcl_UtfToUniChar(string, &sch);
1.662 + if (ch != sch) {
1.663 + goto done;
1.664 + }
1.665 + continue;
1.666 + }
1.667 +
1.668 + format += Tcl_UtfToUniChar(format, &ch);
1.669 + if (ch == '%') {
1.670 + goto literal;
1.671 + }
1.672 +
1.673 + /*
1.674 + * Check for assignment suppression ('*') or an XPG3-style
1.675 + * assignment ('%n$').
1.676 + */
1.677 +
1.678 + if (ch == '*') {
1.679 + flags |= SCAN_SUPPRESS;
1.680 + format += Tcl_UtfToUniChar(format, &ch);
1.681 + } else if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
1.682 + value = strtoul(format-1, &end, 10); /* INTL: "C" locale. */
1.683 + if (*end == '$') {
1.684 + format = end+1;
1.685 + format += Tcl_UtfToUniChar(format, &ch);
1.686 + objIndex = (int) value - 1;
1.687 + }
1.688 + }
1.689 +
1.690 + /*
1.691 + * Parse any width specifier.
1.692 + */
1.693 +
1.694 + if ((ch < 0x80) && isdigit(UCHAR(ch))) { /* INTL: "C" locale. */
1.695 + width = strtoul(format-1, &format, 10); /* INTL: "C" locale. */
1.696 + format += Tcl_UtfToUniChar(format, &ch);
1.697 + } else {
1.698 + width = 0;
1.699 + }
1.700 +
1.701 + /*
1.702 + * Handle any size specifier.
1.703 + */
1.704 +
1.705 + switch (ch) {
1.706 + case 'l':
1.707 + case 'L':
1.708 + flags |= SCAN_LONGER;
1.709 + /*
1.710 + * Fall through so we skip to the next character.
1.711 + */
1.712 + case 'h':
1.713 + format += Tcl_UtfToUniChar(format, &ch);
1.714 + }
1.715 +
1.716 + /*
1.717 + * Handle the various field types.
1.718 + */
1.719 +
1.720 + switch (ch) {
1.721 + case 'n':
1.722 + if (!(flags & SCAN_SUPPRESS)) {
1.723 + objPtr = Tcl_NewIntObj(string - baseString);
1.724 + Tcl_IncrRefCount(objPtr);
1.725 + objs[objIndex++] = objPtr;
1.726 + }
1.727 + nconversions++;
1.728 + continue;
1.729 +
1.730 + case 'd':
1.731 + op = 'i';
1.732 + base = 10;
1.733 + fn = (long (*)())strtol;
1.734 +#ifndef TCL_WIDE_INT_IS_LONG
1.735 + lfn = (Tcl_WideInt (*)())strtoll;
1.736 +#endif
1.737 + break;
1.738 + case 'i':
1.739 + op = 'i';
1.740 + base = 0;
1.741 + fn = (long (*)())strtol;
1.742 +#ifndef TCL_WIDE_INT_IS_LONG
1.743 + lfn = (Tcl_WideInt (*)())strtoll;
1.744 +#endif
1.745 + break;
1.746 + case 'o':
1.747 + op = 'i';
1.748 + base = 8;
1.749 + fn = (long (*)())strtoul;
1.750 +#ifndef TCL_WIDE_INT_IS_LONG
1.751 + lfn = (Tcl_WideInt (*)())strtoull;
1.752 +#endif
1.753 + break;
1.754 + case 'x':
1.755 + op = 'i';
1.756 + base = 16;
1.757 + fn = (long (*)())strtoul;
1.758 +#ifndef TCL_WIDE_INT_IS_LONG
1.759 + lfn = (Tcl_WideInt (*)())strtoull;
1.760 +#endif
1.761 + break;
1.762 + case 'u':
1.763 + op = 'i';
1.764 + base = 10;
1.765 + flags |= SCAN_UNSIGNED;
1.766 + fn = (long (*)())strtoul;
1.767 +#ifndef TCL_WIDE_INT_IS_LONG
1.768 + lfn = (Tcl_WideInt (*)())strtoull;
1.769 +#endif
1.770 + break;
1.771 +
1.772 + case 'f':
1.773 + case 'e':
1.774 + case 'g':
1.775 + op = 'f';
1.776 + break;
1.777 +
1.778 + case 's':
1.779 + op = 's';
1.780 + break;
1.781 +
1.782 + case 'c':
1.783 + op = 'c';
1.784 + flags |= SCAN_NOSKIP;
1.785 + break;
1.786 + case '[':
1.787 + op = '[';
1.788 + flags |= SCAN_NOSKIP;
1.789 + break;
1.790 + }
1.791 +
1.792 + /*
1.793 + * At this point, we will need additional characters from the
1.794 + * string to proceed.
1.795 + */
1.796 +
1.797 + if (*string == '\0') {
1.798 + underflow = 1;
1.799 + goto done;
1.800 + }
1.801 +
1.802 + /*
1.803 + * Skip any leading whitespace at the beginning of a field unless
1.804 + * the format suppresses this behavior.
1.805 + */
1.806 +
1.807 + if (!(flags & SCAN_NOSKIP)) {
1.808 + while (*string != '\0') {
1.809 + offset = Tcl_UtfToUniChar(string, &sch);
1.810 + if (!Tcl_UniCharIsSpace(sch)) {
1.811 + break;
1.812 + }
1.813 + string += offset;
1.814 + }
1.815 + if (*string == '\0') {
1.816 + underflow = 1;
1.817 + goto done;
1.818 + }
1.819 + }
1.820 +
1.821 + /*
1.822 + * Perform the requested scanning operation.
1.823 + */
1.824 +
1.825 + switch (op) {
1.826 + case 's':
1.827 + /*
1.828 + * Scan a string up to width characters or whitespace.
1.829 + */
1.830 +
1.831 + if (width == 0) {
1.832 + width = (size_t) ~0;
1.833 + }
1.834 + end = string;
1.835 + while (*end != '\0') {
1.836 + offset = Tcl_UtfToUniChar(end, &sch);
1.837 + if (Tcl_UniCharIsSpace(sch)) {
1.838 + break;
1.839 + }
1.840 + end += offset;
1.841 + if (--width == 0) {
1.842 + break;
1.843 + }
1.844 + }
1.845 + if (!(flags & SCAN_SUPPRESS)) {
1.846 + objPtr = Tcl_NewStringObj(string, end-string);
1.847 + Tcl_IncrRefCount(objPtr);
1.848 + objs[objIndex++] = objPtr;
1.849 + }
1.850 + string = end;
1.851 + break;
1.852 +
1.853 + case '[': {
1.854 + CharSet cset;
1.855 +
1.856 + if (width == 0) {
1.857 + width = (size_t) ~0;
1.858 + }
1.859 + end = string;
1.860 +
1.861 + format = BuildCharSet(&cset, format);
1.862 + while (*end != '\0') {
1.863 + offset = Tcl_UtfToUniChar(end, &sch);
1.864 + if (!CharInSet(&cset, (int)sch)) {
1.865 + break;
1.866 + }
1.867 + end += offset;
1.868 + if (--width == 0) {
1.869 + break;
1.870 + }
1.871 + }
1.872 + ReleaseCharSet(&cset);
1.873 +
1.874 + if (string == end) {
1.875 + /*
1.876 + * Nothing matched the range, stop processing
1.877 + */
1.878 + goto done;
1.879 + }
1.880 + if (!(flags & SCAN_SUPPRESS)) {
1.881 + objPtr = Tcl_NewStringObj(string, end-string);
1.882 + Tcl_IncrRefCount(objPtr);
1.883 + objs[objIndex++] = objPtr;
1.884 + }
1.885 + string = end;
1.886 +
1.887 + break;
1.888 + }
1.889 + case 'c':
1.890 + /*
1.891 + * Scan a single Unicode character.
1.892 + */
1.893 +
1.894 + string += Tcl_UtfToUniChar(string, &sch);
1.895 + if (!(flags & SCAN_SUPPRESS)) {
1.896 + objPtr = Tcl_NewIntObj((int)sch);
1.897 + Tcl_IncrRefCount(objPtr);
1.898 + objs[objIndex++] = objPtr;
1.899 + }
1.900 + break;
1.901 +
1.902 + case 'i':
1.903 + /*
1.904 + * Scan an unsigned or signed integer.
1.905 + */
1.906 +
1.907 + if ((width == 0) || (width > sizeof(buf) - 1)) {
1.908 + width = sizeof(buf) - 1;
1.909 + }
1.910 + flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
1.911 + for (end = buf; width > 0; width--) {
1.912 + switch (*string) {
1.913 + /*
1.914 + * The 0 digit has special meaning at the beginning of
1.915 + * a number. If we are unsure of the base, it
1.916 + * indicates that we are in base 8 or base 16 (if it is
1.917 + * followed by an 'x').
1.918 + *
1.919 + * 8.1 - 8.3.4 incorrectly handled 0x... base-16
1.920 + * cases for %x by not reading the 0x as the
1.921 + * auto-prelude for base-16. [Bug #495213]
1.922 + */
1.923 + case '0':
1.924 + if (base == 0) {
1.925 + base = 8;
1.926 + flags |= SCAN_XOK;
1.927 + }
1.928 + if (base == 16) {
1.929 + flags |= SCAN_XOK;
1.930 + }
1.931 + if (flags & SCAN_NOZERO) {
1.932 + flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS
1.933 + | SCAN_NOZERO);
1.934 + } else {
1.935 + flags &= ~(SCAN_SIGNOK | SCAN_XOK
1.936 + | SCAN_NODIGITS);
1.937 + }
1.938 + goto addToInt;
1.939 +
1.940 + case '1': case '2': case '3': case '4':
1.941 + case '5': case '6': case '7':
1.942 + if (base == 0) {
1.943 + base = 10;
1.944 + }
1.945 + flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1.946 + goto addToInt;
1.947 +
1.948 + case '8': case '9':
1.949 + if (base == 0) {
1.950 + base = 10;
1.951 + }
1.952 + if (base <= 8) {
1.953 + break;
1.954 + }
1.955 + flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1.956 + goto addToInt;
1.957 +
1.958 + case 'A': case 'B': case 'C':
1.959 + case 'D': case 'E': case 'F':
1.960 + case 'a': case 'b': case 'c':
1.961 + case 'd': case 'e': case 'f':
1.962 + if (base <= 10) {
1.963 + break;
1.964 + }
1.965 + flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1.966 + goto addToInt;
1.967 +
1.968 + case '+': case '-':
1.969 + if (flags & SCAN_SIGNOK) {
1.970 + flags &= ~SCAN_SIGNOK;
1.971 + goto addToInt;
1.972 + }
1.973 + break;
1.974 +
1.975 + case 'x': case 'X':
1.976 + if ((flags & SCAN_XOK) && (end == buf+1)) {
1.977 + base = 16;
1.978 + flags &= ~SCAN_XOK;
1.979 + goto addToInt;
1.980 + }
1.981 + break;
1.982 + }
1.983 +
1.984 + /*
1.985 + * We got an illegal character so we are done accumulating.
1.986 + */
1.987 +
1.988 + break;
1.989 +
1.990 + addToInt:
1.991 + /*
1.992 + * Add the character to the temporary buffer.
1.993 + */
1.994 +
1.995 + *end++ = *string++;
1.996 + if (*string == '\0') {
1.997 + break;
1.998 + }
1.999 + }
1.1000 +
1.1001 + /*
1.1002 + * Check to see if we need to back up because we only got a
1.1003 + * sign or a trailing x after a 0.
1.1004 + */
1.1005 +
1.1006 + if (flags & SCAN_NODIGITS) {
1.1007 + if (*string == '\0') {
1.1008 + underflow = 1;
1.1009 + }
1.1010 + goto done;
1.1011 + } else if (end[-1] == 'x' || end[-1] == 'X') {
1.1012 + end--;
1.1013 + string--;
1.1014 + }
1.1015 +
1.1016 +
1.1017 + /*
1.1018 + * Scan the value from the temporary buffer. If we are
1.1019 + * returning a large unsigned value, we have to convert it back
1.1020 + * to a string since Tcl only supports signed values.
1.1021 + */
1.1022 +
1.1023 + if (!(flags & SCAN_SUPPRESS)) {
1.1024 + *end = '\0';
1.1025 +#ifndef TCL_WIDE_INT_IS_LONG
1.1026 + if (flags & SCAN_LONGER) {
1.1027 + wideValue = (Tcl_WideInt) (*lfn)(buf, NULL, base);
1.1028 + if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
1.1029 + /* INTL: ISO digit */
1.1030 + sprintf(buf, "%" TCL_LL_MODIFIER "u",
1.1031 + (Tcl_WideUInt)wideValue);
1.1032 + objPtr = Tcl_NewStringObj(buf, -1);
1.1033 + } else {
1.1034 + objPtr = Tcl_NewWideIntObj(wideValue);
1.1035 + }
1.1036 + } else {
1.1037 +#endif /* !TCL_WIDE_INT_IS_LONG */
1.1038 + value = (long) (*fn)(buf, NULL, base);
1.1039 + if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1.1040 + sprintf(buf, "%lu", value); /* INTL: ISO digit */
1.1041 + objPtr = Tcl_NewStringObj(buf, -1);
1.1042 + } else if ((flags & SCAN_LONGER)
1.1043 + || (unsigned long) value > UINT_MAX) {
1.1044 + objPtr = Tcl_NewLongObj(value);
1.1045 + } else {
1.1046 + objPtr = Tcl_NewIntObj(value);
1.1047 + }
1.1048 +#ifndef TCL_WIDE_INT_IS_LONG
1.1049 + }
1.1050 +#endif
1.1051 + Tcl_IncrRefCount(objPtr);
1.1052 + objs[objIndex++] = objPtr;
1.1053 + }
1.1054 +
1.1055 + break;
1.1056 +
1.1057 + case 'f':
1.1058 + /*
1.1059 + * Scan a floating point number
1.1060 + */
1.1061 +
1.1062 + if ((width == 0) || (width > sizeof(buf) - 1)) {
1.1063 + width = sizeof(buf) - 1;
1.1064 + }
1.1065 + flags &= ~SCAN_LONGER;
1.1066 + flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1.1067 + for (end = buf; width > 0; width--) {
1.1068 + switch (*string) {
1.1069 + case '0': case '1': case '2': case '3':
1.1070 + case '4': case '5': case '6': case '7':
1.1071 + case '8': case '9':
1.1072 + flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1.1073 + goto addToFloat;
1.1074 + case '+': case '-':
1.1075 + if (flags & SCAN_SIGNOK) {
1.1076 + flags &= ~SCAN_SIGNOK;
1.1077 + goto addToFloat;
1.1078 + }
1.1079 + break;
1.1080 + case '.':
1.1081 + if (flags & SCAN_PTOK) {
1.1082 + flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1.1083 + goto addToFloat;
1.1084 + }
1.1085 + break;
1.1086 + case 'e': case 'E':
1.1087 + /*
1.1088 + * An exponent is not allowed until there has
1.1089 + * been at least one digit.
1.1090 + */
1.1091 +
1.1092 + if ((flags & (SCAN_NODIGITS | SCAN_EXPOK))
1.1093 + == SCAN_EXPOK) {
1.1094 + flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1.1095 + | SCAN_SIGNOK | SCAN_NODIGITS;
1.1096 + goto addToFloat;
1.1097 + }
1.1098 + break;
1.1099 + }
1.1100 +
1.1101 + /*
1.1102 + * We got an illegal character so we are done accumulating.
1.1103 + */
1.1104 +
1.1105 + break;
1.1106 +
1.1107 + addToFloat:
1.1108 + /*
1.1109 + * Add the character to the temporary buffer.
1.1110 + */
1.1111 +
1.1112 + *end++ = *string++;
1.1113 + if (*string == '\0') {
1.1114 + break;
1.1115 + }
1.1116 + }
1.1117 +
1.1118 + /*
1.1119 + * Check to see if we need to back up because we saw a
1.1120 + * trailing 'e' or sign.
1.1121 + */
1.1122 +
1.1123 + if (flags & SCAN_NODIGITS) {
1.1124 + if (flags & SCAN_EXPOK) {
1.1125 + /*
1.1126 + * There were no digits at all so scanning has
1.1127 + * failed and we are done.
1.1128 + */
1.1129 + if (*string == '\0') {
1.1130 + underflow = 1;
1.1131 + }
1.1132 + goto done;
1.1133 + }
1.1134 +
1.1135 + /*
1.1136 + * We got a bad exponent ('e' and maybe a sign).
1.1137 + */
1.1138 +
1.1139 + end--;
1.1140 + string--;
1.1141 + if (*end != 'e' && *end != 'E') {
1.1142 + end--;
1.1143 + string--;
1.1144 + }
1.1145 + }
1.1146 +
1.1147 + /*
1.1148 + * Scan the value from the temporary buffer.
1.1149 + */
1.1150 +
1.1151 + if (!(flags & SCAN_SUPPRESS)) {
1.1152 + double dvalue;
1.1153 + *end = '\0';
1.1154 + dvalue = strtod(buf, NULL);
1.1155 + objPtr = Tcl_NewDoubleObj(dvalue);
1.1156 + Tcl_IncrRefCount(objPtr);
1.1157 + objs[objIndex++] = objPtr;
1.1158 + }
1.1159 + break;
1.1160 + }
1.1161 + nconversions++;
1.1162 + }
1.1163 +
1.1164 + done:
1.1165 + result = 0;
1.1166 + code = TCL_OK;
1.1167 +
1.1168 + if (numVars) {
1.1169 + /*
1.1170 + * In this case, variables were specified (classic scan)
1.1171 + */
1.1172 + for (i = 0; i < totalVars; i++) {
1.1173 + if (objs[i] != NULL) {
1.1174 + Tcl_Obj *tmpPtr;
1.1175 +
1.1176 + result++;
1.1177 + tmpPtr = Tcl_ObjSetVar2(interp, objv[i+3], NULL, objs[i], 0);
1.1178 + Tcl_DecrRefCount(objs[i]);
1.1179 + if (tmpPtr == NULL) {
1.1180 + Tcl_AppendStringsToObj(Tcl_GetObjResult(interp),
1.1181 + "couldn't set variable \"",
1.1182 + Tcl_GetString(objv[i+3]), "\"", (char *) NULL);
1.1183 + code = TCL_ERROR;
1.1184 + }
1.1185 + }
1.1186 + }
1.1187 + } else {
1.1188 + /*
1.1189 + * Here no vars were specified, we want a list returned (inline scan)
1.1190 + */
1.1191 + objPtr = Tcl_NewObj();
1.1192 + for (i = 0; i < totalVars; i++) {
1.1193 + if (objs[i] != NULL) {
1.1194 + Tcl_ListObjAppendElement(NULL, objPtr, objs[i]);
1.1195 + Tcl_DecrRefCount(objs[i]);
1.1196 + } else {
1.1197 + /*
1.1198 + * More %-specifiers than matching chars, so we
1.1199 + * just spit out empty strings for these
1.1200 + */
1.1201 + Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewObj());
1.1202 + }
1.1203 + }
1.1204 + }
1.1205 + if (objs != NULL) {
1.1206 + ckfree((char*) objs);
1.1207 + }
1.1208 + if (code == TCL_OK) {
1.1209 + if (underflow && (nconversions == 0)) {
1.1210 + if (numVars) {
1.1211 + objPtr = Tcl_NewIntObj(-1);
1.1212 + } else {
1.1213 + if (objPtr) {
1.1214 + Tcl_SetListObj(objPtr, 0, NULL);
1.1215 + } else {
1.1216 + objPtr = Tcl_NewObj();
1.1217 + }
1.1218 + }
1.1219 + } else if (numVars) {
1.1220 + objPtr = Tcl_NewIntObj(result);
1.1221 + }
1.1222 + Tcl_SetObjResult(interp, objPtr);
1.1223 + }
1.1224 + return code;
1.1225 +}