os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclEncoding.c
changeset 0 bde4ae8d615e
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclEncoding.c	Fri Jun 15 03:10:57 2012 +0200
     1.3 @@ -0,0 +1,3178 @@
     1.4 +/*
     1.5 + * tclEncoding.c --
     1.6 + *
     1.7 + *	Contains the implementation of the encoding conversion package.
     1.8 + *
     1.9 + * Copyright (c) 1996-1998 Sun Microsystems, Inc.
    1.10 + * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.  
    1.11 + *
    1.12 + * See the file "license.terms" for information on usage and redistribution
    1.13 + * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    1.14 + *
    1.15 + * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.14 2007/02/12 19:25:42 andreas_kupries Exp $
    1.16 + */
    1.17 +
    1.18 +#include "tclInt.h"
    1.19 +#include "tclPort.h"
    1.20 +#if defined(__SYMBIAN32__) 
    1.21 +#include "tclSymbianGlobals.h"
    1.22 +#endif 
     1.23 +/* Type of a function that returns a terminated string's length in bytes. */
     1.24 +typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));
    1.25 +
    1.26 +/*
    1.27 + * The following data structure represents an encoding, which describes how
    1.28 + * to convert between various character sets and UTF-8.
    1.29 + */
    1.30 +
    1.31 +typedef struct Encoding {
    1.32 +    char *name;			/* Name of encoding.  Malloced because (1)
    1.33 +				 * hash table entry that owns this encoding
    1.34 +				 * may be freed prior to this encoding being
    1.35 +				 * freed, (2) string passed in the
    1.36 +				 * Tcl_EncodingType structure may not be
    1.37 +				 * persistent.  Freed with the encoding. */
    1.38 +    Tcl_EncodingConvertProc *toUtfProc;
    1.39 +				/* Procedure to convert from external
    1.40 +				 * encoding into UTF-8. */
    1.41 +    Tcl_EncodingConvertProc *fromUtfProc;
    1.42 +				/* Procedure to convert from UTF-8 into
    1.43 +				 * external encoding. */
    1.44 +    Tcl_EncodingFreeProc *freeProc;
    1.45 +				/* If non-NULL, procedure called with
    1.46 +				 * clientData when this encoding is deleted. */
    1.47 +    int nullSize;		/* Number of 0x00 bytes that signify
    1.48 +				 * end-of-string in this encoding.  This
    1.49 +				 * number is used to determine the source
    1.50 +				 * string length when the srcLen argument is
    1.51 +				 * negative.  This number can be 1 or 2. */
    1.52 +    ClientData clientData;	/* Arbitrary value associated with encoding
    1.53 +				 * type.  Passed to conversion procedures. */
    1.54 +    LengthProc *lengthProc;	/* Function to compute length of
    1.55 +				 * null-terminated strings in this encoding.
    1.56 +				 * If nullSize is 1, this is strlen; if
    1.57 +				 * nullSize is 2, this is a function that
    1.58 +				 * returns the number of bytes in a 0x0000
    1.59 +				 * terminated string. */
    1.60 +    int refCount;		/* Reference count; freed when it hits 0. */
    1.61 +    Tcl_HashEntry *hPtr;	/* Owning hash table entry, or NULL if none. */
    1.62 +} Encoding;
    1.63 +
    1.64 +/*
    1.65 + * The following structure is the clientData for a dynamically-loaded,
    1.66 + * table-driven encoding created by LoadTableEncoding().  It maps between
    1.67 + * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only)
    1.68 + * encoding.
    1.69 + */
    1.70 +
    1.71 +typedef struct TableEncodingData {
    1.72 +    int fallback;		/* Character (in this encoding) to
    1.73 +				 * substitute when this encoding cannot
    1.74 +				 * represent a UTF-8 character. */
    1.75 +    char prefixBytes[256];	/* If a byte in the input stream is a lead
    1.76 +				 * byte for a 2-byte sequence, the
    1.77 +				 * corresponding entry in this array is 1,
    1.78 +				 * otherwise it is 0. */
    1.79 +    unsigned short **toUnicode;	/* Two dimensional sparse matrix to map
    1.80 +				 * characters from the encoding to Unicode.
    1.81 +				 * Each element of the toUnicode array points
    1.82 +				 * to an array of 256 shorts.  If there is no
    1.83 +				 * corresponding character in Unicode, the
    1.84 +				 * value in the matrix is 0x0000.  malloc'd. */
    1.85 +    unsigned short **fromUnicode;
    1.86 +				/* Two dimensional sparse matrix to map
    1.87 +				 * characters from Unicode to the encoding.
    1.88 +				 * Each element of the fromUnicode array
    1.89 +				 * points to an array of 256 shorts.  If there
    1.90 +				 * is no corresponding character in the
    1.91 +				 * encoding, the value in the matrix is
    1.92 +				 * 0x0000.  malloc'd. */
    1.93 +} TableEncodingData;
    1.94 +
    1.95 +/*
    1.96 + * The following structures are the clientData for a dynamically-loaded,
    1.97 + * escape-driven encoding that is itself comprised of other simpler
    1.98 + * encodings.  An example is "iso-2022-jp", which uses escape sequences to
    1.99 + * switch between ascii, jis0208, jis0212, gb2312, and ksc5601.  Note that
   1.100 + * "escape-driven" does not necessarily mean that the ESCAPE character is
   1.101 + * the character used for switching character sets.
   1.102 + */
   1.103 +
   1.104 +typedef struct EscapeSubTable {
   1.105 +    unsigned int sequenceLen;	/* Length of the sequence string below. */
   1.106 +    char sequence[16];		/* Escape code that marks this encoding. */
   1.107 +    char name[32];		/* Name of the sub-encoding. */
   1.108 +    Encoding *encodingPtr;	/* Encoding loaded using above name, or NULL
   1.109 +				 * if this sub-encoding has not been needed
   1.110 +				 * yet. */
   1.111 +} EscapeSubTable;
   1.112 +
   1.113 +typedef struct EscapeEncodingData {
   1.114 +    int fallback;		/* Character (in this encoding) to
   1.115 +				 * substitute when this encoding cannot
   1.116 +				 * represent a UTF-8 character. */
   1.117 +    unsigned int initLen;	/* Length of the init string below. */
   1.118 +    char init[16];		/* String to emit or expect before first char
   1.119 +				 * in conversion. */
   1.120 +    unsigned int finalLen;	/* Length of the final string below. */
   1.121 +    char final[16];		/* String to emit or expect after last char
   1.122 +				 * in conversion. */
   1.123 +    char prefixBytes[256];	/* If a byte in the input stream is the 
   1.124 +				 * first character of one of the escape 
   1.125 +				 * sequences in the following array, the 
   1.126 +				 * corresponding entry in this array is 1,
   1.127 +				 * otherwise it is 0. */
   1.128 +    int numSubTables;		/* Number of entries in subTables below. */
   1.129 +    EscapeSubTable subTables[1];/* Information about each EscapeSubTable
   1.130 +				 * used by this encoding type.  Declared with
   1.131 +				 * one element; the actual size will be as
   1.132 +				 * large as needed to hold numSubTables. */
   1.133 +} EscapeEncodingData;
   1.134 +
   1.135 +/*
   1.136 + * Type codes that identify the kind of encoding file being loaded.  The
   1.137 + * names parallel the table- and escape-driven encodings described above.
   1.138 + */
   1.139 +
   1.140 +#define ENCODING_SINGLEBYTE	0
   1.141 +#define ENCODING_DOUBLEBYTE	1
   1.142 +#define ENCODING_MULTIBYTE	2
   1.143 +#define ENCODING_ESCAPE		3
   1.144 +
   1.145 +#if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
   1.146 +/*
   1.147 + * The default encoding directory.  If this variable contains a non-NULL
   1.148 + * value, it will be the first path used to locate the system encoding
   1.149 + * files.  Tcl_SetDefaultEncodingDir() stores a heap copy of its argument.
   1.150 + */
   1.151 +
   1.152 +char *tclDefaultEncodingDir = NULL;
   1.153 +
   1.154 +static int encodingsInitialized  = 0;	/* Reset to 0 at finalization. */
   1.155 +
   1.156 +/*
   1.157 + * Hash table that keeps track of all loaded Encodings.  Keys are
   1.158 + * the string names that represent the encoding, values are (Encoding *).
   1.159 + */
   1.160 + 
   1.161 +static Tcl_HashTable encodingTable;
   1.162 +TCL_DECLARE_MUTEX(encodingMutex)
   1.163 +
   1.164 +/*
   1.165 + * The following are used to hold the default and current system encodings.  
   1.166 + * If NULL is passed to one of the conversion routines, the current setting 
   1.167 + * of the system encoding will be used to perform the conversion.
   1.168 + */
   1.169 +
   1.170 +static Tcl_Encoding defaultEncoding;
   1.171 +static Tcl_Encoding systemEncoding;
   1.172 +#endif /* !__SYMBIAN32__ || !__WINSCW__ */
   1.173 +/*
   1.174 + * The following variable is used in the sparse matrix code for a
   1.175 + * TableEncoding to represent a page in the table that has no entries.
   1.176 + */
   1.177 +
   1.178 +static unsigned short emptyPage[256];
   1.179 +
   1.180 +/*
   1.181 + * Forward declarations of the procedures used only in this module.
   1.182 + */
   1.183 +
   1.184 +static int		BinaryProc _ANSI_ARGS_((ClientData clientData,
   1.185 +			    CONST char *src, int srcLen, int flags,
   1.186 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.187 +			    int *srcReadPtr, int *dstWrotePtr,
   1.188 +			    int *dstCharsPtr));
   1.189 +static void		DupEncodingIntRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
   1.190 +			    Tcl_Obj *dupPtr));
   1.191 +static void		EscapeFreeProc _ANSI_ARGS_((ClientData clientData));
   1.192 +static int		EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData,
   1.193 +			    CONST char *src, int srcLen, int flags,
   1.194 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.195 +			    int *srcReadPtr, int *dstWrotePtr,
   1.196 +			    int *dstCharsPtr));
   1.197 +static int		EscapeToUtfProc _ANSI_ARGS_((ClientData clientData,
   1.198 +			    CONST char *src, int srcLen, int flags,
   1.199 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.200 +			    int *srcReadPtr, int *dstWrotePtr,
   1.201 +			    int *dstCharsPtr));
   1.202 +static void		FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding));
   1.203 +static void		FreeEncodingIntRep _ANSI_ARGS_((Tcl_Obj *objPtr));
   1.204 +static Encoding *	GetTableEncoding _ANSI_ARGS_((
   1.205 +			    EscapeEncodingData *dataPtr, int state));
   1.206 +static Tcl_Encoding	LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp,
   1.207 +			    CONST char *name));
   1.208 +static Tcl_Encoding	LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp,
   1.209 +			    CONST char *name, int type, Tcl_Channel chan));
   1.210 +static Tcl_Encoding	LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, 
   1.211 +			    Tcl_Channel chan));
   1.212 +static Tcl_Channel	OpenEncodingFile _ANSI_ARGS_((CONST char *dir,
   1.213 +			    CONST char *name));
   1.214 +static void		TableFreeProc _ANSI_ARGS_((ClientData clientData));
   1.215 +static int		TableFromUtfProc _ANSI_ARGS_((ClientData clientData,
   1.216 +			    CONST char *src, int srcLen, int flags,
   1.217 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.218 +			    int *srcReadPtr, int *dstWrotePtr,
   1.219 +			    int *dstCharsPtr));
   1.220 +static int		TableToUtfProc _ANSI_ARGS_((ClientData clientData,
   1.221 +			    CONST char *src, int srcLen, int flags,
   1.222 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.223 +			    int *srcReadPtr, int *dstWrotePtr,
   1.224 +			    int *dstCharsPtr));
   1.225 +static size_t		unilen _ANSI_ARGS_((CONST char *src));
   1.226 +static int		UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData,
   1.227 +			    CONST char *src, int srcLen, int flags,
   1.228 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.229 +			    int *srcReadPtr, int *dstWrotePtr,
   1.230 +			    int *dstCharsPtr));
   1.231 +static int		UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData,
   1.232 +			    CONST char *src, int srcLen, int flags,
   1.233 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.234 +			    int *srcReadPtr, int *dstWrotePtr,
   1.235 +			    int *dstCharsPtr));
   1.236 +static int		UtfToUtfProc _ANSI_ARGS_((ClientData clientData,
   1.237 +			    CONST char *src, int srcLen, int flags,
   1.238 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.239 +			    int *srcReadPtr, int *dstWrotePtr,
   1.240 +			    int *dstCharsPtr, int pureNullMode));
   1.241 +static int		UtfIntToUtfExtProc _ANSI_ARGS_((ClientData clientData,
   1.242 +			    CONST char *src, int srcLen, int flags,
   1.243 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.244 +			    int *srcReadPtr, int *dstWrotePtr,
   1.245 +			    int *dstCharsPtr));
   1.246 +static int		UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData,
   1.247 +			    CONST char *src, int srcLen, int flags,
   1.248 +			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   1.249 +			    int *srcReadPtr, int *dstWrotePtr,
   1.250 +			    int *dstCharsPtr));
   1.251 +static int		TclFindEncodings _ANSI_ARGS_((CONST char *argv0));
   1.252 +
   1.253 +/*
   1.254 + * A Tcl_ObjType whose intrep holds a counted reference to a Tcl_Encoding,
   1.255 + * so the encoding stays alive as long as the object caches it.  Only the
   1.256 + * free and dup procs are supplied.  See concerns raised in [Bug 1077262].
   1.257 + */
   1.258 +
   1.259 +static Tcl_ObjType EncodingType = {
   1.260 +    "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
   1.261 +};
   1.262 +
   1.263 +
   1.264 +/*
   1.265 + *----------------------------------------------------------------------
   1.266 + *
   1.267 + * TclGetEncodingFromObj --
   1.268 + *
   1.269 + *      Looks up the encoding named by the string rep of objPtr and
   1.270 + *      stores it in *encodingPtr.  The caller gets its own reference
   1.271 + *      and should eventually release it with Tcl_FreeEncoding.
   1.272 + *
   1.273 + * Results:
   1.274 + *      TCL_OK, or TCL_ERROR (message in interp, if non-NULL) when no
   1.275 + *      such encoding exists.  Side effect: on success objPtr caches
   1.276 + *      the encoding as its "encoding" internal rep.
   1.277 + *----------------------------------------------------------------------
   1.278 + */
   1.279 +int
   1.280 +TclGetEncodingFromObj(interp, objPtr, encodingPtr)
   1.281 +    Tcl_Interp *interp;
   1.282 +    Tcl_Obj *objPtr;
   1.283 +    Tcl_Encoding *encodingPtr;
   1.284 +{
   1.285 +    CONST char *encName = Tcl_GetString(objPtr);
   1.286 +
   1.287 +    if (objPtr->typePtr != &EncodingType) {
   1.288 +	Tcl_ObjType *oldTypePtr;
   1.289 +	Tcl_Encoding cached = Tcl_GetEncoding(interp, encName);
   1.290 +
   1.291 +	if (cached == NULL) {
   1.292 +	    return TCL_ERROR;
   1.293 +	}
   1.294 +	oldTypePtr = objPtr->typePtr;
   1.295 +	if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) {
   1.296 +	    (*oldTypePtr->freeIntRepProc)(objPtr);
   1.297 +	}
   1.298 +	objPtr->typePtr = &EncodingType;
   1.299 +	objPtr->internalRep.otherValuePtr = (VOID *) cached;
   1.300 +    }
   1.301 +    *encodingPtr = Tcl_GetEncoding(NULL, encName);
   1.302 +    return TCL_OK;
   1.303 +}
   1.304 +
   1.305 +/*
   1.306 + *----------------------------------------------------------------------
   1.307 + * FreeEncodingIntRep --
   1.308 + *
   1.309 + *      Tcl_FreeInternalRepProc of the "encoding" Tcl_ObjType: releases
   1.310 + *      the encoding stored in objPtr's internal rep.
   1.311 + *----------------------------------------------------------------------
   1.312 + */
   1.313 +static void
   1.314 +FreeEncodingIntRep(objPtr)
   1.315 +    Tcl_Obj *objPtr;
   1.316 +{
   1.317 +    Tcl_Encoding held = (Tcl_Encoding) objPtr->internalRep.otherValuePtr;
   1.318 +    Tcl_FreeEncoding(held);
   1.319 +}
   1.320 +
   1.321 +/*
   1.322 + *----------------------------------------------------------------------
   1.323 + * DupEncodingIntRep --
   1.324 + *      The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.  Takes a
   1.325 + *      new encoding reference for dupPtr and sets dupPtr's type, so that
   1.326 + *      FreeEncodingIntRep later releases that reference (no leak).
   1.327 + *----------------------------------------------------------------------
   1.328 + */
   1.329 +static void
   1.330 +DupEncodingIntRep(srcPtr, dupPtr)
   1.331 +    Tcl_Obj *srcPtr;
   1.332 +    Tcl_Obj *dupPtr;
   1.333 +{
   1.334 +    dupPtr->internalRep.otherValuePtr = (VOID *)
   1.335 +	    Tcl_GetEncoding(NULL, srcPtr->bytes);
   1.336 +    dupPtr->typePtr = &EncodingType;
   1.337 +}
   1.338 +
   1.339 +/*
   1.340 + * CreateBuiltinEncoding --
   1.341 + *	Registers one built-in encoding that has no freeProc and no
   1.342 + *	clientData, and returns the token from Tcl_CreateEncoding().
   1.343 + */
   1.344 +
   1.345 +static Tcl_Encoding
   1.346 +CreateBuiltinEncoding(encodingName, toUtfProc, fromUtfProc, nullSize)
   1.347 +    CONST char *encodingName;	/* Name to register the encoding under. */
   1.348 +    Tcl_EncodingConvertProc *toUtfProc;
   1.349 +    Tcl_EncodingConvertProc *fromUtfProc;
   1.350 +    int nullSize;		/* Number of 0x00 terminator bytes. */
   1.351 +{
   1.352 +    Tcl_EncodingType type;
   1.353 +
   1.354 +    type.encodingName	= encodingName;
   1.355 +    type.toUtfProc	= toUtfProc;
   1.356 +    type.fromUtfProc	= fromUtfProc;
   1.357 +    type.freeProc	= NULL;
   1.358 +    type.nullSize	= nullSize;
   1.359 +    type.clientData	= NULL;
   1.360 +    return Tcl_CreateEncoding(&type);
   1.361 +}
   1.362 +
   1.363 +/*
   1.364 + *---------------------------------------------------------------------------
   1.365 + *
   1.366 + * TclInitEncodingSubsystem --
   1.367 + *
   1.368 + *	Initializes the per-process state of this subsystem: the table of
   1.369 + *	loaded encodings and the built-in "identity", "utf-8" and "unicode"
   1.370 + *	encodings.  "identity" becomes the default and the initial system
   1.371 + *	encoding.  Depends on the memory, object, and IO subsystems.
   1.372 + *
   1.373 + *---------------------------------------------------------------------------
   1.374 + */
   1.375 +
   1.376 +void
   1.377 +TclInitEncodingSubsystem()
   1.378 +{
   1.379 +    Tcl_MutexLock(&encodingMutex);
   1.380 +    Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
   1.381 +    Tcl_MutexUnlock(&encodingMutex);
   1.382 +
   1.383 +    /*
   1.384 +     * Create the initial encodings.  Note that the UTF-8 to UTF-8
   1.385 +     * translation is not a no-op, because it will turn a stream of
   1.386 +     * improperly formed UTF-8 into a properly formed stream.
   1.387 +     */
   1.388 +
   1.389 +    defaultEncoding = CreateBuiltinEncoding("identity", BinaryProc,
   1.390 +	    BinaryProc, 1);
   1.391 +    systemEncoding = Tcl_GetEncoding(NULL, "identity");
   1.392 +    (void) CreateBuiltinEncoding("utf-8", UtfExtToUtfIntProc,
   1.393 +	    UtfIntToUtfExtProc, 1);
   1.394 +    (void) CreateBuiltinEncoding("unicode", UnicodeToUtfProc,
   1.395 +	    UtfToUnicodeProc, 2);
   1.396 +}
   1.397 +
   1.398 +
   1.399 +/*
   1.400 + *----------------------------------------------------------------------
   1.401 + *
   1.402 + * TclFinalizeEncodingSubsystem --
   1.403 + *
   1.404 + *	Tears down the encoding subsystem: drops the system encoding
   1.405 + *	reference, releases every encoding still registered in the
   1.406 + *	encoding table and deletes the table itself.
   1.407 + *
   1.408 + * Results:
   1.409 + *	None.
   1.410 + *
   1.411 + * Side effects:
   1.412 + *	Frees all of the encodings.
   1.413 + *----------------------------------------------------------------------
   1.414 + */
   1.415 +
   1.416 +void
   1.417 +TclFinalizeEncodingSubsystem()
   1.418 +{
   1.419 +    Tcl_HashSearch iter;
   1.420 +    Tcl_HashEntry *entryPtr;
   1.421 +
   1.422 +    Tcl_MutexLock(&encodingMutex);
   1.423 +    encodingsInitialized = 0;
   1.424 +    FreeEncoding(systemEncoding);
   1.425 +
   1.426 +    /*
   1.427 +     * Go through FreeEncoding rather than freeing entries directly, so the
   1.428 +     * refcounts used by escape encodings are honoured.  [Bug #524674]
   1.429 +     * Restart from the first entry on every pass so that all encodings
   1.430 +     * are eventually cleaned up.
   1.431 +     */
   1.432 +    while ((entryPtr = Tcl_FirstHashEntry(&encodingTable, &iter)) != NULL) {
   1.433 +	FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(entryPtr));
   1.434 +    }
   1.435 +    Tcl_DeleteHashTable(&encodingTable);
   1.436 +    Tcl_MutexUnlock(&encodingMutex);
   1.437 +}
   1.438 +
   1.439 +/*
   1.440 + *-------------------------------------------------------------------------
   1.441 + *
   1.442 + * Tcl_GetDefaultEncodingDir --
   1.443 + *	Fetches the default encoding directory.
   1.444 + *
   1.445 + * Results:
   1.446 + *	The current tclDefaultEncodingDir pointer, which may be NULL.
   1.447 + * Side effects:
   1.448 + *	None.
   1.449 + *-------------------------------------------------------------------------
   1.450 + */
   1.451 +
   1.452 +EXPORT_C CONST char *
   1.453 +Tcl_GetDefaultEncodingDir()
   1.454 +{
   1.455 +    return tclDefaultEncodingDir;
   1.456 +}
   1.457 +
   1.458 +/*
   1.459 + *-------------------------------------------------------------------------
   1.460 + * Tcl_SetDefaultEncodingDir --
   1.461 + *
   1.462 + *	Makes a private heap copy of path the default encoding directory.
   1.463 + *	Any previously stored directory string is not freed here.
   1.464 + *
   1.465 + *-------------------------------------------------------------------------
   1.466 + */
   1.467 +
   1.468 +EXPORT_C void
   1.469 +Tcl_SetDefaultEncodingDir(path)
   1.470 +    CONST char *path;
   1.471 +{
   1.472 +    unsigned numBytes = (unsigned) strlen(path) + 1;
   1.473 +    char *copy = (char *) ckalloc(numBytes);
   1.474 +
   1.475 +    strcpy(copy, path);
   1.476 +    tclDefaultEncodingDir = copy;
   1.477 +}
   1.478 +
   1.479 +/*
   1.480 + *-------------------------------------------------------------------------
   1.481 + *
   1.482 + * Tcl_GetEncoding --
   1.483 + *
   1.484 + *	Given the name of an encoding, find the corresponding Tcl_Encoding
   1.485 + *	token.  If the encoding did not already exist, Tcl attempts to
   1.486 + *	dynamically load an encoding by that name.  A NULL name selects
   1.487 + *	the current system encoding.
   1.488 + *
   1.489 + * Results:
   1.490 + *	Returns a token that represents the encoding, or NULL if the name
   1.491 + *	didn't refer to any known or loadable encoding; in that case an
   1.492 + *	error message is left in interp's result, unless interp was NULL.
   1.493 + *
   1.494 + * Side effects:
   1.495 + *	The encoding's reference count is incremented; each call should
   1.496 + *	eventually be matched by a call to Tcl_FreeEncoding.
   1.497 + *-------------------------------------------------------------------------
   1.498 + */
   1.499 +
   1.500 +EXPORT_C Tcl_Encoding
   1.501 +Tcl_GetEncoding(interp, name)
   1.502 +    Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
   1.503 +    CONST char *name;		/* The name of the desired encoding. */
   1.504 +{
   1.505 +    Tcl_HashEntry *hPtr;
   1.506 +    Encoding *encodingPtr;
   1.507 +
   1.508 +    Tcl_MutexLock(&encodingMutex);
   1.509 +    if (name == NULL) {
   1.510 +	/*
   1.511 +	 * Return the pointer captured under the lock: systemEncoding may be
   1.512 +	 * replaced by another thread as soon as the mutex is released.
   1.513 +	 */
   1.514 +	encodingPtr = (Encoding *) systemEncoding;
   1.515 +	encodingPtr->refCount++;
   1.516 +	Tcl_MutexUnlock(&encodingMutex);
   1.517 +	return (Tcl_Encoding) encodingPtr;
   1.518 +    }
   1.519 +
   1.520 +    hPtr = Tcl_FindHashEntry(&encodingTable, name);
   1.521 +    if (hPtr != NULL) {
   1.522 +	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   1.523 +	encodingPtr->refCount++;
   1.524 +	Tcl_MutexUnlock(&encodingMutex);
   1.525 +	return (Tcl_Encoding) encodingPtr;
   1.526 +    }
   1.527 +    Tcl_MutexUnlock(&encodingMutex);
   1.528 +    return LoadEncodingFile(interp, name);
   1.529 +}
   1.530 +
   1.531 +/*
   1.532 + *---------------------------------------------------------------------------
   1.533 + *
   1.534 + * Tcl_FreeEncoding --
   1.535 + *
   1.536 + *	Releases an encoding obtained from Tcl_CreateEncoding() or
   1.537 + *	Tcl_GetEncoding().  Holds encodingMutex while delegating to
   1.538 + *	FreeEncoding(); a NULL encoding is ignored.
   1.539 + *
   1.540 + * Results:
   1.541 + *	None.
   1.542 + *
   1.543 + * Side effects:
   1.544 + *	The reference count associated with the encoding is decremented
   1.545 + *	and the encoding is deleted if nothing is using it anymore.
   1.546 + *---------------------------------------------------------------------------
   1.547 + */
   1.548 +
   1.549 +EXPORT_C void
   1.550 +Tcl_FreeEncoding(encoding)
   1.551 +    Tcl_Encoding encoding;
   1.552 +{
   1.553 +    Tcl_MutexLock(&encodingMutex);
   1.554 +    FreeEncoding(encoding);
   1.555 +    Tcl_MutexUnlock(&encodingMutex);
   1.556 +}
   1.557 +
   1.558 +/*
   1.559 + *----------------------------------------------------------------------
   1.560 + *
   1.561 + * FreeEncoding --
   1.562 + *
   1.563 + *	Drops one reference to an encoding.  Meant for callers that
   1.564 + *	already hold encodingMutex.  A NULL encoding is ignored.
   1.565 + *
   1.566 + * Side effects:
   1.567 + *	Once the reference count reaches zero the encoding's freeProc is
   1.568 + *	run, its hash table entry is removed and its memory is released.
   1.569 + *
   1.570 + *----------------------------------------------------------------------
   1.571 + */
   1.572 +
   1.573 +static void
   1.574 +FreeEncoding(encoding)
   1.575 +    Tcl_Encoding encoding;
   1.576 +{
   1.577 +    Encoding *encPtr = (Encoding *) encoding;
   1.578 +
   1.579 +    if (encPtr == NULL) {
   1.580 +	return;
   1.581 +    }
   1.582 +
   1.583 +    encPtr->refCount -= 1;
   1.584 +    if (encPtr->refCount != 0) {
   1.585 +	return;				/* Still in use elsewhere. */
   1.586 +    }
   1.587 +
   1.588 +    /* Last reference gone: tear the encoding down. */
   1.589 +    if (encPtr->freeProc != NULL) {
   1.590 +	(*encPtr->freeProc)(encPtr->clientData);
   1.591 +    }
   1.592 +    if (encPtr->hPtr != NULL) {
   1.593 +	Tcl_DeleteHashEntry(encPtr->hPtr);
   1.594 +    }
   1.595 +    ckfree((char *) encPtr->name);
   1.596 +    ckfree((char *) encPtr);
   1.597 +}
   1.598 +
   1.599 +/*
   1.600 + *-------------------------------------------------------------------------
   1.601 + *
   1.602 + * Tcl_GetEncodingName --
   1.603 + *
   1.604 + *	Given an encoding, return the name that was used to construct
   1.605 + *	the encoding.  A NULL encoding denotes the current system
   1.606 + *	encoding.
   1.607 + *
   1.608 + * Results:
   1.609 + *	The name of the encoding.  The returned pointer is the encoding's
   1.610 + *	own name field, not a copy.
   1.611 + *
   1.612 + * Side effects:
   1.613 + *	None.
   1.614 + *
   1.615 + *---------------------------------------------------------------------------
   1.616 + */
   1.617 +
   1.618 +EXPORT_C CONST char *
   1.619 +Tcl_GetEncodingName(encoding)
   1.620 +    Tcl_Encoding encoding;	/* The encoding whose name to fetch. */
   1.621 +{
   1.622 +    Tcl_Encoding target;
   1.623 +
   1.624 +    /* A NULL token stands for the current system encoding. */
   1.625 +    target = (encoding != NULL) ? encoding : systemEncoding;
   1.626 +    return ((Encoding *) target)->name;
   1.627 +}
   1.628 +
   1.629 +/*
   1.630 + *-------------------------------------------------------------------------
   1.631 + *
   1.632 + * Tcl_GetEncodingNames --
   1.633 + *
   1.634 + *	Get the list of all known encodings: the loaded ones plus the ones
   1.635 + *	found as .enc files in each library path's "encoding" directory.
   1.636 + *
   1.637 + * Results:
   1.638 + *	Modifies interp's result object to hold a list of all the available
   1.639 + *	encodings.
   1.640 + *
   1.641 + * Side effects:
   1.642 + *	Any previous contents of interp's result are discarded.
   1.643 + *
   1.644 + *-------------------------------------------------------------------------
   1.645 + */
   1.646 +
   1.647 +EXPORT_C void
   1.648 +Tcl_GetEncodingNames(interp)
   1.649 +    Tcl_Interp *interp;		/* Interp to hold result. */
   1.650 +{
   1.651 +    Tcl_HashSearch search;
   1.652 +    Tcl_HashEntry *hPtr;
   1.653 +    Tcl_Obj *pathPtr, *resultPtr;
   1.654 +    int dummy;			/* Output of Tcl_CreateHashEntry; ignored. */
   1.655 +
   1.656 +    Tcl_HashTable table;	/* Set of encoding names collected so far. */
   1.657 +
   1.658 +    Tcl_MutexLock(&encodingMutex);
   1.659 +    Tcl_InitHashTable(&table, TCL_STRING_KEYS);
   1.660 +    hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
   1.661 +    while (hPtr != NULL) {
   1.662 +	Encoding *encodingPtr;
   1.663 +	
   1.664 +	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   1.665 +	Tcl_CreateHashEntry(&table, encodingPtr->name, &dummy);
   1.666 +	hPtr = Tcl_NextHashEntry(&search);
   1.667 +    }
   1.668 +    Tcl_MutexUnlock(&encodingMutex);
   1.669 +
   1.670 +    pathPtr = TclGetLibraryPath();
   1.671 +    if (pathPtr != NULL) {
   1.672 +	int i, objc;
   1.673 +	Tcl_Obj **objv;
   1.674 +	char globArgString[10];
   1.675 +	Tcl_Obj* encodingObj = Tcl_NewStringObj("encoding",-1);
   1.676 +	Tcl_IncrRefCount(encodingObj);
   1.677 +	
   1.678 +	objc = 0;
   1.679 +	Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
   1.680 +
   1.681 +	for (i = 0; i < objc; i++) {
   1.682 +	    Tcl_Obj *searchIn;
   1.683 +	    
   1.684 +	    /* 
   1.685 +	     * Construct the path from the element of pathPtr,
   1.686 +	     * joined with 'encoding'.
   1.687 +	     */
   1.688 +	    searchIn = Tcl_FSJoinToPath(objv[i],1,&encodingObj);
   1.689 +	    Tcl_IncrRefCount(searchIn);
   1.690 +	    Tcl_ResetResult(interp);
   1.691 +
   1.692 +	    /*
   1.693 +	     * TclGlob() changes the contents of globArgString, which causes
   1.694 +	     * a segfault if we pass in a pointer to non-writeable memory.
   1.695 +	     * TclGlob() puts its results directly into interp.
   1.696 +	     */
   1.697 +
   1.698 +	    strcpy(globArgString, "*.enc");
   1.699 +	    /* 
   1.700 +	     * The GLOBMODE_TAILS flag returns just the tail of each file
   1.701 +	     * which is the encoding name with a .enc extension 
   1.702 +	     */
   1.703 +	    if ((TclGlob(interp, globArgString, searchIn, 
   1.704 +			 TCL_GLOBMODE_TAILS, NULL) == TCL_OK)) {
   1.705 +		int objc2 = 0;
   1.706 +		Tcl_Obj **objv2;
   1.707 +		int j;
   1.708 +
   1.709 +		Tcl_ListObjGetElements(NULL, Tcl_GetObjResult(interp), &objc2,
   1.710 +			&objv2);
   1.711 +
   1.712 +		for (j = 0; j < objc2; j++) {
   1.713 +		    int length;
   1.714 +		    char *string;
   1.715 +		    string = Tcl_GetStringFromObj(objv2[j], &length);
   1.716 +		    length -= 4;	/* Leave off the ".enc" suffix. */
   1.717 +		    if (length > 0) {
   1.718 +			string[length] = '\0';	/* Cut in place for the key. */
   1.719 +			Tcl_CreateHashEntry(&table, string, &dummy);
   1.720 +			string[length] = '.';	/* Put the '.' back. */
   1.721 +		    }
   1.722 +		}
   1.723 +	    }
   1.724 +	    Tcl_DecrRefCount(searchIn);
   1.725 +	}
   1.726 +	Tcl_DecrRefCount(encodingObj);
   1.727 +    }
   1.728 +
   1.729 +    /*
   1.730 +     * Clear any values placed in the result by globbing.
   1.731 +     */
   1.732 +
   1.733 +    Tcl_ResetResult(interp);
   1.734 +    resultPtr = Tcl_GetObjResult(interp);
   1.735 +
   1.736 +    hPtr = Tcl_FirstHashEntry(&table, &search);
   1.737 +    while (hPtr != NULL) {
   1.738 +	Tcl_Obj *strPtr;
   1.739 +
   1.740 +	strPtr = Tcl_NewStringObj(Tcl_GetHashKey(&table, hPtr), -1);
   1.741 +	Tcl_ListObjAppendElement(NULL, resultPtr, strPtr);
   1.742 +	hPtr = Tcl_NextHashEntry(&search);
   1.743 +    }
   1.744 +    Tcl_DeleteHashTable(&table);
   1.745 +}
   1.746 +
   1.747 +/*
   1.748 + *------------------------------------------------------------------------
   1.749 + *
   1.750 + * Tcl_SetSystemEncoding --
   1.751 + *
   1.752 + *	Selects the encoding that is used whenever the user passes a
   1.753 + *	NULL value in to one of the conversion routines.  If the
   1.754 + *	supplied name is NULL, the system encoding is reset to the
   1.755 + *	default system encoding.
   1.756 + *
   1.757 + * Results:
   1.758 + *	TCL_OK if the system encoding was changed to the one specified by
   1.759 + *	name.  TCL_ERROR if that encoding could not be obtained; an error
   1.760 + *	message is then left in interp's result object, unless interp
   1.761 + *	was NULL.
   1.762 + *
   1.763 + * Side effects:
   1.764 + *	The reference count of the new system encoding is incremented.
   1.765 + *	The reference count of the old system encoding is decremented
   1.766 + *	and it may be freed.
   1.767 + *
   1.768 + *------------------------------------------------------------------------
   1.769 + */
   1.770 +
   1.771 +EXPORT_C int
   1.772 +Tcl_SetSystemEncoding(interp, name)
   1.773 +    Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
   1.774 +    CONST char *name;		/* The name of the desired encoding, or NULL
   1.775 +				 * to reset to default encoding. */
   1.776 +{
   1.777 +    Tcl_Encoding replacement;
   1.778 +
   1.779 +    if (name != NULL) {
   1.780 +	replacement = Tcl_GetEncoding(interp, name);
   1.781 +	if (replacement == NULL) {
   1.782 +	    return TCL_ERROR;
   1.783 +	}
   1.784 +    } else {
   1.785 +	/* Take a reference to the default encoding by hand. */
   1.786 +	Tcl_MutexLock(&encodingMutex);
   1.787 +	replacement = defaultEncoding;
   1.788 +	((Encoding *) replacement)->refCount += 1;
   1.789 +	Tcl_MutexUnlock(&encodingMutex);
   1.790 +    }
   1.791 +
   1.792 +    /* Swap in the new encoding, dropping the reference to the old one. */
   1.793 +    Tcl_MutexLock(&encodingMutex);
   1.794 +    FreeEncoding(systemEncoding);
   1.795 +    systemEncoding = replacement;
   1.796 +    Tcl_MutexUnlock(&encodingMutex);
   1.797 +
   1.798 +    return TCL_OK;
   1.799 +}
   1.800 +
   1.801 +/*
   1.802 + *---------------------------------------------------------------------------
   1.803 + *
   1.804 + * Tcl_CreateEncoding --
   1.805 + *
   1.806 + *	This procedure is called to define a new encoding and the procedures
   1.807 + *	that are used to convert between the specified encoding and Unicode.  
   1.808 + *
   1.809 + * Results:
   1.810 + *	Returns a token that represents the encoding.  If an encoding with
   1.811 + *	the same name already existed, the old encoding token remains
   1.812 + *	valid and continues to behave as it used to, and will eventually
   1.813 + *	be garbage collected when the last reference to it goes away.  Any
   1.814 + *	subsequent calls to Tcl_GetEncoding with the specified name will
   1.815 + *	retrieve the most recent encoding token.
   1.816 + *
   1.817 + * Side effects:
   1.818 + *	The new encoding type is entered into a table visible to all
   1.819 + *	interpreters, keyed off the encoding's name.  For each call to
   1.820 + *	this procedure, there should eventually be a call to
   1.821 + *	Tcl_FreeEncoding, so that the database can be cleaned up when
   1.822 + *	encodings aren't needed anymore.
   1.823 + *
   1.824 + *---------------------------------------------------------------------------
   1.825 + */ 
   1.826 +
   1.827 +EXPORT_C Tcl_Encoding
   1.828 +Tcl_CreateEncoding(typePtr)
   1.829 +    Tcl_EncodingType *typePtr;	/* The encoding type. */
   1.830 +{	/* Build a new Encoding from typePtr and register it in encodingTable under typePtr->encodingName. */
   1.831 +    Tcl_HashEntry *hPtr;	/* Table entry for the encoding name. */
   1.832 +    int new;			/* Tested below: 0 means the name was already registered. */
   1.833 +    Encoding *encodingPtr;
   1.834 +    char *name;			/* Private heap copy of the encoding name. */
   1.835 +
   1.836 +    Tcl_MutexLock(&encodingMutex);	/* Held for the whole table update. */
   1.837 +    hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new);
   1.838 +    if (new == 0) {
   1.839 +	/*
   1.840 +	 * Remove old encoding from hash table, but don't delete it until
   1.841 +	 * last reference goes away.
   1.842 +	 */
   1.843 +	 
   1.844 +	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   1.845 +	encodingPtr->hPtr = NULL;	/* Old encoding no longer points at the entry; the entry is reused for the new encoding below. */
   1.846 +    }
   1.847 +
   1.848 +    name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1);	/* +1 for the terminating zero byte. */
   1.849 +    
   1.850 +    encodingPtr = (Encoding *) ckalloc(sizeof(Encoding));
   1.851 +    encodingPtr->name		= strcpy(name, typePtr->encodingName);	/* Copied because the caller string may not be persistent. */
   1.852 +    encodingPtr->toUtfProc	= typePtr->toUtfProc;
   1.853 +    encodingPtr->fromUtfProc	= typePtr->fromUtfProc;
   1.854 +    encodingPtr->freeProc	= typePtr->freeProc;
   1.855 +    encodingPtr->nullSize	= typePtr->nullSize;
   1.856 +    encodingPtr->clientData	= typePtr->clientData;
   1.857 +    if (typePtr->nullSize == 1) {	/* Choose the routine used to measure terminated source strings. */
   1.858 +	encodingPtr->lengthProc = (LengthProc *) strlen;
   1.859 +    } else {
   1.860 +	encodingPtr->lengthProc = (LengthProc *) unilen;	/* Any nullSize other than 1 takes this branch. */
   1.861 +    }
   1.862 +    encodingPtr->refCount	= 1;	/* The single reference handed back to the caller. */
   1.863 +    encodingPtr->hPtr		= hPtr;
   1.864 +    Tcl_SetHashValue(hPtr, encodingPtr);	/* From now on the name maps to the new encoding. */
   1.865 +
   1.866 +    Tcl_MutexUnlock(&encodingMutex);
   1.867 +
   1.868 +    return (Tcl_Encoding) encodingPtr;
   1.869 +}
   1.870 +
   1.871 +/*
   1.872 + *-------------------------------------------------------------------------
   1.873 + *
   1.874 + * Tcl_ExternalToUtfDString --
   1.875 + *
   1.876 + *	Convert a source buffer from the specified encoding into UTF-8.
   1.877 + *	If any of the bytes in the source buffer are invalid or cannot
   1.878 + *	be represented in the target encoding, a default fallback
   1.879 + *	character will be substituted.
   1.880 + *
   1.881 + * Results:
   1.882 + *	The converted bytes are stored in the DString, which is then NULL
   1.883 + *	terminated.  The return value is a pointer to the value stored 
   1.884 + *	in the DString.
   1.885 + *
   1.886 + * Side effects:
   1.887 + *	None.
   1.888 + *
   1.889 + *-------------------------------------------------------------------------
   1.890 + */
   1.891 +
   1.892 +EXPORT_C char * 
   1.893 +Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
   1.894 +    Tcl_Encoding encoding;	/* The encoding for the source string, or
   1.895 +				 * NULL for the default system encoding. */
   1.896 +    CONST char *src;		/* Source string in specified encoding. */
   1.897 +    int srcLen;			/* Source string length in bytes, or < 0 for
   1.898 +				 * encoding-specific string length. */
   1.899 +    Tcl_DString *dstPtr;	/* Uninitialized or free DString in which 
   1.900 +				 * the converted string is stored. */
   1.901 +{	/* Convert src to UTF-8 into dstPtr, growing the DString until the converter stops reporting TCL_CONVERT_NOSPACE. */
   1.902 +    char *dst;			/* Where the next converter call writes. */
   1.903 +    Tcl_EncodingState state;	/* Converter state carried between calls. */
   1.904 +    Encoding *encodingPtr;
   1.905 +    int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
   1.906 +
   1.907 +    Tcl_DStringInit(dstPtr);
   1.908 +    dst = Tcl_DStringValue(dstPtr);
   1.909 +    dstLen = dstPtr->spaceAvl - 1;	/* One byte of the initial buffer is kept back, presumably for the terminator. */
   1.910 +    
   1.911 +    if (encoding == NULL) {
   1.912 +	encoding = systemEncoding;	/* NULL selects the current system encoding. */
   1.913 +    }
   1.914 +    encodingPtr = (Encoding *) encoding;
   1.915 +
   1.916 +    if (src == NULL) {
   1.917 +	srcLen = 0;
   1.918 +    } else if (srcLen < 0) {
   1.919 +	srcLen = (*encodingPtr->lengthProc)(src);	/* LengthProc returns size_t; the value is narrowed to int here. */
   1.920 +    }
   1.921 +    flags = TCL_ENCODING_START | TCL_ENCODING_END;	/* The whole source is supplied in this one call sequence. */
   1.922 +    while (1) {
   1.923 +	result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src,
   1.924 +		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
   1.925 +		&dstChars);
   1.926 +	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);	/* Total bytes produced so far, measured from the start of the DString. */
   1.927 +	if (result != TCL_CONVERT_NOSPACE) {	/* Every other result code ends the loop. */
   1.928 +	    Tcl_DStringSetLength(dstPtr, soFar);
   1.929 +	    return Tcl_DStringValue(dstPtr);
   1.930 +	}
   1.931 +	flags &= ~TCL_ENCODING_START;	/* Later calls continue the same conversion. */
   1.932 +	src += srcRead;			/* Skip the source bytes already consumed. */
   1.933 +	srcLen -= srcRead;
   1.934 +	if (Tcl_DStringLength(dstPtr) == 0) {
   1.935 +	    Tcl_DStringSetLength(dstPtr, dstLen);	/* First growth: length is still 0, so seed it before doubling. */
   1.936 +	}
   1.937 +	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);	/* Roughly double the output space. */
   1.938 +	dst = Tcl_DStringValue(dstPtr) + soFar;	/* Re-read the value pointer after resizing and resume after existing output. */
   1.939 +	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
   1.940 +    }
   1.941 +}
   1.942 +
   1.943 +/*
   1.944 + *-------------------------------------------------------------------------
   1.945 + *
   1.946 + * Tcl_ExternalToUtf --
   1.947 + *
   1.948 + *	Convert a source buffer from the specified encoding into UTF-8.
   1.949 + *
   1.950 + * Results:
   1.951 + *	The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
   1.952 + *	TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
   1.953 + *	as documented in tcl.h.
   1.954 + *
   1.955 + * Side effects:
   1.956 + *	The converted bytes are stored in the output buffer.  
   1.957 + *
   1.958 + *-------------------------------------------------------------------------
   1.959 + */
   1.960 +
   1.961 +EXPORT_C int
   1.962 +Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
   1.963 +	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
   1.964 +    Tcl_Interp *interp;		/* Interp for error return, if not NULL. */
   1.965 +    Tcl_Encoding encoding;	/* The encoding for the source string, or
   1.966 +				 * NULL for the default system encoding. */
   1.967 +    CONST char *src;		/* Source string in specified encoding. */
   1.968 +    int srcLen;			/* Source string length in bytes, or < 0 for
   1.969 +				 * encoding-specific string length. */
   1.970 +    int flags;			/* Conversion control flags. */
   1.971 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
   1.972 +				 * state information used during a piecewise
   1.973 +				 * conversion.  Contents of statePtr are
   1.974 +				 * initialized and/or reset by conversion
   1.975 +				 * routine under control of flags argument. */
   1.976 +    char *dst;			/* Output buffer in which converted string
   1.977 +				 * is stored. */
   1.978 +    int dstLen;			/* The maximum length of output buffer in
   1.979 +				 * bytes. */
   1.980 +    int *srcReadPtr;		/* Filled with the number of bytes from the
   1.981 +				 * source string that were converted.  This
   1.982 +				 * may be less than the original source length
   1.983 +				 * if there was a problem converting some
   1.984 +				 * source characters. */
   1.985 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
   1.986 +				 * stored in the output buffer as a result of
   1.987 +				 * the conversion. */
   1.988 +    int *dstCharsPtr;		/* Filled with the number of characters that
   1.989 +				 * correspond to the bytes stored in the
   1.990 +				 * output buffer. */
   1.991 +{	/* One call to the encoding toUtfProc into a caller-supplied buffer; interp is not referenced in this body. */
   1.992 +    Encoding *encodingPtr;
   1.993 +    int result, srcRead, dstWrote, dstChars;	/* Stand-ins for output pointers the caller passed as NULL. */
   1.994 +    Tcl_EncodingState state;	/* Stand-in state when statePtr is NULL. */
   1.995 +    
   1.996 +    if (encoding == NULL) {
   1.997 +	encoding = systemEncoding;
  1.998 +    }
  1.999 +    encodingPtr = (Encoding *) encoding;
  1.1000 +
  1.1001 +    if (src == NULL) {
  1.1002 +	srcLen = 0;
  1.1003 +    } else if (srcLen < 0) {
  1.1004 +	srcLen = (*encodingPtr->lengthProc)(src);	/* Measure up to the encoding-specific terminator. */
  1.1005 +    }
  1.1006 +    if (statePtr == NULL) {
  1.1007 +	flags |= TCL_ENCODING_START | TCL_ENCODING_END;	/* No caller state: treat this as a complete, one-shot conversion. */
  1.1008 +	statePtr = &state;
  1.1009 +    }
  1.1010 +    if (srcReadPtr == NULL) {
  1.1011 +	srcReadPtr = &srcRead;
  1.1012 +    }
  1.1013 +    if (dstWrotePtr == NULL) {
  1.1014 +	dstWrotePtr = &dstWrote;	/* Needed locally in any case: used to place the terminator below. */
  1.1015 +    }
  1.1016 +    if (dstCharsPtr == NULL) {
  1.1017 +	dstCharsPtr = &dstChars;
  1.1018 +    }
  1.1019 +
  1.1020 +    /*
  1.1021 +     * If there are any null characters in the middle of the buffer, they will
  1.1022 +     * be converted to the UTF-8 null character (\xC080).  To get the actual 
  1.1023 +     * \0 at the end of the destination buffer, we need to append it manually.
  1.1024 +     */
  1.1025 +
  1.1026 +    dstLen--;	/* Keep one byte for the terminator. NOTE(review): assumes the caller passed dstLen >= 1 - confirm. */
  1.1027 +    result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen,
  1.1028 +	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
  1.1029 +	    dstCharsPtr);
  1.1030 +    dst[*dstWrotePtr] = '\0';	/* Terminate right after the bytes the converter reported writing. */
  1.1031 +    return result;
  1.1032 +}
  1.1033 +
  1.1034 +/*
  1.1035 + *-------------------------------------------------------------------------
  1.1036 + *
  1.1037 + * Tcl_UtfToExternalDString --
  1.1038 + *
  1.1039 + *	Convert a source buffer from UTF-8 into the specified encoding.
  1.1040 + *	If any of the bytes in the source buffer are invalid or cannot
  1.1041 + *	be represented in the target encoding, a default fallback
  1.1042 + *	character will be substituted.
  1.1043 + *
  1.1044 + * Results:
  1.1045 + *	The converted bytes are stored in the DString, which is then
  1.1046 + *	NULL terminated in an encoding-specific manner.  The return value 
  1.1047 + *	is a pointer to the value stored in the DString.
  1.1048 + *
  1.1049 + * Side effects:
  1.1050 + *	None.
  1.1051 + *
  1.1052 + *-------------------------------------------------------------------------
  1.1053 + */
  1.1054 +
  1.1055 +EXPORT_C char *
  1.1056 +Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
  1.1057 +    Tcl_Encoding encoding;	/* The encoding for the converted string,
  1.1058 +				 * or NULL for the default system encoding. */
  1.1059 +    CONST char *src;		/* Source string in UTF-8. */
  1.1060 +    int srcLen;			/* Source string length in bytes, or < 0 for
  1.1061 +				 * strlen(). */
  1.1062 +    Tcl_DString *dstPtr;	/* Uninitialized or free DString in which 
  1.1063 +				 * the converted string is stored. */
  1.1064 +{	/* Convert UTF-8 src into dstPtr in the target encoding, growing the DString until the converter stops reporting TCL_CONVERT_NOSPACE. */
  1.1065 +    char *dst;			/* Where the next converter call writes. */
  1.1066 +    Tcl_EncodingState state;	/* Converter state carried between calls. */
  1.1067 +    Encoding *encodingPtr;
  1.1068 +    int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
  1.1069 +    
  1.1070 +    Tcl_DStringInit(dstPtr);
  1.1071 +    dst = Tcl_DStringValue(dstPtr);
  1.1072 +    dstLen = dstPtr->spaceAvl - 1;	/* One byte of the initial buffer is kept back, presumably for the terminator. */
  1.1073 +
  1.1074 +    if (encoding == NULL) {
  1.1075 +	encoding = systemEncoding;	/* NULL selects the current system encoding. */
  1.1076 +    }
  1.1077 +    encodingPtr = (Encoding *) encoding;
  1.1078 +
  1.1079 +    if (src == NULL) {
  1.1080 +	srcLen = 0;
  1.1081 +    } else if (srcLen < 0) {
  1.1082 +	srcLen = strlen(src);	/* Source is UTF-8, so plain strlen is used rather than lengthProc. */
  1.1083 +    }
  1.1084 +    flags = TCL_ENCODING_START | TCL_ENCODING_END;	/* The whole source is supplied in this one call sequence. */
  1.1085 +    while (1) {
  1.1086 +	result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src,
  1.1087 +		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
  1.1088 +		&dstChars);
  1.1089 +	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);	/* Total bytes produced so far, measured from the start of the DString. */
  1.1090 +	if (result != TCL_CONVERT_NOSPACE) {	/* Every other result code ends the loop. */
  1.1091 +	    if (encodingPtr->nullSize == 2) {
  1.1092 +	        Tcl_DStringSetLength(dstPtr, soFar + 1);	/* Presumably relies on SetLength storing a zero at the new length, giving the second terminator byte - confirm. */
  1.1093 +	    }
  1.1094 +	    Tcl_DStringSetLength(dstPtr, soFar);	/* Final length excludes the terminator byte(s). */
  1.1095 +	    return Tcl_DStringValue(dstPtr);
  1.1096 +	}
  1.1097 +	flags &= ~TCL_ENCODING_START;	/* Later calls continue the same conversion. */
  1.1098 +	src += srcRead;			/* Skip the source bytes already consumed. */
  1.1099 +	srcLen -= srcRead;
  1.1100 +	if (Tcl_DStringLength(dstPtr) == 0) {
  1.1101 +	    Tcl_DStringSetLength(dstPtr, dstLen);	/* First growth: length is still 0, so seed it before doubling. */
  1.1102 +	}
  1.1103 +	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);	/* Roughly double the output space. */
  1.1104 +	dst = Tcl_DStringValue(dstPtr) + soFar;	/* Re-read the value pointer after resizing and resume after existing output. */
  1.1105 +	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
  1.1106 +    }
  1.1107 +}
  1.1108 +
  1.1109 +/*
  1.1110 + *-------------------------------------------------------------------------
  1.1111 + *
  1.1112 + * Tcl_UtfToExternal --
  1.1113 + *
  1.1114 + *	Convert a buffer from UTF-8 into the specified encoding.
  1.1115 + *
  1.1116 + * Results:
  1.1117 + *	The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
  1.1118 + *	TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
  1.1119 + *	as documented in tcl.h.
  1.1120 + *
  1.1121 + * Side effects:
  1.1122 + *	The converted bytes are stored in the output buffer.  
  1.1123 + *
  1.1124 + *-------------------------------------------------------------------------
  1.1125 + */
  1.1126 +
  1.1127 +EXPORT_C int
  1.1128 +Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
  1.1129 +	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.1130 +    Tcl_Interp *interp;		/* Interp for error return, if not NULL. */
  1.1131 +    Tcl_Encoding encoding;	/* The encoding for the converted string,
  1.1132 +				 * or NULL for the default system encoding. */
  1.1133 +    CONST char *src;		/* Source string in UTF-8. */
  1.1134 +    int srcLen;			/* Source string length in bytes, or < 0 for
  1.1135 +				 * strlen(). */
  1.1136 +    int flags;			/* Conversion control flags. */
  1.1137 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.1138 +				 * state information used during a piecewise
  1.1139 +				 * conversion.  Contents of statePtr are
  1.1140 +				 * initialized and/or reset by conversion
  1.1141 +				 * routine under control of flags argument. */
  1.1142 +    char *dst;			/* Output buffer in which converted string
  1.1143 +				 * is stored. */
  1.1144 +    int dstLen;			/* The maximum length of output buffer in
  1.1145 +				 * bytes. */
  1.1146 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.1147 +				 * source string that were converted.  This
  1.1148 +				 * may be less than the original source length
  1.1149 +				 * if there was a problem converting some
  1.1150 +				 * source characters. */
  1.1151 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.1152 +				 * stored in the output buffer as a result of
  1.1153 +				 * the conversion. */
  1.1154 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.1155 +				 * correspond to the bytes stored in the
  1.1156 +				 * output buffer. */
  1.1157 +{	/* One call to the encoding fromUtfProc into a caller-supplied buffer; interp is not referenced in this body. */
  1.1158 +    Encoding *encodingPtr;
  1.1159 +    int result, srcRead, dstWrote, dstChars;	/* Stand-ins for output pointers the caller passed as NULL. */
  1.1160 +    Tcl_EncodingState state;	/* Stand-in state when statePtr is NULL. */
  1.1161 +    
  1.1162 +    if (encoding == NULL) {
  1.1163 +	encoding = systemEncoding;
  1.1164 +    }
  1.1165 +    encodingPtr = (Encoding *) encoding;
  1.1166 +
  1.1167 +    if (src == NULL) {
  1.1168 +	srcLen = 0;
  1.1169 +    } else if (srcLen < 0) {
  1.1170 +	srcLen = strlen(src);	/* Source is UTF-8, so plain strlen is used rather than lengthProc. */
  1.1171 +    }
  1.1172 +    if (statePtr == NULL) {
  1.1173 +	flags |= TCL_ENCODING_START | TCL_ENCODING_END;	/* No caller state: treat this as a complete, one-shot conversion. */
  1.1174 +	statePtr = &state;
  1.1175 +    }
  1.1176 +    if (srcReadPtr == NULL) {
  1.1177 +	srcReadPtr = &srcRead;
  1.1178 +    }
  1.1179 +    if (dstWrotePtr == NULL) {
  1.1180 +	dstWrotePtr = &dstWrote;	/* Needed locally in any case: used to place the terminator below. */
  1.1181 +    }
  1.1182 +    if (dstCharsPtr == NULL) {
  1.1183 +	dstCharsPtr = &dstChars;
  1.1184 +    }
  1.1185 +
  1.1186 +    dstLen -= encodingPtr->nullSize;	/* Keep room for the terminator. NOTE(review): assumes dstLen >= nullSize - confirm. */
  1.1187 +    result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen,
  1.1188 +	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
  1.1189 +	    dstCharsPtr);
  1.1190 +    if (encodingPtr->nullSize == 2) {
  1.1191 +	dst[*dstWrotePtr + 1] = '\0';	/* Second byte of a two-byte terminator. */
  1.1192 +    }
  1.1193 +    dst[*dstWrotePtr] = '\0';	/* First (or only) terminator byte, right after the converted output. */
  1.1194 +    
  1.1195 +    return result;
  1.1196 +}
  1.1197 +
  1.1198 +/*
  1.1199 + *---------------------------------------------------------------------------
  1.1200 + *
  1.1201 + * Tcl_FindExecutable --
  1.1202 + *
  1.1203 + *	This procedure computes the absolute path name of the current
  1.1204 + *	application, given its argv[0] value.
  1.1205 + *
  1.1206 + * Results:
  1.1207 + *	None.
  1.1208 + *
  1.1209 + * Side effects:
  1.1210 + *	The variable tclExecutableName gets filled in with the file
  1.1211 + *	name for the application, if we figured it out.  If we couldn't
  1.1212 + *	figure it out, tclExecutableName is set to NULL.
  1.1213 + *
  1.1214 + *---------------------------------------------------------------------------
  1.1215 + */
  1.1216 +
  1.1217 +EXPORT_C void
  1.1218 +Tcl_FindExecutable(argv0)
  1.1219 +    CONST char *argv0;		/* The value of the application's argv[0]
  1.1220 +				 * (native). */
  1.1221 +{	/* Initialize subsystems, locate encodings, and store a heap copy of the executable name in tclExecutableName. */
  1.1222 +    int mustCleanUtf;		/* Result of TclFindEncodings; non-zero means the name is round-tripped through the system encoding. */
  1.1223 +    CONST char *name;		/* Name returned by TclpFindExecutable. */
  1.1224 +    Tcl_DString buffer, nameString;	/* buffer: name in external form; nameString: name converted back to UTF-8. */
  1.1225 +
  1.1226 +    TclInitSubsystems(argv0);
  1.1227 +
  1.1228 +    if (argv0 == NULL) {
  1.1229 +	goto done;	/* Nothing to look up; tclExecutableName is not touched on this path. */
  1.1230 +    }
  1.1231 +    if (tclExecutableName != NULL) {
  1.1232 +	ckfree(tclExecutableName);	/* Discard the value stored by an earlier call. */
  1.1233 +	tclExecutableName = NULL;
  1.1234 +    }
  1.1235 +    if ((name = TclpFindExecutable(argv0)) == NULL) {
  1.1236 +	goto done;	/* Lookup failed; tclExecutableName stays NULL. */
  1.1237 +    }
  1.1238 +
  1.1239 +    /*
  1.1240 +     * The value returned from TclpFindExecutable is a UTF string that
  1.1241 +     * is possibly dirty depending on when it was initialized.
  1.1242 +     * TclFindEncodings will indicate whether we must "clean" the UTF (as
  1.1243 +     * reported by the underlying system).  To assure that the UTF string
  1.1244 +     * is a properly encoded native string for this system, convert the
  1.1245 +     * UTF string to the default native encoding before the default
  1.1246 +     * encoding is initialized.  Then, convert it back to UTF after the
  1.1247 +     * system encoding is loaded.
  1.1248 +     */
  1.1249 +    
  1.1250 +    Tcl_UtfToExternalDString(NULL, name, -1, &buffer);	/* NULL encoding: uses whatever systemEncoding is at this point. */
  1.1251 +    mustCleanUtf = TclFindEncodings(argv0);
  1.1252 +
  1.1253 +    /*
  1.1254 +     * Now it is OK to convert the native string back to UTF and set
  1.1255 +     * the value of the tclExecutableName.
  1.1256 +     */
  1.1257 +    
  1.1258 +    if (mustCleanUtf) {
  1.1259 +	Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&buffer), -1,
  1.1260 +		&nameString);
  1.1261 +	tclExecutableName = (char *)
  1.1262 +	    ckalloc((unsigned) (Tcl_DStringLength(&nameString) + 1));	/* +1 for the terminating zero byte. */
  1.1263 +	strcpy(tclExecutableName, Tcl_DStringValue(&nameString));
  1.1264 +
  1.1265 +	Tcl_DStringFree(&nameString);
  1.1266 +    } else {
  1.1267 +	tclExecutableName = (char *) ckalloc((unsigned) (strlen(name) + 1));	/* No cleaning needed: copy name as is. */
  1.1268 +	strcpy(tclExecutableName, name);
  1.1269 +    }
  1.1270 +    Tcl_DStringFree(&buffer);	/* buffer was filled on both branches above. */
  1.1271 +    return;
  1.1272 +	
  1.1273 +    done:
  1.1274 +    (void) TclFindEncodings(argv0);	/* Still run encoding discovery on the early-exit paths; its result is ignored. */
  1.1275 +}
  1.1276 +
  1.1277 +/*
  1.1278 + *---------------------------------------------------------------------------
  1.1279 + *
  1.1280 + * LoadEncodingFile --
  1.1281 + *
  1.1282 + *	Read a file that describes an encoding and create a new Encoding
  1.1283 + *	from the data.  
  1.1284 + *
  1.1285 + * Results:
  1.1286 + *	The return value is the newly loaded Encoding, or NULL if
  1.1287 + *	the file didn't exist or was in the incorrect format.  If NULL was
  1.1288 + *	returned, an error message is left in interp's result object,
  1.1289 + *	unless interp was NULL.
  1.1290 + *
  1.1291 + * Side effects:
  1.1292 + *	File read from disk.  
  1.1293 + *
  1.1294 + *---------------------------------------------------------------------------
  1.1295 + */
  1.1296 +
  1.1297 +static Tcl_Encoding
  1.1298 +LoadEncodingFile(interp, name)
  1.1299 +    Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
  1.1300 +    CONST char *name;		/* The name of the encoding file on disk
  1.1301 +				 * and also the name for new encoding. */
  1.1302 +{	/* Search the library path for the encoding file, read its type letter, and dispatch to the matching loader. */
  1.1303 +    int objc, i, ch;		/* ch: first character of the first line that does not start with '#'. */
  1.1304 +    Tcl_Obj **objv;		/* Elements of the library path list. */
  1.1305 +    Tcl_Obj *pathPtr;
  1.1306 +    Tcl_Channel chan;
  1.1307 +    Tcl_Encoding encoding;
  1.1308 +
  1.1309 +    pathPtr = TclGetLibraryPath();
  1.1310 +    if (pathPtr == NULL) {
  1.1311 +	goto unknown;
  1.1312 +    }
  1.1313 +    objc = 0;	/* Preset so the search loop is skipped if the list call below leaves it unchanged. */
  1.1314 +    Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);	/* Return value is not checked. */
  1.1315 +
  1.1316 +    chan = NULL;
  1.1317 +    for (i = 0; i < objc; i++) {	/* The first directory that yields an open channel wins. */
  1.1318 +	chan = OpenEncodingFile(Tcl_GetString(objv[i]), name);
  1.1319 +	if (chan != NULL) {
  1.1320 +	    break;
  1.1321 +	}
  1.1322 +    }
  1.1323 +
  1.1324 +    if (chan == NULL) {
  1.1325 +	goto unknown;	/* No directory on the path produced a channel. */
  1.1326 +    }
  1.1327 +
  1.1328 +    Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
  1.1329 +
  1.1330 +    while (1) {	/* Skip leading lines whose first character is '#'. */
  1.1331 +	Tcl_DString ds;
  1.1332 +
  1.1333 +	Tcl_DStringInit(&ds);
  1.1334 +	Tcl_Gets(chan, &ds);	/* Return value is not checked; at end of file ds presumably stays empty so ch becomes 0 and the loop ends - confirm. */
  1.1335 +	ch = Tcl_DStringValue(&ds)[0];
  1.1336 +	Tcl_DStringFree(&ds);
  1.1337 +	if (ch != '#') {
  1.1338 +	    break;
  1.1339 +	}
  1.1340 +    }
  1.1341 +
  1.1342 +    encoding = NULL;	/* Stays NULL when ch matches none of the cases below. */
  1.1343 +    switch (ch) {
  1.1344 +	case 'S': {
  1.1345 +	    encoding = LoadTableEncoding(interp, name, ENCODING_SINGLEBYTE,
  1.1346 +		    chan);
  1.1347 +	    break;
  1.1348 +	}
  1.1349 +	case 'D': {
  1.1350 +	    encoding = LoadTableEncoding(interp, name, ENCODING_DOUBLEBYTE,
  1.1351 +		    chan);
  1.1352 +	    break;
  1.1353 +	}
  1.1354 +	case 'M': {
  1.1355 +	    encoding = LoadTableEncoding(interp, name, ENCODING_MULTIBYTE,
  1.1356 +		    chan);
  1.1357 +	    break;
  1.1358 +	}
  1.1359 +	case 'E': {
  1.1360 +	    encoding = LoadEscapeEncoding(name, chan);	/* This loader is not given interp. */
  1.1361 +	    break;
  1.1362 +	}
  1.1363 +    }
  1.1364 +    if ((encoding == NULL) && (interp != NULL)) {
  1.1365 +	Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL);
  1.1366 +	if (ch == 'E') {
  1.1367 +	    Tcl_AppendResult(interp, " or missing sub-encoding", NULL);
  1.1368 +	}
  1.1369 +    }
  1.1370 +    Tcl_Close(NULL, chan);	/* The channel is closed on success and on failure alike. */
  1.1371 +    return encoding;
  1.1372 +
  1.1373 +    unknown:	/* Reached when there is no library path or no file could be opened. */
  1.1374 +    if (interp != NULL) {
  1.1375 +	Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
  1.1376 +    }
  1.1377 +    return NULL;
  1.1378 +}
  1.1379 +
  1.1380 +/*
  1.1381 + *----------------------------------------------------------------------
  1.1382 + *
  1.1383 + * OpenEncodingFile --
  1.1384 + *
  1.1385 + *	Look for the file encoding/<name>.enc in the specified
  1.1386 + *	directory.
  1.1387 + *
  1.1388 + * Results:
  1.1389 + *	Returns an open file channel if the file exists.
  1.1390 + *
  1.1391 + * Side effects:
  1.1392 + *	None.
  1.1393 + *
  1.1394 + *----------------------------------------------------------------------
  1.1395 + */
  1.1396 +
  1.1397 +static Tcl_Channel
  1.1398 +OpenEncodingFile(dir, name)
  1.1399 +    CONST char *dir;		/* Directory to look in. */
  1.1400 +    CONST char *name;		/* Encoding name; ".enc" is appended to form the file name. */
  1.1401 +
  1.1402 +{	/* Join dir, "encoding" and name into a path, append ".enc", and try to open it for reading. */
  1.1403 +    CONST char *argv[3];	/* Path components handed to Tcl_JoinPath. */
  1.1404 +    Tcl_DString pathString;	/* Owns the joined path text. */
  1.1405 +    CONST char *path;
  1.1406 +    Tcl_Channel chan;
  1.1407 +    Tcl_Obj *pathPtr;		/* Temporary object form of the path for the filesystem call. */
  1.1408 +    
  1.1409 +    argv[0] = dir;
  1.1410 +    argv[1] = "encoding";
  1.1411 +    argv[2] = name;
  1.1412 +
  1.1413 +    Tcl_DStringInit(&pathString);
  1.1414 +    Tcl_JoinPath(3, argv, &pathString);
  1.1415 +    path = Tcl_DStringAppend(&pathString, ".enc", -1);	/* path points into pathString; used before pathString is freed. */
  1.1416 +    pathPtr = Tcl_NewStringObj(path,-1);
  1.1417 +
  1.1418 +    Tcl_IncrRefCount(pathPtr);	/* Hold a reference for the duration of the open call. */
  1.1419 +    chan = Tcl_FSOpenFileChannel(NULL, pathPtr, "r", 0);	/* NULL interp: no error message is requested; the caller tests chan against NULL. */
  1.1420 +    Tcl_DecrRefCount(pathPtr);
  1.1421 +
  1.1422 +    Tcl_DStringFree(&pathString);
  1.1423 +
  1.1424 +    return chan;
  1.1425 +}
  1.1426 +
  1.1427 +/*
  1.1428 + *-------------------------------------------------------------------------
  1.1429 + *
  1.1430 + * LoadTableEncoding --
  1.1431 + *
  1.1432 + *	Helper function for LoadEncodingFile().  Loads a table that 
  1.1433 + *	converts between Unicode and some other encoding and creates an 
  1.1434 + *	encoding (using a TableEncoding structure) from that information.
  1.1435 + *
  1.1436 + *	File contains binary data, but begins with a marker to indicate 
  1.1437 + *	byte-ordering, so that same binary file can be read on either
  1.1438 + *	endian platforms.
  1.1439 + *
  1.1440 + * Results:
  1.1441 + *	The return value is the new encoding, or NULL if the encoding 
  1.1442 + *	could not be created (because the file contained invalid data).
  1.1443 + *
  1.1444 + * Side effects:
  1.1445 + *	None.
  1.1446 + *
  1.1447 + *-------------------------------------------------------------------------
  1.1448 + */
  1.1449 +
  1.1450 +static Tcl_Encoding
  1.1451 +LoadTableEncoding(interp, name, type, chan)
  1.1452 +    Tcl_Interp *interp;		/* Interp for temporary obj while reading. */
  1.1453 +    CONST char *name;		/* Name for new encoding. */
  1.1454 +    int type;			/* Type of encoding (ENCODING_?????). */
  1.1455 +    Tcl_Channel chan;		/* File containing new encoding. */
  1.1456 +{
  1.1457 +    Tcl_DString lineString;
  1.1458 +    Tcl_Obj *objPtr;
  1.1459 +    char *line;
  1.1460 +    int i, hi, lo, numPages, symbol, fallback;
  1.1461 +    unsigned char used[256];
  1.1462 +    unsigned int size;
  1.1463 +    TableEncodingData *dataPtr;
  1.1464 +    unsigned short *pageMemPtr;
  1.1465 +    Tcl_EncodingType encType;
  1.1466 +
  1.1467 +    /*
  1.1468 +     * Speed over memory. Use a full 256 character table to decode hex
  1.1469 +     * sequences in the encoding files.
  1.1470 +     */
  1.1471 +
  1.1472 +    static char staticHex[] = {
  1.1473 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*   0 ...  15 */
  1.1474 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16 ...  31 */
  1.1475 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  32 ...  47 */
  1.1476 +      0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /*  48 ...  63 */
  1.1477 +      0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  64 ...  79 */
  1.1478 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  80 ...  95 */
  1.1479 +      0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  96 ... 111 */
  1.1480 +      0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */
  1.1481 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */
  1.1482 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */
  1.1483 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */
  1.1484 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */
  1.1485 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */
  1.1486 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */
  1.1487 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */
  1.1488 +      0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */
  1.1489 +    };
  1.1490 +
  1.1491 +    Tcl_DStringInit(&lineString);
  1.1492 +    Tcl_Gets(chan, &lineString);
  1.1493 +    line = Tcl_DStringValue(&lineString);
  1.1494 +
  1.1495 +    fallback = (int) strtol(line, &line, 16);
  1.1496 +    symbol = (int) strtol(line, &line, 10);
  1.1497 +    numPages = (int) strtol(line, &line, 10);
  1.1498 +    Tcl_DStringFree(&lineString);
  1.1499 +
  1.1500 +    if (numPages < 0) {
  1.1501 +	numPages = 0;
  1.1502 +    } else if (numPages > 256) {
  1.1503 +	numPages = 256;
  1.1504 +    }
  1.1505 +
  1.1506 +    memset(used, 0, sizeof(used));
  1.1507 +
  1.1508 +#undef PAGESIZE
  1.1509 +#define PAGESIZE    (256 * sizeof(unsigned short))
  1.1510 +
  1.1511 +    dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData));
  1.1512 +    memset(dataPtr, 0, sizeof(TableEncodingData));
  1.1513 +
  1.1514 +    dataPtr->fallback = fallback;
  1.1515 +
  1.1516 +    /*
  1.1517 +     * Read the table that maps characters to Unicode.  Performs a single
  1.1518 +     * malloc to get the memory for the array and all the pages needed by
  1.1519 +     * the array.
  1.1520 +     */
  1.1521 +
  1.1522 +    size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
  1.1523 +    dataPtr->toUnicode = (unsigned short **) ckalloc(size);
  1.1524 +    memset(dataPtr->toUnicode, 0, size);
  1.1525 +    pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
  1.1526 +
  1.1527 +    if (interp == NULL) {
  1.1528 +	objPtr = Tcl_NewObj();
  1.1529 +    } else {
  1.1530 +	objPtr = Tcl_GetObjResult(interp);
  1.1531 +    }
  1.1532 +    for (i = 0; i < numPages; i++) {
  1.1533 +	int ch;
  1.1534 +	char *p;
  1.1535 +
  1.1536 +	Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
  1.1537 +	p = Tcl_GetString(objPtr);
  1.1538 +	hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]];
  1.1539 +	dataPtr->toUnicode[hi] = pageMemPtr;
  1.1540 +	p += 2;
  1.1541 +	for (lo = 0; lo < 256; lo++) {
  1.1542 +	    if ((lo & 0x0f) == 0) {
  1.1543 +		p++;
  1.1544 +	    }
  1.1545 +	    ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8)
  1.1546 +		+ (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]];
  1.1547 +	    if (ch != 0) {
  1.1548 +		used[ch >> 8] = 1;
  1.1549 +	    }
  1.1550 +	    *pageMemPtr = (unsigned short) ch;
  1.1551 +	    pageMemPtr++;
  1.1552 +	    p += 4;
  1.1553 +	}
  1.1554 +    }
  1.1555 +    if (interp == NULL) {
  1.1556 +	Tcl_DecrRefCount(objPtr);
  1.1557 +    } else {
  1.1558 +	Tcl_ResetResult(interp);
  1.1559 +    }
  1.1560 +	
  1.1561 +    if (type == ENCODING_DOUBLEBYTE) {
  1.1562 +	memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes));
  1.1563 +    } else {
  1.1564 +	for (hi = 1; hi < 256; hi++) {
  1.1565 +	    if (dataPtr->toUnicode[hi] != NULL) {
  1.1566 +		dataPtr->prefixBytes[hi] = 1;
  1.1567 +	    }
  1.1568 +	}
  1.1569 +    }
  1.1570 +
  1.1571 +    /*
  1.1572 +     * Invert toUnicode array to produce the fromUnicode array.  Performs a
  1.1573 +     * single malloc to get the memory for the array and all the pages
  1.1574 +     * needed by the array.  While reading in the toUnicode array, we
  1.1575 +     * remembered what pages that would be needed for the fromUnicode array.
  1.1576 +     */
  1.1577 +
  1.1578 +    if (symbol) {
  1.1579 +	used[0] = 1;
  1.1580 +    }
  1.1581 +    numPages = 0;
  1.1582 +    for (hi = 0; hi < 256; hi++) {
  1.1583 +	if (used[hi]) {
  1.1584 +	    numPages++;
  1.1585 +	}
  1.1586 +    }
  1.1587 +    size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
  1.1588 +    dataPtr->fromUnicode = (unsigned short **) ckalloc(size);
  1.1589 +    memset(dataPtr->fromUnicode, 0, size);
  1.1590 +    pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
  1.1591 +
  1.1592 +    for (hi = 0; hi < 256; hi++) {
  1.1593 +	if (dataPtr->toUnicode[hi] == NULL) {
  1.1594 +	    dataPtr->toUnicode[hi] = emptyPage;
  1.1595 +	} else {
  1.1596 +	    for (lo = 0; lo < 256; lo++) {
  1.1597 +		int ch;
  1.1598 +
  1.1599 +		ch = dataPtr->toUnicode[hi][lo];
  1.1600 +		if (ch != 0) {
  1.1601 +		    unsigned short *page;
  1.1602 +		    
  1.1603 +		    page = dataPtr->fromUnicode[ch >> 8];
  1.1604 +		    if (page == NULL) {
  1.1605 +			page = pageMemPtr;
  1.1606 +			pageMemPtr += 256;
  1.1607 +			dataPtr->fromUnicode[ch >> 8] = page;
  1.1608 +		    }
  1.1609 +		    page[ch & 0xff] = (unsigned short) ((hi << 8) + lo);
  1.1610 +		}
  1.1611 +	    }
  1.1612 +	}
  1.1613 +    }
  1.1614 +    if (type == ENCODING_MULTIBYTE) {
  1.1615 +	/*
  1.1616 +	 * If multibyte encodings don't have a backslash character, define
  1.1617 +	 * one.  Otherwise, on Windows, native file names won't work because
  1.1618 +	 * the backslash in the file name will map to the unknown character
  1.1619 +	 * (question mark) when converting from UTF-8 to external encoding.
  1.1620 +	 */
  1.1621 +
  1.1622 +	if (dataPtr->fromUnicode[0] != NULL) {
  1.1623 +	    if (dataPtr->fromUnicode[0]['\\'] == '\0') {
  1.1624 +		dataPtr->fromUnicode[0]['\\'] = '\\';
  1.1625 +	    }
  1.1626 +	}
  1.1627 +    }
  1.1628 +    if (symbol) {
  1.1629 +	unsigned short *page;
  1.1630 +	
  1.1631 +	/*
  1.1632 +	 * Make a special symbol encoding that not only maps the symbol
  1.1633 +	 * characters from their Unicode code points down into page 0, but
  1.1634 +	 * also ensure that the characters on page 0 map to themselves.
  1.1635 +	 * This is so that a symbol font can be used to display a simple
  1.1636 +	 * string like "abcd" and have alpha, beta, chi, delta show up,
  1.1637 +	 * rather than have "unknown" chars show up because strictly
  1.1638 +	 * speaking the symbol font doesn't have glyphs for those low ascii
  1.1639 +	 * chars.
  1.1640 +	 */
  1.1641 +
  1.1642 +	page = dataPtr->fromUnicode[0];
  1.1643 +	if (page == NULL) {
  1.1644 +	    page = pageMemPtr;
  1.1645 +	    dataPtr->fromUnicode[0] = page;
  1.1646 +	}
  1.1647 +	for (lo = 0; lo < 256; lo++) {
  1.1648 +	    if (dataPtr->toUnicode[0][lo] != 0) {
  1.1649 +		page[lo] = (unsigned short) lo;
  1.1650 +	    }
  1.1651 +	}
  1.1652 +    }
  1.1653 +    for (hi = 0; hi < 256; hi++) {
  1.1654 +	if (dataPtr->fromUnicode[hi] == NULL) {
  1.1655 +	    dataPtr->fromUnicode[hi] = emptyPage;
  1.1656 +	}
  1.1657 +    }
  1.1658 +    /*
  1.1659 +     * For trailing 'R'everse encoding, see [Patch #689341]
  1.1660 +     */
  1.1661 +    Tcl_DStringInit(&lineString);
  1.1662 +    do {
  1.1663 +	int len;
  1.1664 +	/* skip leading empty lines */
  1.1665 +	while ((len = Tcl_Gets(chan, &lineString)) == 0)
  1.1666 +	    ;
  1.1667 +	if (len < 0) {
  1.1668 +	    break;
  1.1669 +	}
  1.1670 +	line = Tcl_DStringValue(&lineString);
  1.1671 +	if (line[0] != 'R') {
  1.1672 +	    break;
  1.1673 +	}
  1.1674 +	for (Tcl_DStringSetLength(&lineString, 0);
  1.1675 +	     (len = Tcl_Gets(chan, &lineString)) >= 0;
  1.1676 +	     Tcl_DStringSetLength(&lineString, 0)) {
  1.1677 +	    unsigned char* p;
  1.1678 +	    int to, from;
  1.1679 +	    if (len < 5) {
  1.1680 +		continue;
  1.1681 +	    }
  1.1682 +	    p = (unsigned char*) Tcl_DStringValue(&lineString);
  1.1683 +	    to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
  1.1684 +		+ (staticHex[p[2]] << 4) + staticHex[p[3]];
  1.1685 +	    if (to == 0) {
  1.1686 +	    	continue;
  1.1687 +	    }
  1.1688 +	    for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) {
  1.1689 +		from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
  1.1690 +			+ (staticHex[p[2]] << 4) + staticHex[p[3]];
  1.1691 +	    	if (from == 0) {
  1.1692 +		    continue;
  1.1693 +		}
  1.1694 +		dataPtr->fromUnicode[from >> 8][from & 0xff] = to;
  1.1695 +	    }
  1.1696 +	}
  1.1697 +    } while (0);
  1.1698 +    Tcl_DStringFree(&lineString);
  1.1699 +
  1.1700 +    encType.encodingName    = name;
  1.1701 +    encType.toUtfProc	    = TableToUtfProc;
  1.1702 +    encType.fromUtfProc	    = TableFromUtfProc;
  1.1703 +    encType.freeProc	    = TableFreeProc;
  1.1704 +    encType.nullSize	    = (type == ENCODING_DOUBLEBYTE) ? 2 : 1;
  1.1705 +    encType.clientData	    = (ClientData) dataPtr;
  1.1706 +    return Tcl_CreateEncoding(&encType);
  1.1707 +}
  1.1708 +
  1.1709 +/*
  1.1710 + *-------------------------------------------------------------------------
  1.1711 + *
  1.1712 + * LoadEscapeEncoding --
  1.1713 + *
  1.1714 + *	Helper function for LoadEncodingTable().  Loads a state machine
  1.1715 + *	that converts between Unicode and some other encoding.  
  1.1716 + *
  1.1717 + *	File contains text data that describes the escape sequences that
  1.1718 + *	are used to choose an encoding and the associated names for the 
  1.1719 + *	sub-encodings.
  1.1720 + *
  1.1721 + * Results:
  1.1722 + *	The return value is the new encoding, or NULL if a sub-encoding
  1.1723 + *	named in the file is unavailable or is not a table-based encoding.
  1.1724 + *
  1.1725 + * Side effects:
  1.1726 + *	None.
  1.1727 + *
  1.1728 + *-------------------------------------------------------------------------
  1.1729 + */
  1.1730 +
  1.1731 +static Tcl_Encoding
  1.1732 +LoadEscapeEncoding(name, chan)
  1.1733 +    CONST char *name;		/* Name for new encoding. */
  1.1734 +    Tcl_Channel chan;		/* File containing new encoding. */
  1.1735 +{
  1.1736 +    int i, missingSubEncoding = 0;
  1.1737 +    unsigned int size;
  1.1738 +    Tcl_DString escapeData;	/* Accumulates EscapeSubTable records. */
  1.1739 +    char init[16], final[16];
  1.1740 +    EscapeEncodingData *dataPtr;
  1.1741 +    Tcl_EncodingType type;
  1.1742 +
  1.1743 +    init[0] = '\0';
  1.1744 +    final[0] = '\0';
  1.1745 +    Tcl_DStringInit(&escapeData);
  1.1746 +
  1.1747 +    while (1) {
  1.1748 +	int argc;
  1.1749 +	CONST char **argv;
  1.1750 +	Tcl_DString lineString;
  1.1751 +
  1.1752 +	Tcl_DStringInit(&lineString);
  1.1753 +	if (Tcl_Gets(chan, &lineString) < 0) {
  1.1754 +	    Tcl_DStringFree(&lineString);	/* No line read; stop parsing. */
  1.1755 +	    break;
  1.1756 +	}
  1.1757 +	if (Tcl_SplitList(NULL, Tcl_DStringValue(&lineString), &argc, &argv)
  1.1758 +		!= TCL_OK) {
  1.1759 +	    Tcl_DStringFree(&lineString);	/* Release before skipping. */
  1.1760 +	    continue;
  1.1761 +	}
  1.1762 +	if (argc >= 2) {
  1.1763 +	    if (strcmp(argv[0], "name") == 0) {
  1.1764 +		;			/* "name" lines are ignored. */
  1.1765 +	    } else if (strcmp(argv[0], "init") == 0) {
  1.1766 +		strncpy(init, argv[1], sizeof(init));
  1.1767 +		init[sizeof(init) - 1] = '\0';
  1.1768 +	    } else if (strcmp(argv[0], "final") == 0) {
  1.1769 +		strncpy(final, argv[1], sizeof(final));
  1.1770 +		final[sizeof(final) - 1] = '\0';
  1.1771 +	    } else {
  1.1772 +		EscapeSubTable est;
  1.1773 +
  1.1774 +		strncpy(est.sequence, argv[1], sizeof(est.sequence));
  1.1775 +		est.sequence[sizeof(est.sequence) - 1] = '\0';
  1.1776 +		est.sequenceLen = strlen(est.sequence);
  1.1777 +		strncpy(est.name, argv[0], sizeof(est.name));
  1.1778 +		est.name[sizeof(est.name) - 1] = '\0';
  1.1779 +
  1.1780 +		/*
  1.1781 +		 * Load the subencodings first so we're never stuck
  1.1782 +		 * trying to use a half-loaded system encoding to
  1.1783 +		 * open/read a *.enc file.
  1.1784 +		 */
  1.1785 +
  1.1786 +		est.encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, est.name);
  1.1787 +		if ((est.encodingPtr == NULL) 
  1.1788 +			|| (est.encodingPtr->toUtfProc != TableToUtfProc)) {
  1.1789 +		    missingSubEncoding = 1;
  1.1790 +		}
  1.1791 +		Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
  1.1792 +	    }
  1.1793 +	}
  1.1794 +	ckfree((char *) argv);
  1.1795 +	Tcl_DStringFree(&lineString);
  1.1796 +    }
  1.1797 +    if (missingSubEncoding) {
  1.1798 +	Tcl_DStringFree(&escapeData);
  1.1799 +	return NULL;
  1.1800 +    }
  1.1801 +
  1.1802 +    size = sizeof(EscapeEncodingData)
  1.1803 +	    - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData);
  1.1804 +    dataPtr = (EscapeEncodingData *) ckalloc(size);
  1.1805 +    dataPtr->initLen = strlen(init);
  1.1806 +    strcpy(dataPtr->init, init);
  1.1807 +    dataPtr->finalLen = strlen(final);
  1.1808 +    strcpy(dataPtr->final, final);
  1.1809 +    dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
  1.1810 +    memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData),
  1.1811 +	    (size_t) Tcl_DStringLength(&escapeData));
  1.1812 +    Tcl_DStringFree(&escapeData);
  1.1813 +
  1.1814 +    memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
  1.1815 +    for (i = 0; i < dataPtr->numSubTables; i++) {
  1.1816 +	dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1;
  1.1817 +    }
  1.1818 +    if (dataPtr->init[0] != '\0') {
  1.1819 +	dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1;
  1.1820 +    }
  1.1821 +    if (dataPtr->final[0] != '\0') {
  1.1822 +	dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1;
  1.1823 +    }
  1.1824 +
  1.1825 +    type.encodingName	= name;
  1.1826 +    type.toUtfProc	= EscapeToUtfProc;
  1.1827 +    type.fromUtfProc    = EscapeFromUtfProc;
  1.1828 +    type.freeProc	= EscapeFreeProc;
  1.1829 +    type.nullSize	= 1;
  1.1830 +    type.clientData	= (ClientData) dataPtr;
  1.1831 +
  1.1832 +    return Tcl_CreateEncoding(&type);
  1.1833 +}
  1.1834 +
  1.1835 +/*
  1.1836 + *-------------------------------------------------------------------------
  1.1837 + *
  1.1838 + * BinaryProc --
  1.1839 + *
  1.1840 + *	The default conversion when no other conversion is specified.
  1.1841 + *	No translation is done; source bytes are copied directly to 
  1.1842 + *	destination bytes.
  1.1843 + *
  1.1844 + * Results:
  1.1845 + *	Returns TCL_OK if conversion was successful.
  1.1846 + *
  1.1847 + * Side effects:
  1.1848 + *	None.
  1.1849 + *
  1.1850 + *-------------------------------------------------------------------------
  1.1851 + */
  1.1852 +
  1.1853 +static int
  1.1854 +BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.1855 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.1856 +    ClientData clientData;	/* Not used. */
  1.1857 +    CONST char *src;		/* Bytes to copy (encoding unknown). */
  1.1858 +    int srcLen;			/* Number of bytes available at src. */
  1.1859 +    int flags;			/* Conversion control flags; not examined
  1.1860 +				 * by this routine. */
  1.1861 +    Tcl_EncodingState *statePtr;/* Slot for piecewise-conversion state;
  1.1862 +				 * not referenced by this routine since
  1.1863 +				 * a plain byte copy carries nothing from
  1.1864 +				 * one call to the next. */
  1.1865 +    char *dst;			/* Buffer that receives the copied
  1.1866 +				 * bytes. */
  1.1867 +    int dstLen;			/* Capacity of the output buffer in
  1.1868 +				 * bytes. */
  1.1869 +    int *srcReadPtr;		/* Receives the count of source bytes that
  1.1870 +				 * were copied. */
  1.1871 +    int *dstWrotePtr;		/* Receives the count of bytes that were
  1.1872 +				 * placed in the output buffer by this
  1.1873 +				 * call. */
  1.1874 +    int *dstCharsPtr;		/* Receives the count of characters that
  1.1875 +				 * the stored bytes stand for (the same
  1.1876 +				 * number as the byte count). */
  1.1877 +{
  1.1878 +    int status = TCL_OK;
  1.1879 +    int room;			/* Bytes of dst that may be filled. */
  1.1880 +
  1.1881 +    room = dstLen - (TCL_UTF_MAX - 1);
  1.1882 +    if (room < 0) {
  1.1883 +	room = 0;
  1.1884 +    }
  1.1885 +    if (srcLen > room) {
  1.1886 +	srcLen = room;		/* Copy only the part that fits. */
  1.1887 +	status = TCL_CONVERT_NOSPACE;
  1.1888 +    }
  1.1889 +    /* One byte read, one byte written, one character counted. */
  1.1890 +    *srcReadPtr = srcLen;
  1.1891 +    *dstWrotePtr = srcLen;
  1.1892 +    *dstCharsPtr = srcLen;
  1.1893 +    memcpy((void *) dst, (void *) src, (size_t) srcLen);
  1.1894 +    return status;
  1.1895 +}
  1.1896 +
  1.1897 +
  1.1898 +/*
  1.1899 + *-------------------------------------------------------------------------
  1.1900 + *
  1.1901 + * UtfIntToUtfExtProc --
  1.1902 + *
  1.1903 + *	Convert from UTF-8 to UTF-8 while converting null-bytes from
  1.1904 + *	Tcl's internal representation (0xc0, 0x80) to the official
  1.1905 + *	representation (0x00). See UtfToUtfProc for details.
  1.1906 + *
  1.1907 + * Results:
  1.1908 + *	Returns TCL_OK if conversion was successful.
  1.1909 + *
  1.1910 + * Side effects:
  1.1911 + *	None.
  1.1912 + *
  1.1913 + *-------------------------------------------------------------------------
  1.1914 + */
  1.1915 +static int 
  1.1916 +UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.1917 +	     srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.1918 +    ClientData clientData;	/* Forwarded unchanged to UtfToUtfProc. */
  1.1919 +    CONST char *src;		/* UTF-8 input bytes. */
  1.1920 +    int srcLen;			/* Number of bytes available at src. */
  1.1921 +    int flags;			/* Conversion control flags. */
  1.1922 +    Tcl_EncodingState *statePtr;/* Slot in which a conversion routine may
  1.1923 +				 * keep state across the pieces of a
  1.1924 +				 * piecewise conversion.  This wrapper does
  1.1925 +				 * not touch it; it is handed on to
  1.1926 +				 * UtfToUtfProc as received. */
  1.1927 +    char *dst;			/* Buffer that receives the converted
  1.1928 +				 * bytes. */
  1.1929 +    int dstLen;			/* Capacity of the output buffer in
  1.1930 +				 * bytes. */
  1.1931 +    int *srcReadPtr;		/* Receives the count of source bytes that
  1.1932 +				 * were converted.  The count can be lower
  1.1933 +				 * than srcLen when some of the source
  1.1934 +				 * characters could not be converted in
  1.1935 +				 * this call. */
  1.1936 +    int *dstWrotePtr;		/* Receives the count of bytes that the
  1.1937 +				 * conversion placed in the output
  1.1938 +				 * buffer. */
  1.1939 +    int *dstCharsPtr;		/* Receives the count of characters that
  1.1940 +				 * the bytes placed in the output buffer
  1.1941 +				 * stand for. */
  1.1942 +{
  1.1943 +    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr,
  1.1944 +	    dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, /*pureNullMode*/ 1);
  1.1945 +}
  1.1946 +
  1.1947 +/*
  1.1948 + *-------------------------------------------------------------------------
  1.1949 + *
  1.1950 + * UtfExtToUtfIntProc --
  1.1951 + *
  1.1952 + *	Convert from UTF-8 to UTF-8 while converting null-bytes from
  1.1953 + *	the official representation (0x00) to Tcl's internal
  1.1954 + *	representation (0xc0, 0x80). See UtfToUtfProc for details.
  1.1955 + *
  1.1956 + * Results:
  1.1957 + *	Returns TCL_OK if conversion was successful.
  1.1958 + *
  1.1959 + * Side effects:
  1.1960 + *	None.
  1.1961 + *
  1.1962 + *-------------------------------------------------------------------------
  1.1963 + */
  1.1964 +static int 
  1.1965 +UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.1966 +	     srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.1967 +    ClientData clientData;	/* Forwarded unchanged to UtfToUtfProc. */
  1.1968 +    CONST char *src;		/* UTF-8 input bytes. */
  1.1969 +    int srcLen;			/* Number of bytes available at src. */
  1.1970 +    int flags;			/* Conversion control flags. */
  1.1971 +    Tcl_EncodingState *statePtr;/* Slot in which a conversion routine may
  1.1972 +				 * keep state across the pieces of a
  1.1973 +				 * piecewise conversion.  This wrapper does
  1.1974 +				 * not touch it; it is handed on to
  1.1975 +				 * UtfToUtfProc as received. */
  1.1976 +    char *dst;			/* Buffer that receives the converted
  1.1977 +				 * bytes. */
  1.1978 +    int dstLen;			/* Capacity of the output buffer in
  1.1979 +				 * bytes. */
  1.1980 +    int *srcReadPtr;		/* Receives the count of source bytes that
  1.1981 +				 * were converted.  The count can be lower
  1.1982 +				 * than srcLen when some of the source
  1.1983 +				 * characters could not be converted in
  1.1984 +				 * this call. */
  1.1985 +    int *dstWrotePtr;		/* Receives the count of bytes that the
  1.1986 +				 * conversion placed in the output
  1.1987 +				 * buffer. */
  1.1988 +    int *dstCharsPtr;		/* Receives the count of characters that
  1.1989 +				 * the bytes placed in the output buffer
  1.1990 +				 * stand for. */
  1.1991 +{
  1.1992 +    return UtfToUtfProc(clientData, src, srcLen, flags, statePtr,
  1.1993 +	    dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr, /*pureNullMode*/ 0);
  1.1994 +}
  1.1995 +
  1.1996 +/*
  1.1997 + *-------------------------------------------------------------------------
  1.1998 + *
  1.1999 + * UtfToUtfProc --
  1.2000 + *
  1.2001 + *	Convert from UTF-8 to UTF-8.  Note that the UTF-8 to UTF-8 
  1.2002 + *	translation is not a no-op, because it will turn a stream of
  1.2003 + *	improperly formed UTF-8 into a properly formed stream.
  1.2004 + *
  1.2005 + * Results:
  1.2006 + *	Returns TCL_OK if conversion was successful.
  1.2007 + *
  1.2008 + * Side effects:
  1.2009 + *	None.
  1.2010 + *
  1.2011 + *-------------------------------------------------------------------------
  1.2012 + */
  1.2013 +
  1.2014 +static int 
  1.2015 +UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2016 +	     srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode)
  1.2017 +    ClientData clientData;	/* Not used. */
  1.2018 +    CONST char *src;		/* Source string in UTF-8. */
  1.2019 +    int srcLen;			/* Source string length in bytes. */
  1.2020 +    int flags;			/* Conversion control flags. */
  1.2021 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2022 +				 * state information used during a piecewise
  1.2023 +				 * conversion.  Contents of statePtr are
  1.2024 +				 * initialized and/or reset by conversion
  1.2025 +				 * routine under control of flags argument. */
  1.2026 +    char *dst;			/* Output buffer in which converted string
  1.2027 +				 * is stored. */
  1.2028 +    int dstLen;			/* The maximum length of output buffer in
  1.2029 +				 * bytes. */
  1.2030 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2031 +				 * source string that were converted.  This
  1.2032 +				 * may be less than the original source length
  1.2033 +				 * if there was a problem converting some
  1.2034 +				 * source characters. */
  1.2035 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2036 +				 * stored in the output buffer as a result of
  1.2037 +				 * the conversion. */
  1.2038 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2039 +				 * correspond to the bytes stored in the
  1.2040 +				 * output buffer. */
  1.2041 +    int pureNullMode;		/* 1: turn the 0xc0,0x80 pair into a real
  1.2042 +				 * null byte; 0: send real null bytes
  1.2043 +				 * through the re-encoding path instead. */
  1.2044 +
  1.2045 +{
  1.2046 +    CONST char *srcStart, *srcEnd, *srcClose;
  1.2047 +    char *dstStart, *dstEnd;
  1.2048 +    int result, numChars;
  1.2049 +    Tcl_UniChar ch;
  1.2050 +
  1.2051 +    result = TCL_OK;
  1.2052 +    
  1.2053 +    srcStart = src;
  1.2054 +    srcEnd = src + srcLen;
  1.2055 +    srcClose = srcEnd;
  1.2056 +    if ((flags & TCL_ENCODING_END) == 0) {
  1.2057 +	srcClose -= TCL_UTF_MAX;
  1.2058 +    }
  1.2059 +
  1.2060 +    dstStart = dst;
  1.2061 +    dstEnd = dst + dstLen - TCL_UTF_MAX;
  1.2062 +
  1.2063 +    for (numChars = 0; src < srcEnd; numChars++) {
  1.2064 +	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  1.2065 +	    /*
  1.2066 +	     * If there is more string to follow, this will ensure that the
  1.2067 +	     * last UTF-8 character in the source buffer hasn't been cut off.
  1.2068 +	     */
  1.2069 +
  1.2070 +	    result = TCL_CONVERT_MULTIBYTE;
  1.2071 +	    break;
  1.2072 +	}
  1.2073 +	if (dst > dstEnd) {
  1.2074 +	    result = TCL_CONVERT_NOSPACE;
  1.2075 +	    break;
  1.2076 +	}
  1.2077 +	if (UCHAR(*src) < 0x80 &&
  1.2078 +	    !(UCHAR(*src) == 0 && pureNullMode == 0)) {
  1.2079 +	    /*
  1.2080 +	     * Copy 7-bit characters, but skip null bytes when we are
  1.2081 +	     * in input mode, so that they get converted to 0xc080.
  1.2082 +	     */
  1.2083 +	    *dst++ = *src++;
  1.2084 +	} else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 &&
  1.2085 +		   (src + 1 < srcEnd) &&
  1.2086 +		   UCHAR(*(src+1)) == 0x80) {
  1.2087 +	    /* Output mode: 0xc080 becomes a real null; the bound check
  1.2088 +	     * above keeps the look-ahead at src+1 inside the buffer.
  1.2089 +	     */
  1.2090 +	    *dst++ = 0;
  1.2091 +	    src += 2;
  1.2092 +	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
  1.2093 +	    /* Always check before using Tcl_UtfToUniChar; otherwise it
  1.2094 +	     * can run beyond the end of the buffer.  The bytes of such
  1.2095 +	     * an incomplete char are made to represent themselves, so
  1.2096 +	     * read the byte as unsigned to avoid sign extension.
  1.2097 +	     */
  1.2098 +
  1.2099 +	    ch = (Tcl_UniChar) UCHAR(*src);
  1.2100 +	    src += 1;
  1.2101 +	    dst += Tcl_UniCharToUtf(ch, dst);
  1.2102 +	} else {
  1.2103 +	    src += Tcl_UtfToUniChar(src, &ch);
  1.2104 +	    dst += Tcl_UniCharToUtf(ch, dst);
  1.2105 +	}
  1.2106 +    }
  1.2107 +
  1.2108 +    *srcReadPtr  = src - srcStart;
  1.2109 +    *dstWrotePtr = dst - dstStart;
  1.2110 +    *dstCharsPtr = numChars;
  1.2111 +    return result;
  1.2112 +}
  1.2113 +
  1.2114 +/*
  1.2115 + *-------------------------------------------------------------------------
  1.2116 + *
  1.2117 + * UnicodeToUtfProc --
  1.2118 + *
  1.2119 + *	Convert from Unicode to UTF-8.
  1.2120 + *
  1.2121 + * Results:
  1.2122 + *	Returns TCL_OK if conversion was successful.
  1.2123 + *
  1.2124 + * Side effects:
  1.2125 + *	None.
  1.2126 + *
  1.2127 + *-------------------------------------------------------------------------
  1.2128 + */
  1.2129 +
  1.2130 +static int 
  1.2131 +UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2132 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2133 +    ClientData clientData;	/* Not used. */
  1.2134 +    CONST char *src;		/* Source string in Unicode. */
  1.2135 +    int srcLen;			/* Source string length in bytes. */
  1.2136 +    int flags;			/* Conversion control flags. */
  1.2137 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2138 +				 * state information used during a piecewise
  1.2139 +				 * conversion.  Contents of statePtr are
  1.2140 +				 * initialized and/or reset by conversion
  1.2141 +				 * routine under control of flags argument. */
  1.2142 +    char *dst;			/* Output buffer in which converted string
  1.2143 +				 * is stored. */
  1.2144 +    int dstLen;			/* The maximum length of output buffer in
  1.2145 +				 * bytes. */
  1.2146 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2147 +				 * source string that were converted.  This
  1.2148 +				 * may be less than the original source length
  1.2149 +				 * if there was a problem converting some
  1.2150 +				 * source characters. */
  1.2151 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2152 +				 * stored in the output buffer as a result of
  1.2153 +				 * the conversion. */
  1.2154 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2155 +				 * correspond to the bytes stored in the
  1.2156 +				 * output buffer. */
  1.2157 +{
  1.2158 +    CONST char *srcStart, *srcEnd;
  1.2159 +    char *dstEnd, *dstStart;
  1.2160 +    int result, numChars;
  1.2161 +    Tcl_UniChar ch;
  1.2162 +
  1.2163 +    result = TCL_OK;
  1.2164 +    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
  1.2165 +	result = TCL_CONVERT_MULTIBYTE;	/* Trailing partial Tcl_UniChar. */
  1.2166 +	srcLen /= sizeof(Tcl_UniChar);
  1.2167 +	srcLen *= sizeof(Tcl_UniChar);
  1.2168 +    }
  1.2169 +
  1.2170 +    srcStart = src;
  1.2171 +    srcEnd = src + srcLen;
  1.2172 +
  1.2173 +    dstStart = dst;
  1.2174 +    dstEnd = dst + dstLen - TCL_UTF_MAX;
  1.2175 +
  1.2176 +    for (numChars = 0; src < srcEnd; numChars++) {
  1.2177 +	if (dst > dstEnd) {
  1.2178 +	    result = TCL_CONVERT_NOSPACE;
  1.2179 +	    break;
  1.2180 +	}
  1.2181 +	/*
  1.2182 +	 * Fetch one Tcl_UniChar with memcpy: src is a byte buffer and need
  1.2183 +	 * not be aligned for a direct load.  1-byte utf chars take a fast path.
  1.2184 +	 */
  1.2185 +	memcpy((VOID *) &ch, (VOID *) src, sizeof(Tcl_UniChar));
  1.2186 +	if (ch && ch < 0x80) {
  1.2187 +	    *dst++ = (ch & 0xFF);
  1.2188 +	} else {
  1.2189 +	    dst += Tcl_UniCharToUtf(ch, dst);
  1.2190 +	}
  1.2191 +	src += sizeof(Tcl_UniChar);
  1.2192 +    }
  1.2193 +
  1.2194 +    *srcReadPtr = src - srcStart;
  1.2195 +    *dstWrotePtr = dst - dstStart;
  1.2196 +    *dstCharsPtr = numChars;
  1.2197 +    return result;
  1.2198 +}
  1.2199 +
  1.2200 +/*
  1.2201 + *-------------------------------------------------------------------------
  1.2202 + *
  1.2203 + * UtfToUnicodeProc --
  1.2204 + *
  1.2205 + *	Convert from UTF-8 to Unicode.
  1.2206 + *
  1.2207 + * Results:
  1.2208 + *	Returns TCL_OK if conversion was successful.
  1.2209 + *
  1.2210 + * Side effects:
  1.2211 + *	None.
  1.2212 + *
  1.2213 + *-------------------------------------------------------------------------
  1.2214 + */
  1.2215 +
  1.2216 +static int 
  1.2217 +UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2218 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2219 +    ClientData clientData;	/* Not used. */
  1.2220 +    CONST char *src;		/* UTF-8 input bytes. */
  1.2221 +    int srcLen;			/* Number of bytes available at src. */
  1.2222 +    int flags;			/* Conversion control flags. */
  1.2223 +    Tcl_EncodingState *statePtr;/* Slot in which a conversion routine may
  1.2224 +				 * keep state across the pieces of a
  1.2225 +				 * piecewise conversion.  It is not
  1.2226 +				 * referenced anywhere in the body of
  1.2227 +				 * this routine. */
  1.2228 +    char *dst;			/* Buffer that receives the converted
  1.2229 +				 * bytes. */
  1.2230 +    int dstLen;			/* Capacity of the output buffer in
  1.2231 +				 * bytes. */
  1.2232 +    int *srcReadPtr;		/* Receives the count of source bytes that
  1.2233 +				 * were converted.  The count can be lower
  1.2234 +				 * than srcLen when the loop stops early
  1.2235 +				 * (truncated character or full output
  1.2236 +				 * buffer). */
  1.2237 +    int *dstWrotePtr;		/* Receives the count of bytes that the
  1.2238 +				 * conversion placed in the output
  1.2239 +				 * buffer. */
  1.2240 +    int *dstCharsPtr;		/* Receives the count of characters that
  1.2241 +				 * the bytes placed in the output buffer
  1.2242 +				 * stand for. */
  1.2243 +{
  1.2244 +    CONST char *srcBase, *srcLimit, *srcSafe, *dstBase, *dstLast;
  1.2245 +    int status = TCL_OK;
  1.2246 +    int count = 0;		/* Characters converted so far. */
  1.2247 +    Tcl_UniChar uc;
  1.2248 +
  1.2249 +    srcBase = src;
  1.2250 +    srcLimit = src + srcLen;
  1.2251 +    dstBase = dst;
  1.2252 +    dstLast = dst + dstLen - sizeof(Tcl_UniChar);
  1.2253 +
  1.2254 +    /*
  1.2255 +     * Unless this is the final chunk, characters starting within the last
  1.2256 +     * TCL_UTF_MAX bytes must be checked for completeness before decoding.
  1.2257 +     */
  1.2258 +    srcSafe = (flags & TCL_ENCODING_END) ? srcLimit : srcLimit - TCL_UTF_MAX;
  1.2259 +    while (src < srcLimit) {
  1.2260 +	if ((src > srcSafe) && !Tcl_UtfCharComplete(src, srcLimit - src)) {
  1.2261 +	    /* The tail of the buffer holds only part of a character;
  1.2262 +	     * report that instead of decoding it. */
  1.2263 +	    status = TCL_CONVERT_MULTIBYTE;
  1.2264 +	    break;
  1.2265 +	}
  1.2266 +	if (dst > dstLast) {
  1.2267 +	    /* No room left for another Tcl_UniChar. */
  1.2268 +	    status = TCL_CONVERT_NOSPACE;
  1.2269 +	    break;
  1.2270 +	}
  1.2271 +	src += TclUtfToUniChar(src, &uc);
  1.2272 +
  1.2273 +	/*
  1.2274 +	 * Store byte-wise so that dst is never cast to a Tcl_UniChar
  1.2275 +	 * pointer, which could be misaligned. [Bug 1122671]
  1.2276 +	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
  1.2277 +	 */
  1.2278 +#ifdef WORDS_BIGENDIAN
  1.2279 +	*dst++ = (uc >> 8);
  1.2280 +	*dst++ = (uc & 0xFF);
  1.2281 +#else
  1.2282 +	*dst++ = (uc & 0xFF);
  1.2283 +	*dst++ = (uc >> 8);
  1.2284 +#endif
  1.2285 +	count++;
  1.2286 +    }
  1.2287 +    *srcReadPtr = src - srcBase;
  1.2288 +    *dstWrotePtr = dst - dstBase;
  1.2289 +    *dstCharsPtr = count;
  1.2290 +    return status;
  1.2291 +}
  1.2292 +
  1.2293 +/*
  1.2294 + *-------------------------------------------------------------------------
  1.2295 + *
  1.2296 + * TableToUtfProc --
  1.2297 + *
  1.2298 + *	Convert from the encoding specified by the TableEncodingData into
  1.2299 + *	UTF-8.
  1.2300 + *
  1.2301 + * Results:
  1.2302 + *	Returns TCL_OK if conversion was successful.
  1.2303 + *
  1.2304 + * Side effects:
  1.2305 + *	None.
  1.2306 + *
  1.2307 + *-------------------------------------------------------------------------
  1.2308 + */
  1.2309 +
  1.2310 +static int 
  1.2311 +TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2312 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2313 +    ClientData clientData;	/* TableEncodingData that specifies
  1.2314 +				 * encoding. */
  1.2315 +    CONST char *src;		/* Source string in specified encoding. */
  1.2316 +    int srcLen;			/* Source string length in bytes. */
  1.2317 +    int flags;			/* Conversion control flags.  Only
  1.2318 +				 * TCL_ENCODING_STOPONERROR is examined
  1.2319 +				 * by this routine. */
  1.2320 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2321 +				 * state information used during a piecewise
  1.2322 +				 * conversion.  Not referenced by this
  1.2323 +				 * routine. */
  1.2324 +    char *dst;			/* Output buffer in which converted string
  1.2325 +				 * is stored. */
  1.2326 +    int dstLen;			/* The maximum length of output buffer in
  1.2327 +				 * bytes. */
  1.2328 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2329 +				 * source string that were converted.  This
  1.2330 +				 * may be less than the original source length
  1.2331 +				 * if there was a problem converting some
  1.2332 +				 * source characters. */
  1.2333 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2334 +				 * stored in the output buffer as a result of
  1.2335 +				 * the conversion. */
  1.2336 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2337 +				 * correspond to the bytes stored in the
  1.2338 +				 * output buffer. */
  1.2339 +{
  1.2340 +    CONST char *srcStart, *srcEnd;
  1.2341 +    char *dstEnd, *dstStart, *prefixBytes;
  1.2342 +    int result, byte, numChars;
  1.2343 +    Tcl_UniChar ch;
  1.2344 +    unsigned short **toUnicode;
  1.2345 +    unsigned short *pageZero;
  1.2346 +    TableEncodingData *dataPtr;
  1.2347 +    
  1.2348 +    srcStart = src;
  1.2349 +    srcEnd = src + srcLen;
  1.2350 +
  1.2351 +    dstStart = dst;
  1.2352 +    dstEnd = dst + dstLen - TCL_UTF_MAX;	/* dst past this => NOSPACE. */
  1.2353 +
  1.2354 +    dataPtr = (TableEncodingData *) clientData;
  1.2355 +    toUnicode = dataPtr->toUnicode;
  1.2356 +    prefixBytes = dataPtr->prefixBytes;
  1.2357 +    pageZero = toUnicode[0];	/* Page used for non-prefix bytes. */
  1.2358 +
  1.2359 +    result = TCL_OK;
  1.2360 +    for (numChars = 0; src < srcEnd; numChars++) {
  1.2361 +        if (dst > dstEnd) {
  1.2362 +            result = TCL_CONVERT_NOSPACE;
  1.2363 +            break;
  1.2364 +        }
  1.2365 +	byte = *((unsigned char *) src);
  1.2366 +	if (prefixBytes[byte]) {	/* Lead byte of a two-byte sequence. */
  1.2367 +	    src++;
  1.2368 +	    if (src >= srcEnd) {
  1.2369 +		src--;		/* Second byte absent: un-consume the lead. */
  1.2370 +		result = TCL_CONVERT_MULTIBYTE;
  1.2371 +		break;
  1.2372 +	    }
  1.2373 +	    ch = toUnicode[byte][*((unsigned char *) src)];
  1.2374 +	} else {
  1.2375 +	    ch = pageZero[byte];
  1.2376 +	}
  1.2377 +	if ((ch == 0) && (byte != 0)) {	/* Table has no entry for this input. */
  1.2378 +	    if (flags & TCL_ENCODING_STOPONERROR) {
  1.2379 +		result = TCL_CONVERT_SYNTAX;
  1.2380 +		break;
  1.2381 +	    }
  1.2382 +	    if (prefixBytes[byte]) {
  1.2383 +		src--;		/* Back up so only the lead byte is consumed. */
  1.2384 +	    }
  1.2385 +	    ch = (Tcl_UniChar) byte;	/* Emit the byte value itself. */
  1.2386 +	}
  1.2387 +	/*
  1.2388 +	 * Special case for 1-byte utf chars for speed.
  1.2389 +	 */
  1.2390 +	if (ch && ch < 0x80) {
  1.2391 +	    *dst++ = (char) ch;
  1.2392 +	} else {
  1.2393 +	    dst += Tcl_UniCharToUtf(ch, dst);
  1.2394 +	}
  1.2395 +        src++;
  1.2396 +    }
  1.2397 +    *srcReadPtr = src - srcStart;
  1.2398 +    *dstWrotePtr = dst - dstStart;
  1.2399 +    *dstCharsPtr = numChars;
  1.2400 +    return result;
  1.2401 +}
  1.2401 +
  1.2402 +/*
  1.2403 + *-------------------------------------------------------------------------
  1.2404 + *
  1.2405 + * TableFromUtfProc --
  1.2406 + *
  1.2407 + *	Convert from UTF-8 into the encoding specified by the
  1.2408 + *	TableEncodingData.
  1.2409 + *
  1.2410 + * Results:
  1.2411 + *	Returns TCL_OK if conversion was successful.
  1.2412 + *
  1.2413 + * Side effects:
  1.2414 + *	None.
  1.2415 + *
  1.2416 + *-------------------------------------------------------------------------
  1.2417 + */
  1.2418 +
  1.2419 +static int 
  1.2420 +TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2421 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2422 +    ClientData clientData;	/* TableEncodingData that specifies
  1.2423 +				 * encoding. */
  1.2424 +    CONST char *src;		/* Source string in UTF-8. */
  1.2425 +    int srcLen;			/* Source string length in bytes. */
  1.2426 +    int flags;			/* Conversion control flags. */
  1.2427 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2428 +				 * state information used during a piecewise
  1.2429 +				 * conversion.  Contents of statePtr are
  1.2430 +				 * initialized and/or reset by conversion
  1.2431 +				 * routine under control of flags argument. */
  1.2432 +    char *dst;			/* Output buffer in which converted string
  1.2433 +				 * is stored. */
  1.2434 +    int dstLen;			/* The maximum length of output buffer in
  1.2435 +				 * bytes. */
  1.2436 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2437 +				 * source string that were converted.  This
  1.2438 +				 * may be less than the original source length
  1.2439 +				 * if there was a problem converting some
  1.2440 +				 * source characters. */
  1.2441 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2442 +				 * stored in the output buffer as a result of
  1.2443 +				 * the conversion. */
  1.2444 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2445 +				 * correspond to the bytes stored in the
  1.2446 +				 * output buffer. */
  1.2447 +{
  1.2448 +    CONST char *srcStart, *srcEnd, *srcClose;
  1.2449 +    char *dstStart, *dstEnd, *prefixBytes;
  1.2450 +    Tcl_UniChar ch;
  1.2451 +    int result, len, word, numChars;
  1.2452 +    TableEncodingData *dataPtr;
  1.2453 +    unsigned short **fromUnicode;
  1.2454 +    
  1.2455 +    result = TCL_OK;    
  1.2456 +
  1.2457 +    dataPtr = (TableEncodingData *) clientData;
  1.2458 +    prefixBytes = dataPtr->prefixBytes;
  1.2459 +    fromUnicode = dataPtr->fromUnicode;
  1.2460 +    
  1.2461 +    srcStart = src;
  1.2462 +    srcEnd = src + srcLen;
  1.2463 +    srcClose = srcEnd;
  1.2464 +    if ((flags & TCL_ENCODING_END) == 0) {
  1.2465 +	srcClose -= TCL_UTF_MAX;
  1.2466 +    }
  1.2467 +
  1.2468 +    dstStart = dst;
  1.2469 +    dstEnd = dst + dstLen - 1;
  1.2470 +
  1.2471 +    for (numChars = 0; src < srcEnd; numChars++) {
  1.2472 +	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  1.2473 +	    /*
  1.2474 +	     * If there is more string to follow, this will ensure that the
  1.2475 +	     * last UTF-8 character in the source buffer hasn't been cut off.
  1.2476 +	     */
  1.2477 +
  1.2478 +	    result = TCL_CONVERT_MULTIBYTE;
  1.2479 +	    break;
  1.2480 +	}
  1.2481 +	len = TclUtfToUniChar(src, &ch);
  1.2482 +
  1.2483 +#if TCL_UTF_MAX > 3
  1.2484 +	/*
  1.2485 +	 * This prevents a crash condition.  More evaluation is required
  1.2486 +	 * for full support of int Tcl_UniChar. [Bug 1004065]
  1.2487 +	 */
  1.2488 +	if (ch & 0xffff0000) {
  1.2489 +	    word = 0;
  1.2490 +	} else
  1.2491 +#endif
  1.2492 +	    word = fromUnicode[(ch >> 8)][ch & 0xff];
  1.2493 +
  1.2494 +	if ((word == 0) && (ch != 0)) {
  1.2495 +	    if (flags & TCL_ENCODING_STOPONERROR) {
  1.2496 +		result = TCL_CONVERT_UNKNOWN;
  1.2497 +		break;
  1.2498 +	    }
  1.2499 +	    word = dataPtr->fallback; 
  1.2500 +	}
  1.2501 +	if (prefixBytes[(word >> 8)] != 0) {
  1.2502 +	    if (dst + 1 > dstEnd) {
  1.2503 +		result = TCL_CONVERT_NOSPACE;
  1.2504 +		break;
  1.2505 +	    }
  1.2506 +	    dst[0] = (char) (word >> 8);
  1.2507 +	    dst[1] = (char) word;
  1.2508 +	    dst += 2;
  1.2509 +	} else {
  1.2510 +	    if (dst > dstEnd) {
  1.2511 +		result = TCL_CONVERT_NOSPACE;
  1.2512 +		break;
  1.2513 +	    }
  1.2514 +	    dst[0] = (char) word;
  1.2515 +	    dst++;
  1.2516 +	} 
  1.2517 +	src += len;
  1.2518 +    }
  1.2519 +    *srcReadPtr = src - srcStart;
  1.2520 +    *dstWrotePtr = dst - dstStart;
  1.2521 +    *dstCharsPtr = numChars;
  1.2522 +    return result;
  1.2523 +}
  1.2524 +
  1.2525 +/*
  1.2526 + *---------------------------------------------------------------------------
  1.2527 + *
  1.2528 + * TableFreeProc --
  1.2529 + *
  1.2530 + *	This procedure is invoked when an encoding is deleted.  It deletes
  1.2531 + *	the memory used by the TableEncodingData.
  1.2532 + *
  1.2533 + * Results:
  1.2534 + *	None.
  1.2535 + *
  1.2536 + * Side effects:
  1.2537 + *	Memory freed.
  1.2538 + *
  1.2539 + *---------------------------------------------------------------------------
  1.2540 + */
  1.2541 +
  1.2542 +static void
  1.2543 +TableFreeProc(clientData)
  1.2544 +    ClientData clientData;	/* TableEncodingData that specifies
  1.2545 +				 * encoding. */
  1.2546 +{
  1.2547 +    TableEncodingData *dataPtr;
  1.2548 +
  1.2549 +    /*
  1.2550 +     * Make sure we aren't freeing twice on shutdown.  [Bug #219314]
  1.2551 +     */
  1.2552 +
  1.2553 +    dataPtr = (TableEncodingData *) clientData;
  1.2554 +    ckfree((char *) dataPtr->toUnicode);
  1.2555 +    ckfree((char *) dataPtr->fromUnicode);
  1.2556 +    ckfree((char *) dataPtr);
  1.2557 +}
  1.2558 +
  1.2559 +/*
  1.2560 + *-------------------------------------------------------------------------
  1.2561 + *
  1.2562 + * EscapeToUtfProc --
  1.2563 + *
  1.2564 + *	Convert from the encoding specified by the EscapeEncodingData into
  1.2565 + *	UTF-8.
  1.2566 + *
  1.2567 + * Results:
  1.2568 + *	Returns TCL_OK if conversion was successful.
  1.2569 + *
  1.2570 + * Side effects:
  1.2571 + *	None.
  1.2572 + *
  1.2573 + *-------------------------------------------------------------------------
  1.2574 + */
  1.2575 +
  1.2576 +static int 
  1.2577 +EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2578 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2579 +    ClientData clientData;	/* EscapeEncodingData that specifies
  1.2580 +				 * encoding. */
  1.2581 +    CONST char *src;		/* Source string in specified encoding. */
  1.2582 +    int srcLen;			/* Source string length in bytes. */
  1.2583 +    int flags;			/* Conversion control flags. */
  1.2584 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2585 +				 * state information used during a piecewise
  1.2586 +				 * conversion.  Contents of statePtr are
  1.2587 +				 * initialized and/or reset by conversion
  1.2588 +				 * routine under control of flags argument. */
  1.2589 +    char *dst;			/* Output buffer in which converted string
  1.2590 +				 * is stored. */
  1.2591 +    int dstLen;			/* The maximum length of output buffer in
  1.2592 +				 * bytes. */
  1.2593 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2594 +				 * source string that were converted.  This
  1.2595 +				 * may be less than the original source length
  1.2596 +				 * if there was a problem converting some
  1.2597 +				 * source characters. */
  1.2598 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2599 +				 * stored in the output buffer as a result of
  1.2600 +				 * the conversion. */
  1.2601 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2602 +				 * correspond to the bytes stored in the
  1.2603 +				 * output buffer. */
  1.2604 +{
  1.2605 +    EscapeEncodingData *dataPtr;
  1.2606 +    char *prefixBytes, *tablePrefixBytes;
  1.2607 +    unsigned short **tableToUnicode;
  1.2608 +    Encoding *encodingPtr;
  1.2609 +    int state, result, numChars;
  1.2610 +    CONST char *srcStart, *srcEnd;
  1.2611 +    char *dstStart, *dstEnd;
  1.2612 +
  1.2613 +    result = TCL_OK;
  1.2614 +
  1.2615 +    tablePrefixBytes = NULL;	/* lint. */
  1.2616 +    tableToUnicode = NULL;	/* lint. */
  1.2617 +
  1.2618 +    dataPtr = (EscapeEncodingData *) clientData;
  1.2619 +    prefixBytes = dataPtr->prefixBytes;
  1.2620 +    encodingPtr = NULL;
  1.2621 +
  1.2622 +    srcStart = src;
  1.2623 +    srcEnd = src + srcLen;
  1.2624 +
  1.2625 +    dstStart = dst;
  1.2626 +    dstEnd = dst + dstLen - TCL_UTF_MAX;
  1.2627 +
  1.2628 +    state = (int) *statePtr;
  1.2629 +    if (flags & TCL_ENCODING_START) {
  1.2630 +	state = 0;
  1.2631 +    }
  1.2632 +
  1.2633 +    for (numChars = 0; src < srcEnd; ) {
  1.2634 +	int byte, hi, lo, ch;
  1.2635 +
  1.2636 +        if (dst > dstEnd) {
  1.2637 +            result = TCL_CONVERT_NOSPACE;
  1.2638 +            break;
  1.2639 +        }
  1.2640 +	byte = *((unsigned char *) src);
  1.2641 +	if (prefixBytes[byte]) {
  1.2642 +	    unsigned int left, len, longest;
  1.2643 +	    int checked, i;
  1.2644 +	    EscapeSubTable *subTablePtr;
  1.2645 +	    
  1.2646 +	    /*
  1.2647 +	     * Saw the beginning of an escape sequence. 
  1.2648 +	     */
  1.2649 +	     
  1.2650 +	    left = srcEnd - src;
  1.2651 +	    len = dataPtr->initLen;
  1.2652 +	    longest = len;
  1.2653 +	    checked = 0;
  1.2654 +	    if (len <= left) {
  1.2655 +		checked++;
  1.2656 +		if ((len > 0) && 
  1.2657 +			(memcmp(src, dataPtr->init, len) == 0)) {
  1.2658 +		    /*
  1.2659 +		     * If we see initialization string, skip it, even if we're
  1.2660 +		     * not at the beginning of the buffer. 
  1.2661 +		     */
  1.2662 +		     
  1.2663 +		    src += len;
  1.2664 +		    continue;
  1.2665 +		}
  1.2666 +	    }
  1.2667 +	    len = dataPtr->finalLen;
  1.2668 +	    if (len > longest) {
  1.2669 +		longest = len;
  1.2670 +	    }
  1.2671 +	    if (len <= left) {
  1.2672 +		checked++;
  1.2673 +		if ((len > 0) && 
  1.2674 +			(memcmp(src, dataPtr->final, len) == 0)) {
  1.2675 +		    /*
  1.2676 +		     * If we see finalization string, skip it, even if we're
  1.2677 +		     * not at the end of the buffer. 
  1.2678 +		     */
  1.2679 +		     
  1.2680 +		    src += len;
  1.2681 +		    continue;
  1.2682 +		}
  1.2683 +	    }
  1.2684 +	    subTablePtr = dataPtr->subTables;
  1.2685 +	    for (i = 0; i < dataPtr->numSubTables; i++) {
  1.2686 +		len = subTablePtr->sequenceLen;
  1.2687 +		if (len > longest) {
  1.2688 +		    longest = len;
  1.2689 +		}
  1.2690 +		if (len <= left) {
  1.2691 +		    checked++;
  1.2692 +		    if ((len > 0) && 
  1.2693 +			    (memcmp(src, subTablePtr->sequence, len) == 0)) {
  1.2694 +			state = i;
  1.2695 +			encodingPtr = NULL;
  1.2696 +			subTablePtr = NULL;
  1.2697 +			src += len;
  1.2698 +			break;
  1.2699 +		    }
  1.2700 +		}
  1.2701 +		subTablePtr++;
  1.2702 +	    }
  1.2703 +	    if (subTablePtr == NULL) {
  1.2704 +		/*
  1.2705 +		 * A match was found, the escape sequence was consumed, and
  1.2706 +		 * the state was updated.
  1.2707 +		 */
  1.2708 +
  1.2709 +		continue;
  1.2710 +	    }
  1.2711 +
  1.2712 +	    /*
  1.2713 +	     * We have a split-up or unrecognized escape sequence.  If we
  1.2714 +	     * checked all the sequences, then it's a syntax error,
  1.2715 +	     * otherwise we need more bytes to determine a match.
  1.2716 +	     */
  1.2717 +
  1.2718 +	    if ((checked == dataPtr->numSubTables + 2)
  1.2719 +		    || (flags & TCL_ENCODING_END)) {
  1.2720 +		if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
  1.2721 +		    /*
  1.2722 +		     * Skip the unknown escape sequence.
  1.2723 +		     */
  1.2724 +
  1.2725 +		    src += longest;
  1.2726 +		    continue;
  1.2727 +		}
  1.2728 +		result = TCL_CONVERT_SYNTAX;
  1.2729 +	    } else {
  1.2730 +		result = TCL_CONVERT_MULTIBYTE;
  1.2731 +	    }
  1.2732 +	    break;
  1.2733 +	}
  1.2734 +
  1.2735 +	if (encodingPtr == NULL) {
  1.2736 +	    TableEncodingData *tableDataPtr;
  1.2737 +
  1.2738 +	    encodingPtr = GetTableEncoding(dataPtr, state);
  1.2739 +	    tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  1.2740 +	    tablePrefixBytes = tableDataPtr->prefixBytes;
  1.2741 +	    tableToUnicode = tableDataPtr->toUnicode;
  1.2742 +	}
  1.2743 +	if (tablePrefixBytes[byte]) {
  1.2744 +	    src++;
  1.2745 +	    if (src >= srcEnd) {
  1.2746 +		src--;
  1.2747 +		result = TCL_CONVERT_MULTIBYTE;
  1.2748 +		break;
  1.2749 +	    }
  1.2750 +	    hi = byte;
  1.2751 +	    lo = *((unsigned char *) src);
  1.2752 +	} else {
  1.2753 +	    hi = 0;
  1.2754 +	    lo = byte;
  1.2755 +	}
  1.2756 +	ch = tableToUnicode[hi][lo];
  1.2757 +	dst += Tcl_UniCharToUtf(ch, dst);
  1.2758 +	src++;
  1.2759 +	numChars++;
  1.2760 +    }
  1.2761 +
  1.2762 +    *statePtr = (Tcl_EncodingState) state;
  1.2763 +    *srcReadPtr = src - srcStart;
  1.2764 +    *dstWrotePtr = dst - dstStart;
  1.2765 +    *dstCharsPtr = numChars;
  1.2766 +    return result;
  1.2767 +}
  1.2768 +
  1.2769 +/*
  1.2770 + *-------------------------------------------------------------------------
  1.2771 + *
  1.2772 + * EscapeFromUtfProc --
  1.2773 + *
  1.2774 + *	Convert from UTF-8 into the encoding specified by the
  1.2775 + *	EscapeEncodingData.
  1.2776 + *
  1.2777 + * Results:
  1.2778 + *	Returns TCL_OK if conversion was successful.
  1.2779 + *
  1.2780 + * Side effects:
  1.2781 + *	None.
  1.2782 + *
  1.2783 + *-------------------------------------------------------------------------
  1.2784 + */
  1.2785 +
  1.2786 +static int 
  1.2787 +EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1.2788 +	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1.2789 +    ClientData clientData;	/* EscapeEncodingData that specifies
  1.2790 +				 * encoding. */
  1.2791 +    CONST char *src;		/* Source string in UTF-8. */
  1.2792 +    int srcLen;			/* Source string length in bytes. */
  1.2793 +    int flags;			/* Conversion control flags. */
  1.2794 +    Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1.2795 +				 * state information used during a piecewise
  1.2796 +				 * conversion.  Contents of statePtr are
  1.2797 +				 * initialized and/or reset by conversion
  1.2798 +				 * routine under control of flags argument. */
  1.2799 +    char *dst;			/* Output buffer in which converted string
  1.2800 +				 * is stored. */
  1.2801 +    int dstLen;			/* The maximum length of output buffer in
  1.2802 +				 * bytes. */
  1.2803 +    int *srcReadPtr;		/* Filled with the number of bytes from the
  1.2804 +				 * source string that were converted.  This
  1.2805 +				 * may be less than the original source length
  1.2806 +				 * if there was a problem converting some
  1.2807 +				 * source characters. */
  1.2808 +    int *dstWrotePtr;		/* Filled with the number of bytes that were
  1.2809 +				 * stored in the output buffer as a result of
  1.2810 +				 * the conversion. */
  1.2811 +    int *dstCharsPtr;		/* Filled with the number of characters that
  1.2812 +				 * correspond to the bytes stored in the
  1.2813 +				 * output buffer. */
  1.2814 +{
  1.2815 +    EscapeEncodingData *dataPtr;
  1.2816 +    Encoding *encodingPtr;
  1.2817 +    CONST char *srcStart, *srcEnd, *srcClose;
  1.2818 +    char *dstStart, *dstEnd;
  1.2819 +    int state, result, numChars;
  1.2820 +    TableEncodingData *tableDataPtr;
  1.2821 +    char *tablePrefixBytes;
  1.2822 +    unsigned short **tableFromUnicode;
  1.2823 +    
  1.2824 +    result = TCL_OK;    
  1.2825 +
  1.2826 +    dataPtr = (EscapeEncodingData *) clientData;
  1.2827 +
  1.2828 +    srcStart = src;
  1.2829 +    srcEnd = src + srcLen;
  1.2830 +    srcClose = srcEnd;
  1.2831 +    if ((flags & TCL_ENCODING_END) == 0) {
  1.2832 +	srcClose -= TCL_UTF_MAX;
  1.2833 +    }
  1.2834 +
  1.2835 +    dstStart = dst;
  1.2836 +    dstEnd = dst + dstLen - 1;
  1.2837 +
  1.2838 +    /*
  1.2839 +     * RFC1468 states that the text starts in ASCII, and switches to Japanese
  1.2840 +     * characters, and that the text must end in ASCII. [Patch #474358]
  1.2841 +     */
  1.2842 +
  1.2843 +    if (flags & TCL_ENCODING_START) {
  1.2844 +	state = 0;
  1.2845 +	if ((dst + dataPtr->initLen) > dstEnd) {
  1.2846 +	    *srcReadPtr = 0;
  1.2847 +	    *dstWrotePtr = 0;
  1.2848 +	    return TCL_CONVERT_NOSPACE;
  1.2849 +	}
  1.2850 +	memcpy((VOID *) dst, (VOID *) dataPtr->init,
  1.2851 +		(size_t) dataPtr->initLen);
  1.2852 +	dst += dataPtr->initLen;
  1.2853 +    } else {
  1.2854 +        state = (int) *statePtr;
  1.2855 +    }
  1.2856 +
  1.2857 +    encodingPtr = GetTableEncoding(dataPtr, state);
  1.2858 +    tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  1.2859 +    tablePrefixBytes = tableDataPtr->prefixBytes;
  1.2860 +    tableFromUnicode = tableDataPtr->fromUnicode;
  1.2861 +
  1.2862 +    for (numChars = 0; src < srcEnd; numChars++) {
  1.2863 +	unsigned int len;
  1.2864 +	int word;
  1.2865 +	Tcl_UniChar ch;
  1.2866 +	
  1.2867 +	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  1.2868 +	    /*
  1.2869 +	     * If there is more string to follow, this will ensure that the
  1.2870 +	     * last UTF-8 character in the source buffer hasn't been cut off.
  1.2871 +	     */
  1.2872 +
  1.2873 +	    result = TCL_CONVERT_MULTIBYTE;
  1.2874 +	    break;
  1.2875 +	}
  1.2876 +	len = TclUtfToUniChar(src, &ch);
  1.2877 +	word = tableFromUnicode[(ch >> 8)][ch & 0xff];
  1.2878 +
  1.2879 +	if ((word == 0) && (ch != 0)) {
  1.2880 +	    int oldState;
  1.2881 +	    EscapeSubTable *subTablePtr;
  1.2882 +	    
  1.2883 +	    oldState = state;
  1.2884 +	    for (state = 0; state < dataPtr->numSubTables; state++) {
  1.2885 +		encodingPtr = GetTableEncoding(dataPtr, state);
  1.2886 +		tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  1.2887 +	    	word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff];
  1.2888 +		if (word != 0) {
  1.2889 +		    break;
  1.2890 +		}
  1.2891 +	    }
  1.2892 +
  1.2893 +	    if (word == 0) {
  1.2894 +		state = oldState;
  1.2895 +		if (flags & TCL_ENCODING_STOPONERROR) {
  1.2896 +		    result = TCL_CONVERT_UNKNOWN;
  1.2897 +		    break;
  1.2898 +		}
  1.2899 +		encodingPtr = GetTableEncoding(dataPtr, state);
  1.2900 +		tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  1.2901 +		word = tableDataPtr->fallback;
  1.2902 +	    } 
  1.2903 +	    
  1.2904 +	    tablePrefixBytes = tableDataPtr->prefixBytes;
  1.2905 +	    tableFromUnicode = tableDataPtr->fromUnicode;
  1.2906 +
  1.2907 +	    /*
  1.2908 +	     * The state variable has the value of oldState when word is 0.
  1.2909 +	     * In this case, the escape sequense should not be copied to dst 
  1.2910 +	     * because the current character set is not changed.
  1.2911 +	     */
  1.2912 +	    if (state != oldState) {
  1.2913 +		subTablePtr = &dataPtr->subTables[state];
  1.2914 +		if ((dst + subTablePtr->sequenceLen) > dstEnd) {
  1.2915 +		    /*
  1.2916 +		     * If there is no space to write the escape sequence, the
  1.2917 +		     * state variable must be changed to the value of oldState
  1.2918 +		     * variable because this escape sequence must be written
  1.2919 +		     * in the next conversion.
  1.2920 +		     */
  1.2921 +		    state = oldState;
  1.2922 +		    result = TCL_CONVERT_NOSPACE;
  1.2923 +		    break;
  1.2924 +		}
  1.2925 +		memcpy((VOID *) dst, (VOID *) subTablePtr->sequence,
  1.2926 +			(size_t) subTablePtr->sequenceLen);
  1.2927 +		dst += subTablePtr->sequenceLen;
  1.2928 +	    }
  1.2929 +	}
  1.2930 +
  1.2931 +	if (tablePrefixBytes[(word >> 8)] != 0) {
  1.2932 +	    if (dst + 1 > dstEnd) {
  1.2933 +		result = TCL_CONVERT_NOSPACE;
  1.2934 +		break;
  1.2935 +	    }
  1.2936 +	    dst[0] = (char) (word >> 8);
  1.2937 +	    dst[1] = (char) word;
  1.2938 +	    dst += 2;
  1.2939 +	} else {
  1.2940 +	    if (dst > dstEnd) {
  1.2941 +		result = TCL_CONVERT_NOSPACE;
  1.2942 +		break;
  1.2943 +	    }
  1.2944 +	    dst[0] = (char) word;
  1.2945 +	    dst++;
  1.2946 +	} 
  1.2947 +	src += len;
  1.2948 +    }
  1.2949 +
  1.2950 +    if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) {
  1.2951 +	unsigned int len = dataPtr->subTables[0].sequenceLen;
  1.2952 +	/*
  1.2953 +	 * [Bug 1516109].
  1.2954 +	 * Certain encodings like iso2022-jp need to write
  1.2955 +	 * an escape sequence after all characters have
  1.2956 +	 * been converted. This logic checks that enough
  1.2957 +	 * room is available in the buffer for the escape bytes.
  1.2958 +	 * The TCL_ENCODING_END flag is cleared after a final
  1.2959 +	 * escape sequence has been added to the buffer so
  1.2960 +	 * that another call to this method does not attempt
  1.2961 +	 * to append escape bytes a second time.
  1.2962 +	 */
  1.2963 +	if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) {
  1.2964 +	    result = TCL_CONVERT_NOSPACE;
  1.2965 +	} else {
  1.2966 +	    if (state) {
  1.2967 +		memcpy((VOID *) dst, (VOID *) dataPtr->subTables[0].sequence,
  1.2968 +			(size_t) len);
  1.2969 +		dst += len;
  1.2970 +	    }
  1.2971 +	    memcpy((VOID *) dst, (VOID *) dataPtr->final,
  1.2972 +		    (size_t) dataPtr->finalLen);
  1.2973 +	    dst += dataPtr->finalLen;
  1.2974 +	    state &= ~TCL_ENCODING_END;
  1.2975 +	}
  1.2976 +    }
  1.2977 +
  1.2978 +    *statePtr = (Tcl_EncodingState) state;
  1.2979 +    *srcReadPtr = src - srcStart;
  1.2980 +    *dstWrotePtr = dst - dstStart;
  1.2981 +    *dstCharsPtr = numChars;
  1.2982 +    return result;
  1.2983 +}
  1.2984 +
  1.2985 +/*
  1.2986 + *---------------------------------------------------------------------------
  1.2987 + *
  1.2988 + * EscapeFreeProc --
  1.2989 + *
  1.2990 + *	This procedure is invoked when an EscapeEncodingData encoding is 
  1.2991 + *	deleted.  It deletes the memory used by the encoding.
  1.2992 + *
  1.2993 + * Results:
  1.2994 + *	None.
  1.2995 + *
  1.2996 + * Side effects:
  1.2997 + *	Memory freed.
  1.2998 + *
  1.2999 + *---------------------------------------------------------------------------
  1.3000 + */
  1.3001 +
  1.3002 +static void
  1.3003 +EscapeFreeProc(clientData)
  1.3004 +    ClientData clientData;	/* EscapeEncodingData that specifies encoding. */
  1.3005 +{
  1.3006 +    EscapeEncodingData *dataPtr;
  1.3007 +    EscapeSubTable *subTablePtr;
  1.3008 +    int i;
  1.3009 +
  1.3010 +    dataPtr = (EscapeEncodingData *) clientData;
  1.3011 +    if (dataPtr == NULL) {
  1.3012 +	return;
  1.3013 +    }
  1.3014 +    subTablePtr = dataPtr->subTables;
  1.3015 +    for (i = 0; i < dataPtr->numSubTables; i++) {
  1.3016 +	FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr);
  1.3017 +	subTablePtr++;
  1.3018 +    }
  1.3019 +    ckfree((char *) dataPtr);
  1.3020 +}
  1.3021 +
  1.3022 +/*
  1.3023 + *---------------------------------------------------------------------------
  1.3024 + *
  1.3025 + * GetTableEncoding --
  1.3026 + *
  1.3027 + *	Helper function for the EscapeEncodingData conversions.  Gets the
  1.3028 + *	encoding (of type TextEncodingData) that represents the specified
  1.3029 + *	state.
  1.3030 + *
  1.3031 + * Results:
  1.3032 + *	The return value is the encoding.
  1.3033 + *
  1.3034 + * Side effects:
  1.3035 + *	If the encoding that represents the specified state has not
  1.3036 + *	already been used by this EscapeEncoding, it will be loaded
  1.3037 + *	and cached in the dataPtr.
  1.3038 + *
  1.3039 + *---------------------------------------------------------------------------
  1.3040 + */
  1.3041 +
  1.3042 +static Encoding *
  1.3043 +GetTableEncoding(dataPtr, state)
  1.3044 +    EscapeEncodingData *dataPtr;/* Contains names of encodings. */
  1.3045 +    int state;			/* Index in dataPtr of desired Encoding. */
  1.3046 +{
  1.3047 +    EscapeSubTable *subTablePtr;
  1.3048 +    Encoding *encodingPtr;
  1.3049 +    
  1.3050 +    subTablePtr = &dataPtr->subTables[state];
  1.3051 +    encodingPtr = subTablePtr->encodingPtr;
  1.3052 +    if (encodingPtr == NULL) {
  1.3053 +	/*
  1.3054 +	 * Now that escape encodings load their sub-encodings first, and
  1.3055 +	 * fail to load if any sub-encodings are missing, this branch should
  1.3056 +	 * never happen.  
  1.3057 +	 */
  1.3058 +	encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
  1.3059 +	if ((encodingPtr == NULL) 
  1.3060 +		|| (encodingPtr->toUtfProc != TableToUtfProc)) {
  1.3061 +	    panic("EscapeToUtfProc: invalid sub table");
  1.3062 +	}
  1.3063 +	subTablePtr->encodingPtr = encodingPtr;
  1.3064 +    }
  1.3065 +    return encodingPtr;
  1.3066 +}
  1.3067 +
  1.3068 +/*
  1.3069 + *---------------------------------------------------------------------------
  1.3070 + *
  1.3071 + * unilen --
  1.3072 + *
  1.3073 + *	A helper function for the Tcl_ExternalToUtf functions.  This
  1.3074 + *	function is similar to strlen for double-byte characters: it
  1.3075 + *	returns the number of bytes in a 0x0000 terminated string.
  1.3076 + *
  1.3077 + * Results:
  1.3078 + *	As above.
  1.3079 + *
  1.3080 + * Side effects:
  1.3081 + *	None.
  1.3082 + *
  1.3083 + *---------------------------------------------------------------------------
  1.3084 + */
  1.3085 +
  1.3086 +static size_t
  1.3087 +unilen(src)
  1.3088 +    CONST char *src;
  1.3089 +{
  1.3090 +    unsigned short *p;
  1.3091 +
  1.3092 +    p = (unsigned short *) src;
  1.3093 +    while (*p != 0x0000) {
  1.3094 +	p++;
  1.3095 +    }
  1.3096 +    return (char *) p - src;
  1.3097 +}
  1.3098 +
  1.3099 +/*
  1.3100 + *-------------------------------------------------------------------------
  1.3101 + *
  1.3102 + * TclFindEncodings --
  1.3103 + *
  1.3104 + *	Find and load the encoding file for this operating system.
  1.3105 + *	Before this is called, Tcl makes assumptions about the
  1.3106 + *	native string representation, but the true encoding is not
  1.3107 + *	assured.
  1.3108 + *
  1.3109 + * Results:
  1.3110 + *	Return result of TclpInitLibraryPath, which reports whether the
  1.3111 + *	path is clean (0) or dirty (1) UTF.
  1.3112 + *
  1.3113 + * Side effects:
  1.3114 + *	Varied, see the respective initialization routines.
  1.3115 + *
  1.3116 + *-------------------------------------------------------------------------
  1.3117 + */
  1.3118 +
  1.3119 +static int
  1.3120 +TclFindEncodings(argv0)
  1.3121 +    CONST char *argv0;		/* Name of executable from argv[0] to main()
  1.3122 +				 * in native multi-byte encoding. */
  1.3123 +{
  1.3124 +    int mustCleanUtf = 0;
  1.3125 +
  1.3126 +    if (encodingsInitialized == 0) {
  1.3127 +	/* 
  1.3128 +	 * Double check inside the mutex.  There may be calls
  1.3129 +	 * back into this routine from some of the procedures below.
  1.3130 +	 */
  1.3131 +
  1.3132 +	TclpInitLock();
  1.3133 +	if (encodingsInitialized == 0) {
  1.3134 +	    char *native;
  1.3135 +	    Tcl_Obj *pathPtr;
  1.3136 +	    Tcl_DString libPath, buffer;
  1.3137 +
  1.3138 +	    /*
  1.3139 +	     * Have to set this bit here to avoid deadlock with the
  1.3140 +	     * routines below us that call into TclInitSubsystems.
  1.3141 +	     */
  1.3142 +
  1.3143 +	    encodingsInitialized = 1;
  1.3144 +
  1.3145 +	    native = TclpFindExecutable(argv0);
  1.3146 +	    mustCleanUtf = TclpInitLibraryPath(native);
  1.3147 +
  1.3148 +	    /*
  1.3149 +	     * The library path was set in the TclpInitLibraryPath routine.
  1.3150 +	     * The string set is a dirty UTF string.  To preserve the value
  1.3151 +	     * convert the UTF string back to native before setting the new
  1.3152 +	     * default encoding.
  1.3153 +	     */
  1.3154 +
  1.3155 +	    pathPtr = TclGetLibraryPath();
  1.3156 +	    if ((pathPtr != NULL) && mustCleanUtf) {
  1.3157 +		Tcl_UtfToExternalDString(NULL, Tcl_GetString(pathPtr), -1,
  1.3158 +			&libPath);
  1.3159 +	    }
  1.3160 +
  1.3161 +	    TclpSetInitialEncodings();
  1.3162 +
  1.3163 +	    /*
  1.3164 +	     * Now convert the native string back to UTF.
  1.3165 +	     */
  1.3166 +
  1.3167 +	    if ((pathPtr != NULL) && mustCleanUtf) {
  1.3168 +		Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&libPath), -1,
  1.3169 +			&buffer);
  1.3170 +		pathPtr = Tcl_NewStringObj(Tcl_DStringValue(&buffer), -1);
  1.3171 +		TclSetLibraryPath(pathPtr);
  1.3172 +
  1.3173 +		Tcl_DStringFree(&libPath);
  1.3174 +		Tcl_DStringFree(&buffer);
  1.3175 +	    }
  1.3176 +	}
  1.3177 +	TclpInitUnlock();
  1.3178 +    }
  1.3179 +
  1.3180 +    return mustCleanUtf;
  1.3181 +}