os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclEncoding.c
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  * tclEncoding.c --
     3  *
     4  *	Contains the implementation of the encoding conversion package.
     5  *
     6  * Copyright (c) 1996-1998 Sun Microsystems, Inc.
     7  * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.  
     8  *
     9  * See the file "license.terms" for information on usage and redistribution
    10  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
    11  *
    12  * RCS: @(#) $Id: tclEncoding.c,v 1.16.2.14 2007/02/12 19:25:42 andreas_kupries Exp $
    13  */
    14 
    15 #include "tclInt.h"
    16 #include "tclPort.h"
    17 #if defined(__SYMBIAN32__) 
    18 #include "tclSymbianGlobals.h"
    19 #endif 
    20 
    21 typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));
    22 
    23 /*
    24  * The following data structure represents an encoding, which describes how
    25  * to convert between various character sets and UTF-8.
    26  */
    27 
    28 typedef struct Encoding {
    29     char *name;			/* Name of encoding.  Malloced because (1)
    30 				 * hash table entry that owns this encoding
    31 				 * may be freed prior to this encoding being
    32 				 * freed, (2) string passed in the
    33 				 * Tcl_EncodingType structure may not be
    34 				 * persistent. */
    35     Tcl_EncodingConvertProc *toUtfProc;
    36 				/* Procedure to convert from external
    37 				 * encoding into UTF-8. */
    38     Tcl_EncodingConvertProc *fromUtfProc;
    39 				/* Procedure to convert from UTF-8 into
    40 				 * external encoding. */
    41     Tcl_EncodingFreeProc *freeProc;
    42 				/* If non-NULL, procedure to call when this
    43 				 * encoding is deleted. */
    44     int nullSize;		/* Number of 0x00 bytes that signify
    45 				 * end-of-string in this encoding.  This
    46 				 * number is used to determine the source
    47 				 * string length when the srcLen argument is
    48 				 * negative.  This number can be 1 or 2. */
    49     ClientData clientData;	/* Arbitrary value associated with encoding
    50 				 * type.  Passed to conversion procedures. */
    51     LengthProc *lengthProc;	/* Function to compute length of
    52 				 * null-terminated strings in this encoding.
    53 				 * If nullSize is 1, this is strlen; if
    54 				 * nullSize is 2, this is a function that
    55 				 * returns the number of bytes in a 0x0000
    56 				 * terminated string. */
    57     int refCount;		/* Number of uses of this structure. */
    58     Tcl_HashEntry *hPtr;	/* Hash table entry that owns this encoding. */
    59 } Encoding;
    60 
    61 /*
    62  * The following structure is the clientData for a dynamically-loaded,
    63  * table-driven encoding created by LoadTableEncoding().  It maps between
    64  * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only)
    65  * encoding.
    66  */
    67 
    68 typedef struct TableEncodingData {
    69     int fallback;		/* Character (in this encoding) to
    70 				 * substitute when this encoding cannot
    71 				 * represent a UTF-8 character. */
    72     char prefixBytes[256];	/* If a byte in the input stream is a lead
    73 				 * byte for a 2-byte sequence, the
    74 				 * corresponding entry in this array is 1,
    75 				 * otherwise it is 0. */
    76     unsigned short **toUnicode;	/* Two dimensional sparse matrix to map
    77 				 * characters from the encoding to Unicode.
    78 				 * Each element of the toUnicode array points
    79 				 * to an array of 256 shorts.  If there is no
    80 				 * corresponding character in Unicode, the
    81 				 * value in the matrix is 0x0000.  malloc'd. */
    82     unsigned short **fromUnicode;
    83 				/* Two dimensional sparse matrix to map
    84 				 * characters from Unicode to the encoding.
    85 				 * Each element of the fromUnicode array
    86 				 * points to an array of 256 shorts.  If there
    87 				 * is no corresponding character the encoding,
    88 				 * the value in the matrix is 0x0000.
    89 				 * malloc'd. */
    90 } TableEncodingData;
    91 
    92 /*
    93  * The following structures is the clientData for a dynamically-loaded,
    94  * escape-driven encoding that is itself comprised of other simpler
    95  * encodings.  An example is "iso-2022-jp", which uses escape sequences to
    96  * switch between ascii, jis0208, jis0212, gb2312, and ksc5601.  Note that
    97  * "escape-driven" does not necessarily mean that the ESCAPE character is
    98  * the character used for switching character sets.
    99  */
   100 
   101 typedef struct EscapeSubTable {
   102     unsigned int sequenceLen;	/* Length of following string. */
   103     char sequence[16];		/* Escape code that marks this encoding. */
   104     char name[32];		/* Name for encoding. */
   105     Encoding *encodingPtr;	/* Encoding loaded using above name, or NULL
   106 				 * if this sub-encoding has not been needed
   107 				 * yet. */
   108 } EscapeSubTable;
   109 
   110 typedef struct EscapeEncodingData {
   111     int fallback;		/* Character (in this encoding) to
   112 				 * substitute when this encoding cannot
   113 				 * represent a UTF-8 character. */
   114     unsigned int initLen;	/* Length of following string. */
   115     char init[16];		/* String to emit or expect before first char
   116 				 * in conversion. */
   117     unsigned int finalLen;	/* Length of following string. */
   118     char final[16];		/* String to emit or expect after last char
   119 				 * in conversion. */
   120     char prefixBytes[256];	/* If a byte in the input stream is the 
   121 				 * first character of one of the escape 
   122 				 * sequences in the following array, the 
   123 				 * corresponding entry in this array is 1,
   124 				 * otherwise it is 0. */
   125     int numSubTables;		/* Length of following array. */
   126     EscapeSubTable subTables[1];/* Information about each EscapeSubTable
   127 				 * used by this encoding type.  The actual 
   128 				 * size will be as large as necessary to 
   129 				 * hold all EscapeSubTables. */
   130 } EscapeEncodingData;
   131 
   132 /*
   133  * Constants used when loading an encoding file to identify the type of the
   134  * file.
   135  */
   136 
   137 #define ENCODING_SINGLEBYTE	0
   138 #define ENCODING_DOUBLEBYTE	1
   139 #define ENCODING_MULTIBYTE	2
   140 #define ENCODING_ESCAPE		3
   141 
   142 #if !defined(__SYMBIAN32__) || !defined(__WINSCW__)
   143 /*
   144  * Initialize the default encoding directory.  If this variable contains
   145  * a non NULL value, it will be the first path used to locate the
   146  * system encoding files.
   147  */
   148 
   149 char *tclDefaultEncodingDir = NULL;
   150 
   151 static int encodingsInitialized  = 0;
   152 
   153 /*
   154  * Hash table that keeps track of all loaded Encodings.  Keys are
   155  * the string names that represent the encoding, values are (Encoding *).
   156  */
   157  
   158 static Tcl_HashTable encodingTable;
   159 TCL_DECLARE_MUTEX(encodingMutex)
   160 
   161 /*
   162  * The following are used to hold the default and current system encodings.  
   163  * If NULL is passed to one of the conversion routines, the current setting 
   164  * of the system encoding will be used to perform the conversion.
   165  */
   166 
   167 static Tcl_Encoding defaultEncoding;
   168 static Tcl_Encoding systemEncoding;
   169 #endif
   170 /*
   171  * The following variable is used in the sparse matrix code for a
   172  * TableEncoding to represent a page in the table that has no entries.
   173  */
   174 
   175 static unsigned short emptyPage[256];
   176 
   177 /*
   178  * Procedures used only in this module.
   179  */
   180 
   181 static int		BinaryProc _ANSI_ARGS_((ClientData clientData,
   182 			    CONST char *src, int srcLen, int flags,
   183 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   184 			    int *srcReadPtr, int *dstWrotePtr,
   185 			    int *dstCharsPtr));
   186 static void		DupEncodingIntRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
   187 			    Tcl_Obj *dupPtr));
   188 static void		EscapeFreeProc _ANSI_ARGS_((ClientData clientData));
   189 static int		EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData,
   190 			    CONST char *src, int srcLen, int flags,
   191 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   192 			    int *srcReadPtr, int *dstWrotePtr,
   193 			    int *dstCharsPtr));
   194 static int		EscapeToUtfProc _ANSI_ARGS_((ClientData clientData,
   195 			    CONST char *src, int srcLen, int flags,
   196 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   197 			    int *srcReadPtr, int *dstWrotePtr,
   198 			    int *dstCharsPtr));
   199 static void		FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding));
   200 static void		FreeEncodingIntRep _ANSI_ARGS_((Tcl_Obj *objPtr));
   201 static Encoding *	GetTableEncoding _ANSI_ARGS_((
   202 			    EscapeEncodingData *dataPtr, int state));
   203 static Tcl_Encoding	LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp,
   204 			    CONST char *name));
   205 static Tcl_Encoding	LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp,
   206 			    CONST char *name, int type, Tcl_Channel chan));
   207 static Tcl_Encoding	LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, 
   208 			    Tcl_Channel chan));
   209 static Tcl_Channel	OpenEncodingFile _ANSI_ARGS_((CONST char *dir,
   210 			    CONST char *name));
   211 static void		TableFreeProc _ANSI_ARGS_((ClientData clientData));
   212 static int		TableFromUtfProc _ANSI_ARGS_((ClientData clientData,
   213 			    CONST char *src, int srcLen, int flags,
   214 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   215 			    int *srcReadPtr, int *dstWrotePtr,
   216 			    int *dstCharsPtr));
   217 static int		TableToUtfProc _ANSI_ARGS_((ClientData clientData,
   218 			    CONST char *src, int srcLen, int flags,
   219 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   220 			    int *srcReadPtr, int *dstWrotePtr,
   221 			    int *dstCharsPtr));
   222 static size_t		unilen _ANSI_ARGS_((CONST char *src));
   223 static int		UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData,
   224 			    CONST char *src, int srcLen, int flags,
   225 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   226 			    int *srcReadPtr, int *dstWrotePtr,
   227 			    int *dstCharsPtr));
   228 static int		UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData,
   229 			    CONST char *src, int srcLen, int flags,
   230 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   231 			    int *srcReadPtr, int *dstWrotePtr,
   232 			    int *dstCharsPtr));
   233 static int		UtfToUtfProc _ANSI_ARGS_((ClientData clientData,
   234 			    CONST char *src, int srcLen, int flags,
   235 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   236 			    int *srcReadPtr, int *dstWrotePtr,
   237 			    int *dstCharsPtr, int pureNullMode));
   238 static int		UtfIntToUtfExtProc _ANSI_ARGS_((ClientData clientData,
   239 			    CONST char *src, int srcLen, int flags,
   240 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   241 			    int *srcReadPtr, int *dstWrotePtr,
   242 			    int *dstCharsPtr));
   243 static int		UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData,
   244 			    CONST char *src, int srcLen, int flags,
   245 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
   246 			    int *srcReadPtr, int *dstWrotePtr,
   247 			    int *dstCharsPtr));
   248 static int		TclFindEncodings _ANSI_ARGS_((CONST char *argv0));
   249 
   250 /*
   251  * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep.
   252  * This should help the lifetime of encodings be more useful.  
   253  * See concerns raised in [Bug 1077262].
   254  */
   255 
   256 static Tcl_ObjType EncodingType = {
   257     "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
   258 };
   259 
   260 
   261 /*
   262  *----------------------------------------------------------------------
   263  *
   264  * TclGetEncodingFromObj --
   265  *
   266  *      Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr),
   267  *      if possible, and returns TCL_OK.  If no such encoding exists,
   268  *      TCL_ERROR is returned, and if interp is non-NULL, an error message
   269  *      is written there.
   270  *
   271  * Results:
   272  *      Standard Tcl return code.
   273  *
   274  * Side effects:
   275  * 	Caches the Tcl_Encoding value as the internal rep of (*objPtr).
   276  *
   277  *----------------------------------------------------------------------
   278  */
   279 int 
   280 TclGetEncodingFromObj(interp, objPtr, encodingPtr)
   281     Tcl_Interp *interp;
   282     Tcl_Obj *objPtr;
   283     Tcl_Encoding *encodingPtr;
   284 {
   285     CONST char *name = Tcl_GetString(objPtr);
   286     if (objPtr->typePtr != &EncodingType) {
   287 	Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
   288 
   289 	if (encoding == NULL) {
   290 	    return TCL_ERROR;
   291 	}
   292 	if (objPtr->typePtr && objPtr->typePtr->freeIntRepProc) {
   293 	    objPtr->typePtr->freeIntRepProc(objPtr);
   294 	}
   295 	objPtr->internalRep.otherValuePtr = (VOID *) encoding;
   296 	objPtr->typePtr = &EncodingType;
   297     }
   298     *encodingPtr = Tcl_GetEncoding(NULL, name);
   299     return TCL_OK;
   300 }
   301 
   302 /*
   303  *----------------------------------------------------------------------
   304  *
   305  * FreeEncodingIntRep --
   306  *
   307  *      The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType.
   308  *
   309  *----------------------------------------------------------------------
   310  */
   311 static void
   312 FreeEncodingIntRep(objPtr)
   313     Tcl_Obj *objPtr;
   314 {
   315     Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr);
   316 }
   317 
   318 /*
   319  *----------------------------------------------------------------------
   320  *
   321  * DupEncodingIntRep --
   322  *
   323  *      The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.
   324  *
   325  *----------------------------------------------------------------------
   326  */
   327 static void
   328 DupEncodingIntRep(srcPtr, dupPtr)
   329     Tcl_Obj *srcPtr;
   330     Tcl_Obj *dupPtr;
   331 {
   332     dupPtr->internalRep.otherValuePtr = (VOID *)
   333 	    Tcl_GetEncoding(NULL, srcPtr->bytes);
   334 }
   335 
   336 /*
   337  *---------------------------------------------------------------------------
   338  *
   339  * TclInitEncodingSubsystem --
   340  *
   341  *	Initialize all resources used by this subsystem on a per-process
   342  *	basis.  
   343  *
   344  * Results:
   345  *	None.
   346  *
   347  * Side effects:
   348  *	Depends on the memory, object, and IO subsystems.
   349  *
   350  *---------------------------------------------------------------------------
   351  */
   352 
   353 void
   354 TclInitEncodingSubsystem()
   355 {
   356     Tcl_EncodingType type;
   357 
   358     Tcl_MutexLock(&encodingMutex);
   359     Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
   360     Tcl_MutexUnlock(&encodingMutex);
   361     
   362     /*
   363      * Create a few initial encodings.  Note that the UTF-8 to UTF-8 
   364      * translation is not a no-op, because it will turn a stream of
   365      * improperly formed UTF-8 into a properly formed stream.
   366      */
   367 
   368     type.encodingName	= "identity";
   369     type.toUtfProc	= BinaryProc;
   370     type.fromUtfProc	= BinaryProc;
   371     type.freeProc	= NULL;
   372     type.nullSize	= 1;
   373     type.clientData	= NULL;
   374 
   375     defaultEncoding	= Tcl_CreateEncoding(&type);
   376     systemEncoding	= Tcl_GetEncoding(NULL, type.encodingName);
   377 
   378     type.encodingName	= "utf-8";
   379     type.toUtfProc	= UtfExtToUtfIntProc;
   380     type.fromUtfProc	= UtfIntToUtfExtProc;
   381     type.freeProc	= NULL;
   382     type.nullSize	= 1;
   383     type.clientData	= NULL;
   384     Tcl_CreateEncoding(&type);
   385 
   386     type.encodingName   = "unicode";
   387     type.toUtfProc	= UnicodeToUtfProc;
   388     type.fromUtfProc    = UtfToUnicodeProc;
   389     type.freeProc	= NULL;
   390     type.nullSize	= 2;
   391     type.clientData	= NULL;
   392     Tcl_CreateEncoding(&type);
   393 }
   394 
   395 
   396 /*
   397  *----------------------------------------------------------------------
   398  *
   399  * TclFinalizeEncodingSubsystem --
   400  *
   401  *	Release the state associated with the encoding subsystem.
   402  *
   403  * Results:
   404  *	None.
   405  *
   406  * Side effects:
   407  *	Frees all of the encodings.
   408  *
   409  *----------------------------------------------------------------------
   410  */
   411 
   412 void
   413 TclFinalizeEncodingSubsystem()
   414 {
   415     Tcl_HashSearch search;
   416     Tcl_HashEntry *hPtr;
   417 
   418     Tcl_MutexLock(&encodingMutex);
   419     encodingsInitialized  = 0;
   420     FreeEncoding(systemEncoding);
   421     hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
   422     while (hPtr != NULL) {
   423 	/*
   424 	 * Call FreeEncoding instead of doing it directly to handle refcounts
   425 	 * like escape encodings use.  [Bug #524674]
   426 	 * Make sure to call Tcl_FirstHashEntry repeatedly so that all
   427 	 * encodings are eventually cleaned up.
   428 	 */
   429 	FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr));
   430 	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
   431     }
   432     Tcl_DeleteHashTable(&encodingTable);
   433     Tcl_MutexUnlock(&encodingMutex);
   434 }
   435 
   436 /*
   437  *-------------------------------------------------------------------------
   438  *
   439  * Tcl_GetDefaultEncodingDir --
   440  *
   441  *
   442  * Results:
   443  *
   444  * Side effects:
   445  *
   446  *-------------------------------------------------------------------------
   447  */
   448 
   449 EXPORT_C CONST char *
   450 Tcl_GetDefaultEncodingDir()
   451 {
   452     return tclDefaultEncodingDir;
   453 }
   454 
   455 /*
   456  *-------------------------------------------------------------------------
   457  *
   458  * Tcl_SetDefaultEncodingDir --
   459  *
   460  *
   461  * Results:
   462  *
   463  * Side effects:
   464  *
   465  *-------------------------------------------------------------------------
   466  */
   467 
   468 EXPORT_C void
   469 Tcl_SetDefaultEncodingDir(path)
   470     CONST char *path;
   471 {
   472     tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1);
   473     strcpy(tclDefaultEncodingDir, path);
   474 }
   475 
   476 /*
   477  *-------------------------------------------------------------------------
   478  *
   479  * Tcl_GetEncoding --
   480  *
   481  *	Given the name of a encoding, find the corresponding Tcl_Encoding
   482  *	token.  If the encoding did not already exist, Tcl attempts to
   483  *	dynamically load an encoding by that name.
   484  *
   485  * Results:
   486  *	Returns a token that represents the encoding.  If the name didn't
   487  *	refer to any known or loadable encoding, NULL is returned.  If
   488  *	NULL was returned, an error message is left in interp's result
   489  *	object, unless interp was NULL.
   490  *
   491  * Side effects:
   492  *	The new encoding type is entered into a table visible to all
   493  *	interpreters, keyed off the encoding's name.  For each call to
   494  *	this procedure, there should eventually be a call to
   495  *	Tcl_FreeEncoding, so that the database can be cleaned up when
   496  *	encodings aren't needed anymore.
   497  *
   498  *-------------------------------------------------------------------------
   499  */
   500 
   501 EXPORT_C Tcl_Encoding
   502 Tcl_GetEncoding(interp, name)
   503     Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
   504     CONST char *name;		/* The name of the desired encoding. */
   505 {
   506     Tcl_HashEntry *hPtr;
   507     Encoding *encodingPtr;
   508 
   509     Tcl_MutexLock(&encodingMutex);
   510     if (name == NULL) {
   511 	encodingPtr = (Encoding *) systemEncoding;
   512 	encodingPtr->refCount++;
   513 	Tcl_MutexUnlock(&encodingMutex);
   514 	return systemEncoding;
   515     }
   516 
   517     hPtr = Tcl_FindHashEntry(&encodingTable, name);
   518     if (hPtr != NULL) {
   519 	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   520 	encodingPtr->refCount++;
   521 	Tcl_MutexUnlock(&encodingMutex);
   522 	return (Tcl_Encoding) encodingPtr;
   523     }
   524     Tcl_MutexUnlock(&encodingMutex);
   525     return LoadEncodingFile(interp, name);
   526 }
   527 
   528 /*
   529  *---------------------------------------------------------------------------
   530  *
   531  * Tcl_FreeEncoding --
   532  *
   533  *	This procedure is called to release an encoding allocated by
   534  *	Tcl_CreateEncoding() or Tcl_GetEncoding().
   535  *
   536  * Results:
   537  *	None.
   538  *
   539  * Side effects:
   540  *	The reference count associated with the encoding is decremented
   541  *	and the encoding may be deleted if nothing is using it anymore.
   542  *
   543  *---------------------------------------------------------------------------
   544  */
   545 
   546 EXPORT_C void
   547 Tcl_FreeEncoding(encoding)
   548     Tcl_Encoding encoding;
   549 {
   550     Tcl_MutexLock(&encodingMutex);
   551     FreeEncoding(encoding);
   552     Tcl_MutexUnlock(&encodingMutex);
   553 }
   554 
   555 /*
   556  *----------------------------------------------------------------------
   557  *
   558  * FreeEncoding --
   559  *
   560  *	This procedure is called to release an encoding by procedures
   561  *	that already have the encodingMutex.
   562  *
   563  * Results:
   564  *	None.
   565  *
   566  * Side effects:
   567  *	The reference count associated with the encoding is decremented
   568  *	and the encoding may be deleted if nothing is using it anymore.
   569  *
   570  *----------------------------------------------------------------------
   571  */
   572 
   573 static void
   574 FreeEncoding(encoding)
   575     Tcl_Encoding encoding;
   576 {
   577     Encoding *encodingPtr;
   578     
   579     encodingPtr = (Encoding *) encoding;
   580     if (encodingPtr == NULL) {
   581 	return;
   582     }
   583     encodingPtr->refCount--;
   584     if (encodingPtr->refCount == 0) {
   585 	if (encodingPtr->freeProc != NULL) {
   586 	    (*encodingPtr->freeProc)(encodingPtr->clientData);
   587 	}
   588 	if (encodingPtr->hPtr != NULL) {
   589 	    Tcl_DeleteHashEntry(encodingPtr->hPtr);
   590 	}
   591 	ckfree((char *) encodingPtr->name);
   592 	ckfree((char *) encodingPtr);
   593     }
   594 }
   595 
   596 /*
   597  *-------------------------------------------------------------------------
   598  *
   599  * Tcl_GetEncodingName --
   600  *
   601  *	Given an encoding, return the name that was used to constuct
   602  *	the encoding.
   603  *
   604  * Results:
   605  *	The name of the encoding.
   606  *
   607  * Side effects:
   608  *	None.
   609  *
   610  *---------------------------------------------------------------------------
   611  */
   612 
   613 EXPORT_C CONST char *
   614 Tcl_GetEncodingName(encoding)
   615     Tcl_Encoding encoding;	/* The encoding whose name to fetch. */
   616 {
   617     Encoding *encodingPtr;
   618 
   619     if (encoding == NULL) {
   620 	encoding = systemEncoding;
   621     }
   622     encodingPtr = (Encoding *) encoding;
   623     return encodingPtr->name;
   624 }
   625 
   626 /*
   627  *-------------------------------------------------------------------------
   628  *
   629  * Tcl_GetEncodingNames --
   630  *
   631  *	Get the list of all known encodings, including the ones stored
   632  *	as files on disk in the encoding path.
   633  *
   634  * Results:
   635  *	Modifies interp's result object to hold a list of all the available
   636  *	encodings.
   637  *
   638  * Side effects:
   639  *	None.
   640  *
   641  *-------------------------------------------------------------------------
   642  */
   643 
   644 EXPORT_C void
   645 Tcl_GetEncodingNames(interp)
   646     Tcl_Interp *interp;		/* Interp to hold result. */
   647 {
   648     Tcl_HashSearch search;
   649     Tcl_HashEntry *hPtr;
   650     Tcl_Obj *pathPtr, *resultPtr;
   651     int dummy;
   652 
   653     Tcl_HashTable table;
   654 
   655     Tcl_MutexLock(&encodingMutex);
   656     Tcl_InitHashTable(&table, TCL_STRING_KEYS);
   657     hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
   658     while (hPtr != NULL) {
   659 	Encoding *encodingPtr;
   660 	
   661 	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   662 	Tcl_CreateHashEntry(&table, encodingPtr->name, &dummy);
   663 	hPtr = Tcl_NextHashEntry(&search);
   664     }
   665     Tcl_MutexUnlock(&encodingMutex);
   666 
   667     pathPtr = TclGetLibraryPath();
   668     if (pathPtr != NULL) {
   669 	int i, objc;
   670 	Tcl_Obj **objv;
   671 	char globArgString[10];
   672 	Tcl_Obj* encodingObj = Tcl_NewStringObj("encoding",-1);
   673 	Tcl_IncrRefCount(encodingObj);
   674 	
   675 	objc = 0;
   676 	Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
   677 
   678 	for (i = 0; i < objc; i++) {
   679 	    Tcl_Obj *searchIn;
   680 	    
   681 	    /* 
   682 	     * Construct the path from the element of pathPtr,
   683 	     * joined with 'encoding'.
   684 	     */
   685 	    searchIn = Tcl_FSJoinToPath(objv[i],1,&encodingObj);
   686 	    Tcl_IncrRefCount(searchIn);
   687 	    Tcl_ResetResult(interp);
   688 
   689 	    /*
   690 	     * TclGlob() changes the contents of globArgString, which causes
   691 	     * a segfault if we pass in a pointer to non-writeable memory.
   692 	     * TclGlob() puts its results directly into interp.
   693 	     */
   694 
   695 	    strcpy(globArgString, "*.enc");
   696 	    /* 
   697 	     * The GLOBMODE_TAILS flag returns just the tail of each file
   698 	     * which is the encoding name with a .enc extension 
   699 	     */
   700 	    if ((TclGlob(interp, globArgString, searchIn, 
   701 			 TCL_GLOBMODE_TAILS, NULL) == TCL_OK)) {
   702 		int objc2 = 0;
   703 		Tcl_Obj **objv2;
   704 		int j;
   705 
   706 		Tcl_ListObjGetElements(NULL, Tcl_GetObjResult(interp), &objc2,
   707 			&objv2);
   708 
   709 		for (j = 0; j < objc2; j++) {
   710 		    int length;
   711 		    char *string;
   712 		    string = Tcl_GetStringFromObj(objv2[j], &length);
   713 		    length -= 4;
   714 		    if (length > 0) {
   715 			string[length] = '\0';
   716 			Tcl_CreateHashEntry(&table, string, &dummy);
   717 			string[length] = '.';
   718 		    }
   719 		}
   720 	    }
   721 	    Tcl_DecrRefCount(searchIn);
   722 	}
   723 	Tcl_DecrRefCount(encodingObj);
   724     }
   725 
   726     /*
   727      * Clear any values placed in the result by globbing.
   728      */
   729 
   730     Tcl_ResetResult(interp);
   731     resultPtr = Tcl_GetObjResult(interp);
   732 
   733     hPtr = Tcl_FirstHashEntry(&table, &search);
   734     while (hPtr != NULL) {
   735 	Tcl_Obj *strPtr;
   736 
   737 	strPtr = Tcl_NewStringObj(Tcl_GetHashKey(&table, hPtr), -1);
   738 	Tcl_ListObjAppendElement(NULL, resultPtr, strPtr);
   739 	hPtr = Tcl_NextHashEntry(&search);
   740     }
   741     Tcl_DeleteHashTable(&table);
   742 }
   743 
   744 /*
   745  *------------------------------------------------------------------------
   746  *
   747  * Tcl_SetSystemEncoding --
   748  *
   749  *	Sets the default encoding that should be used whenever the user
   750  *	passes a NULL value in to one of the conversion routines.
   751  *	If the supplied name is NULL, the system encoding is reset to the
   752  *	default system encoding.
   753  *
   754  * Results:
   755  *	The return value is TCL_OK if the system encoding was successfully
   756  *	set to the encoding specified by name, TCL_ERROR otherwise.  If
   757  *	TCL_ERROR is returned, an error message is left in interp's result
   758  *	object, unless interp was NULL.
   759  *
   760  * Side effects:
   761  *	The reference count of the new system encoding is incremented.
   762  *	The reference count of the old system encoding is decremented and 
   763  *	it may be freed.  
   764  *
   765  *------------------------------------------------------------------------
   766  */
   767 
   768 EXPORT_C int
   769 Tcl_SetSystemEncoding(interp, name)
   770     Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
   771     CONST char *name;		/* The name of the desired encoding, or NULL
   772 				 * to reset to default encoding. */
   773 {
   774     Tcl_Encoding encoding;
   775     Encoding *encodingPtr;
   776 
   777     if (name == NULL) {
   778 	Tcl_MutexLock(&encodingMutex);
   779 	encoding = defaultEncoding;
   780 	encodingPtr = (Encoding *) encoding;
   781 	encodingPtr->refCount++;
   782 	Tcl_MutexUnlock(&encodingMutex);
   783     } else {
   784 	encoding = Tcl_GetEncoding(interp, name);
   785 	if (encoding == NULL) {
   786 	    return TCL_ERROR;
   787 	}
   788     }
   789 
   790     Tcl_MutexLock(&encodingMutex);
   791     FreeEncoding(systemEncoding);
   792     systemEncoding = encoding;
   793     Tcl_MutexUnlock(&encodingMutex);
   794 
   795     return TCL_OK;
   796 }
   797 
   798 /*
   799  *---------------------------------------------------------------------------
   800  *
   801  * Tcl_CreateEncoding --
   802  *
   803  *	This procedure is called to define a new encoding and the procedures
   804  *	that are used to convert between the specified encoding and Unicode.  
   805  *
   806  * Results:
   807  *	Returns a token that represents the encoding.  If an encoding with
   808  *	the same name already existed, the old encoding token remains
   809  *	valid and continues to behave as it used to, and will eventually
   810  *	be garbage collected when the last reference to it goes away.  Any
   811  *	subsequent calls to Tcl_GetEncoding with the specified name will
   812  *	retrieve the most recent encoding token.
   813  *
   814  * Side effects:
   815  *	The new encoding type is entered into a table visible to all
   816  *	interpreters, keyed off the encoding's name.  For each call to
   817  *	this procedure, there should eventually be a call to
   818  *	Tcl_FreeEncoding, so that the database can be cleaned up when
   819  *	encodings aren't needed anymore.
   820  *
   821  *---------------------------------------------------------------------------
   822  */ 
   823 
   824 EXPORT_C Tcl_Encoding
   825 Tcl_CreateEncoding(typePtr)
   826     Tcl_EncodingType *typePtr;	/* The encoding type. */
   827 {
   828     Tcl_HashEntry *hPtr;
   829     int new;
   830     Encoding *encodingPtr;
   831     char *name;
   832 
   833     Tcl_MutexLock(&encodingMutex);
   834     hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new);
   835     if (new == 0) {
   836 	/*
   837 	 * Remove old encoding from hash table, but don't delete it until
   838 	 * last reference goes away.
   839 	 */
   840 	 
   841 	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
   842 	encodingPtr->hPtr = NULL;
   843     }
   844 
   845     name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1);
   846     
   847     encodingPtr = (Encoding *) ckalloc(sizeof(Encoding));
   848     encodingPtr->name		= strcpy(name, typePtr->encodingName);
   849     encodingPtr->toUtfProc	= typePtr->toUtfProc;
   850     encodingPtr->fromUtfProc	= typePtr->fromUtfProc;
   851     encodingPtr->freeProc	= typePtr->freeProc;
   852     encodingPtr->nullSize	= typePtr->nullSize;
   853     encodingPtr->clientData	= typePtr->clientData;
   854     if (typePtr->nullSize == 1) {
   855 	encodingPtr->lengthProc = (LengthProc *) strlen;
   856     } else {
   857 	encodingPtr->lengthProc = (LengthProc *) unilen;
   858     }
   859     encodingPtr->refCount	= 1;
   860     encodingPtr->hPtr		= hPtr;
   861     Tcl_SetHashValue(hPtr, encodingPtr);
   862 
   863     Tcl_MutexUnlock(&encodingMutex);
   864 
   865     return (Tcl_Encoding) encodingPtr;
   866 }
   867 
   868 /*
   869  *-------------------------------------------------------------------------
   870  *
   871  * Tcl_ExternalToUtfDString --
   872  *
   873  *	Convert a source buffer from the specified encoding into UTF-8.
   874  *	If any of the bytes in the source buffer are invalid or cannot
   875  *	be represented in the target encoding, a default fallback
   876  *	character will be substituted.
   877  *
   878  * Results:
   879  *	The converted bytes are stored in the DString, which is then NULL
   880  *	terminated.  The return value is a pointer to the value stored 
   881  *	in the DString.
   882  *
   883  * Side effects:
   884  *	None.
   885  *
   886  *-------------------------------------------------------------------------
   887  */
   888 
   889 EXPORT_C char * 
   890 Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
   891     Tcl_Encoding encoding;	/* The encoding for the source string, or
   892 				 * NULL for the default system encoding. */
   893     CONST char *src;		/* Source string in specified encoding. */
   894     int srcLen;			/* Source string length in bytes, or < 0 for
   895 				 * encoding-specific string length. */
   896     Tcl_DString *dstPtr;	/* Uninitialized or free DString in which 
   897 				 * the converted string is stored. */
   898 {
   899     char *dst;
   900     Tcl_EncodingState state;
   901     Encoding *encodingPtr;
   902     int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
   903 
   904     Tcl_DStringInit(dstPtr);
   905     dst = Tcl_DStringValue(dstPtr);
   906     dstLen = dstPtr->spaceAvl - 1;
   907     
   908     if (encoding == NULL) {
   909 	encoding = systemEncoding;
   910     }
   911     encodingPtr = (Encoding *) encoding;
   912 
   913     if (src == NULL) {
   914 	srcLen = 0;
   915     } else if (srcLen < 0) {
   916 	srcLen = (*encodingPtr->lengthProc)(src);
   917     }
   918     flags = TCL_ENCODING_START | TCL_ENCODING_END;
   919     while (1) {
   920 	result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src,
   921 		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
   922 		&dstChars);
   923 	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
   924 	if (result != TCL_CONVERT_NOSPACE) {
   925 	    Tcl_DStringSetLength(dstPtr, soFar);
   926 	    return Tcl_DStringValue(dstPtr);
   927 	}
   928 	flags &= ~TCL_ENCODING_START;
   929 	src += srcRead;
   930 	srcLen -= srcRead;
   931 	if (Tcl_DStringLength(dstPtr) == 0) {
   932 	    Tcl_DStringSetLength(dstPtr, dstLen);
   933 	}
   934 	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
   935 	dst = Tcl_DStringValue(dstPtr) + soFar;
   936 	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
   937     }
   938 }
   939 
   940 /*
   941  *-------------------------------------------------------------------------
   942  *
   943  * Tcl_ExternalToUtf --
   944  *
   945  *	Convert a source buffer from the specified encoding into UTF-8.
   946  *
   947  * Results:
   948  *	The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
   949  *	TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
   950  *	as documented in tcl.h.
   951  *
   952  * Side effects:
   953  *	The converted bytes are stored in the output buffer.  
   954  *
   955  *-------------------------------------------------------------------------
   956  */
   957 
   958 EXPORT_C int
   959 Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
   960 	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
   961     Tcl_Interp *interp;		/* Interp for error return, if not NULL. */
   962     Tcl_Encoding encoding;	/* The encoding for the source string, or
   963 				 * NULL for the default system encoding. */
   964     CONST char *src;		/* Source string in specified encoding. */
   965     int srcLen;			/* Source string length in bytes, or < 0 for
   966 				 * encoding-specific string length. */
   967     int flags;			/* Conversion control flags. */
   968     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
   969 				 * state information used during a piecewise
   970 				 * conversion.  Contents of statePtr are
   971 				 * initialized and/or reset by conversion
   972 				 * routine under control of flags argument. */
   973     char *dst;			/* Output buffer in which converted string
   974 				 * is stored. */
   975     int dstLen;			/* The maximum length of output buffer in
   976 				 * bytes. */
   977     int *srcReadPtr;		/* Filled with the number of bytes from the
   978 				 * source string that were converted.  This
   979 				 * may be less than the original source length
   980 				 * if there was a problem converting some
   981 				 * source characters. */
   982     int *dstWrotePtr;		/* Filled with the number of bytes that were
   983 				 * stored in the output buffer as a result of
   984 				 * the conversion. */
   985     int *dstCharsPtr;		/* Filled with the number of characters that
   986 				 * correspond to the bytes stored in the
   987 				 * output buffer. */
   988 {
   989     Encoding *encodingPtr;
   990     int result, srcRead, dstWrote, dstChars;
   991     Tcl_EncodingState state;
   992     
   993     if (encoding == NULL) {
   994 	encoding = systemEncoding;
   995     }
   996     encodingPtr = (Encoding *) encoding;
   997 
   998     if (src == NULL) {
   999 	srcLen = 0;
  1000     } else if (srcLen < 0) {
  1001 	srcLen = (*encodingPtr->lengthProc)(src);
  1002     }
  1003     if (statePtr == NULL) {
  1004 	flags |= TCL_ENCODING_START | TCL_ENCODING_END;
  1005 	statePtr = &state;
  1006     }
  1007     if (srcReadPtr == NULL) {
  1008 	srcReadPtr = &srcRead;
  1009     }
  1010     if (dstWrotePtr == NULL) {
  1011 	dstWrotePtr = &dstWrote;
  1012     }
  1013     if (dstCharsPtr == NULL) {
  1014 	dstCharsPtr = &dstChars;
  1015     }
  1016 
  1017     /*
  1018      * If there are any null characters in the middle of the buffer, they will
  1019      * converted to the UTF-8 null character (\xC080).  To get the actual 
  1020      * \0 at the end of the destination buffer, we need to append it manually.
  1021      */
  1022 
  1023     dstLen--;
  1024     result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen,
  1025 	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
  1026 	    dstCharsPtr);
  1027     dst[*dstWrotePtr] = '\0';
  1028     return result;
  1029 }
  1030 
  1031 /*
  1032  *-------------------------------------------------------------------------
  1033  *
  1034  * Tcl_UtfToExternalDString --
  1035  *
  1036  *	Convert a source buffer from UTF-8 into the specified encoding.
  1037  *	If any of the bytes in the source buffer are invalid or cannot
  1038  *	be represented in the target encoding, a default fallback
  1039  *	character will be substituted.
  1040  *
  1041  * Results:
  1042  *	The converted bytes are stored in the DString, which is then
  1043  *	NULL terminated in an encoding-specific manner.  The return value 
  1044  *	is a pointer to the value stored in the DString.
  1045  *
  1046  * Side effects:
  1047  *	None.
  1048  *
  1049  *-------------------------------------------------------------------------
  1050  */
  1051 
  1052 EXPORT_C char *
  1053 Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
  1054     Tcl_Encoding encoding;	/* The encoding for the converted string,
  1055 				 * or NULL for the default system encoding. */
  1056     CONST char *src;		/* Source string in UTF-8. */
  1057     int srcLen;			/* Source string length in bytes, or < 0 for
  1058 				 * strlen(). */
  1059     Tcl_DString *dstPtr;	/* Uninitialized or free DString in which 
  1060 				 * the converted string is stored. */
  1061 {
  1062     char *dst;
  1063     Tcl_EncodingState state;
  1064     Encoding *encodingPtr;
  1065     int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
  1066     
  1067     Tcl_DStringInit(dstPtr);
  1068     dst = Tcl_DStringValue(dstPtr);
  1069     dstLen = dstPtr->spaceAvl - 1;
  1070 
  1071     if (encoding == NULL) {
  1072 	encoding = systemEncoding;
  1073     }
  1074     encodingPtr = (Encoding *) encoding;
  1075 
  1076     if (src == NULL) {
  1077 	srcLen = 0;
  1078     } else if (srcLen < 0) {
  1079 	srcLen = strlen(src);
  1080     }
  1081     flags = TCL_ENCODING_START | TCL_ENCODING_END;
  1082     while (1) {
  1083 	result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src,
  1084 		srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
  1085 		&dstChars);
  1086 	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
  1087 	if (result != TCL_CONVERT_NOSPACE) {
  1088 	    if (encodingPtr->nullSize == 2) {
  1089 	        Tcl_DStringSetLength(dstPtr, soFar + 1);
  1090 	    }
  1091 	    Tcl_DStringSetLength(dstPtr, soFar);
  1092 	    return Tcl_DStringValue(dstPtr);
  1093 	}
  1094 	flags &= ~TCL_ENCODING_START;
  1095 	src += srcRead;
  1096 	srcLen -= srcRead;
  1097 	if (Tcl_DStringLength(dstPtr) == 0) {
  1098 	    Tcl_DStringSetLength(dstPtr, dstLen);
  1099 	}
  1100 	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
  1101 	dst = Tcl_DStringValue(dstPtr) + soFar;
  1102 	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
  1103     }
  1104 }
  1105 
  1106 /*
  1107  *-------------------------------------------------------------------------
  1108  *
  1109  * Tcl_UtfToExternal --
  1110  *
  1111  *	Convert a buffer from UTF-8 into the specified encoding.
  1112  *
  1113  * Results:
  1114  *	The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
  1115  *	TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
  1116  *	as documented in tcl.h.
  1117  *
  1118  * Side effects:
  1119  *	The converted bytes are stored in the output buffer.  
  1120  *
  1121  *-------------------------------------------------------------------------
  1122  */
  1123 
  1124 EXPORT_C int
  1125 Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
  1126 	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
  1127     Tcl_Interp *interp;		/* Interp for error return, if not NULL. */
  1128     Tcl_Encoding encoding;	/* The encoding for the converted string,
  1129 				 * or NULL for the default system encoding. */
  1130     CONST char *src;		/* Source string in UTF-8. */
  1131     int srcLen;			/* Source string length in bytes, or < 0 for
  1132 				 * strlen(). */
  1133     int flags;			/* Conversion control flags. */
  1134     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1135 				 * state information used during a piecewise
  1136 				 * conversion.  Contents of statePtr are
  1137 				 * initialized and/or reset by conversion
  1138 				 * routine under control of flags argument. */
  1139     char *dst;			/* Output buffer in which converted string
  1140 				 * is stored. */
  1141     int dstLen;			/* The maximum length of output buffer in
  1142 				 * bytes. */
  1143     int *srcReadPtr;		/* Filled with the number of bytes from the
  1144 				 * source string that were converted.  This
  1145 				 * may be less than the original source length
  1146 				 * if there was a problem converting some
  1147 				 * source characters. */
  1148     int *dstWrotePtr;		/* Filled with the number of bytes that were
  1149 				 * stored in the output buffer as a result of
  1150 				 * the conversion. */
  1151     int *dstCharsPtr;		/* Filled with the number of characters that
  1152 				 * correspond to the bytes stored in the
  1153 				 * output buffer. */
  1154 {
  1155     Encoding *encodingPtr;
  1156     int result, srcRead, dstWrote, dstChars;
  1157     Tcl_EncodingState state;
  1158     
  1159     if (encoding == NULL) {
  1160 	encoding = systemEncoding;
  1161     }
  1162     encodingPtr = (Encoding *) encoding;
  1163 
  1164     if (src == NULL) {
  1165 	srcLen = 0;
  1166     } else if (srcLen < 0) {
  1167 	srcLen = strlen(src);
  1168     }
  1169     if (statePtr == NULL) {
  1170 	flags |= TCL_ENCODING_START | TCL_ENCODING_END;
  1171 	statePtr = &state;
  1172     }
  1173     if (srcReadPtr == NULL) {
  1174 	srcReadPtr = &srcRead;
  1175     }
  1176     if (dstWrotePtr == NULL) {
  1177 	dstWrotePtr = &dstWrote;
  1178     }
  1179     if (dstCharsPtr == NULL) {
  1180 	dstCharsPtr = &dstChars;
  1181     }
  1182 
  1183     dstLen -= encodingPtr->nullSize;
  1184     result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen,
  1185 	    flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
  1186 	    dstCharsPtr);
  1187     if (encodingPtr->nullSize == 2) {
  1188 	dst[*dstWrotePtr + 1] = '\0';
  1189     }
  1190     dst[*dstWrotePtr] = '\0';
  1191     
  1192     return result;
  1193 }
  1194 
  1195 /*
  1196  *---------------------------------------------------------------------------
  1197  *
  1198  * Tcl_FindExecutable --
  1199  *
  1200  *	This procedure computes the absolute path name of the current
  1201  *	application, given its argv[0] value.
  1202  *
  1203  * Results:
  1204  *	None.
  1205  *
  1206  * Side effects:
  1207  *	The variable tclExecutableName gets filled in with the file
  1208  *	name for the application, if we figured it out.  If we couldn't
  1209  *	figure it out, tclExecutableName is set to NULL.
  1210  *
  1211  *---------------------------------------------------------------------------
  1212  */
  1213 
  1214 EXPORT_C void
  1215 Tcl_FindExecutable(argv0)
  1216     CONST char *argv0;		/* The value of the application's argv[0]
  1217 				 * (native). */
  1218 {
  1219     int mustCleanUtf;
  1220     CONST char *name;
  1221     Tcl_DString buffer, nameString;
  1222 
  1223     TclInitSubsystems(argv0);
  1224 
  1225     if (argv0 == NULL) {
  1226 	goto done;
  1227     }
  1228     if (tclExecutableName != NULL) {
  1229 	ckfree(tclExecutableName);
  1230 	tclExecutableName = NULL;
  1231     }
  1232     if ((name = TclpFindExecutable(argv0)) == NULL) {
  1233 	goto done;
  1234     }
  1235 
  1236     /*
  1237      * The value returned from TclpNameOfExecutable is a UTF string that
  1238      * is possibly dirty depending on when it was initialized.
  1239      * TclFindEncodings will indicate whether we must "clean" the UTF (as
  1240      * reported by the underlying system).  To assure that the UTF string
  1241      * is a properly encoded native string for this system, convert the
  1242      * UTF string to the default native encoding before the default
  1243      * encoding is initialized.  Then, convert it back to UTF after the
  1244      * system encoding is loaded.
  1245      */
  1246     
  1247     Tcl_UtfToExternalDString(NULL, name, -1, &buffer);
  1248     mustCleanUtf = TclFindEncodings(argv0);
  1249 
  1250     /*
  1251      * Now it is OK to convert the native string back to UTF and set
  1252      * the value of the tclExecutableName.
  1253      */
  1254     
  1255     if (mustCleanUtf) {
  1256 	Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&buffer), -1,
  1257 		&nameString);
  1258 	tclExecutableName = (char *)
  1259 	    ckalloc((unsigned) (Tcl_DStringLength(&nameString) + 1));
  1260 	strcpy(tclExecutableName, Tcl_DStringValue(&nameString));
  1261 
  1262 	Tcl_DStringFree(&nameString);
  1263     } else {
  1264 	tclExecutableName = (char *) ckalloc((unsigned) (strlen(name) + 1));
  1265 	strcpy(tclExecutableName, name);
  1266     }
  1267     Tcl_DStringFree(&buffer);
  1268     return;
  1269 	
  1270     done:
  1271     (void) TclFindEncodings(argv0);
  1272 }
  1273 
  1274 /*
  1275  *---------------------------------------------------------------------------
  1276  *
  1277  * LoadEncodingFile --
  1278  *
  1279  *	Read a file that describes an encoding and create a new Encoding
  1280  *	from the data.  
  1281  *
  1282  * Results:
  1283  *	The return value is the newly loaded Encoding, or NULL if
  1284  *	the file didn't exist of was in the incorrect format.  If NULL was
  1285  *	returned, an error message is left in interp's result object,
  1286  *	unless interp was NULL.
  1287  *
  1288  * Side effects:
  1289  *	File read from disk.  
  1290  *
  1291  *---------------------------------------------------------------------------
  1292  */
  1293 
  1294 static Tcl_Encoding
  1295 LoadEncodingFile(interp, name)
  1296     Tcl_Interp *interp;		/* Interp for error reporting, if not NULL. */
  1297     CONST char *name;		/* The name of the encoding file on disk
  1298 				 * and also the name for new encoding. */
  1299 {
  1300     int objc, i, ch;
  1301     Tcl_Obj **objv;
  1302     Tcl_Obj *pathPtr;
  1303     Tcl_Channel chan;
  1304     Tcl_Encoding encoding;
  1305 
  1306     pathPtr = TclGetLibraryPath();
  1307     if (pathPtr == NULL) {
  1308 	goto unknown;
  1309     }
  1310     objc = 0;
  1311     Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
  1312 
  1313     chan = NULL;
  1314     for (i = 0; i < objc; i++) {
  1315 	chan = OpenEncodingFile(Tcl_GetString(objv[i]), name);
  1316 	if (chan != NULL) {
  1317 	    break;
  1318 	}
  1319     }
  1320 
  1321     if (chan == NULL) {
  1322 	goto unknown;
  1323     }
  1324 
  1325     Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
  1326 
  1327     while (1) {
  1328 	Tcl_DString ds;
  1329 
  1330 	Tcl_DStringInit(&ds);
  1331 	Tcl_Gets(chan, &ds);
  1332 	ch = Tcl_DStringValue(&ds)[0];
  1333 	Tcl_DStringFree(&ds);
  1334 	if (ch != '#') {
  1335 	    break;
  1336 	}
  1337     }
  1338 
  1339     encoding = NULL;
  1340     switch (ch) {
  1341 	case 'S': {
  1342 	    encoding = LoadTableEncoding(interp, name, ENCODING_SINGLEBYTE,
  1343 		    chan);
  1344 	    break;
  1345 	}
  1346 	case 'D': {
  1347 	    encoding = LoadTableEncoding(interp, name, ENCODING_DOUBLEBYTE,
  1348 		    chan);
  1349 	    break;
  1350 	}
  1351 	case 'M': {
  1352 	    encoding = LoadTableEncoding(interp, name, ENCODING_MULTIBYTE,
  1353 		    chan);
  1354 	    break;
  1355 	}
  1356 	case 'E': {
  1357 	    encoding = LoadEscapeEncoding(name, chan);
  1358 	    break;
  1359 	}
  1360     }
  1361     if ((encoding == NULL) && (interp != NULL)) {
  1362 	Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL);
  1363 	if (ch == 'E') {
  1364 	    Tcl_AppendResult(interp, " or missing sub-encoding", NULL);
  1365 	}
  1366     }
  1367     Tcl_Close(NULL, chan);
  1368     return encoding;
  1369 
  1370     unknown:
  1371     if (interp != NULL) {
  1372 	Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
  1373     }
  1374     return NULL;
  1375 }
  1376 
  1377 /*
  1378  *----------------------------------------------------------------------
  1379  *
  1380  * OpenEncodingFile --
  1381  *
  1382  *	Look for the file encoding/<name>.enc in the specified
  1383  *	directory.
  1384  *
  1385  * Results:
  1386  *	Returns an open file channel if the file exists.
  1387  *
  1388  * Side effects:
  1389  *	None.
  1390  *
  1391  *----------------------------------------------------------------------
  1392  */
  1393 
  1394 static Tcl_Channel
  1395 OpenEncodingFile(dir, name)
  1396     CONST char *dir;
  1397     CONST char *name;
  1398 
  1399 {
  1400     CONST char *argv[3];
  1401     Tcl_DString pathString;
  1402     CONST char *path;
  1403     Tcl_Channel chan;
  1404     Tcl_Obj *pathPtr;
  1405     
  1406     argv[0] = dir;
  1407     argv[1] = "encoding";
  1408     argv[2] = name;
  1409 
  1410     Tcl_DStringInit(&pathString);
  1411     Tcl_JoinPath(3, argv, &pathString);
  1412     path = Tcl_DStringAppend(&pathString, ".enc", -1);
  1413     pathPtr = Tcl_NewStringObj(path,-1);
  1414 
  1415     Tcl_IncrRefCount(pathPtr);
  1416     chan = Tcl_FSOpenFileChannel(NULL, pathPtr, "r", 0);
  1417     Tcl_DecrRefCount(pathPtr);
  1418 
  1419     Tcl_DStringFree(&pathString);
  1420 
  1421     return chan;
  1422 }
  1423 
  1424 /*
  1425  *-------------------------------------------------------------------------
  1426  *
  1427  * LoadTableEncoding --
  1428  *
  1429  *	Helper function for LoadEncodingTable().  Loads a table to that 
  1430  *	converts between Unicode and some other encoding and creates an 
  1431  *	encoding (using a TableEncoding structure) from that information.
  1432  *
  1433  *	File contains binary data, but begins with a marker to indicate 
  1434  *	byte-ordering, so that same binary file can be read on either
  1435  *	endian platforms.
  1436  *
  1437  * Results:
  1438  *	The return value is the new encoding, or NULL if the encoding 
  1439  *	could not be created (because the file contained invalid data).
  1440  *
  1441  * Side effects:
  1442  *	None.
  1443  *
  1444  *-------------------------------------------------------------------------
  1445  */
  1446 
  1447 static Tcl_Encoding
  1448 LoadTableEncoding(interp, name, type, chan)
  1449     Tcl_Interp *interp;		/* Interp for temporary obj while reading. */
  1450     CONST char *name;		/* Name for new encoding. */
  1451     int type;			/* Type of encoding (ENCODING_?????). */
  1452     Tcl_Channel chan;		/* File containing new encoding. */
  1453 {
  1454     Tcl_DString lineString;
  1455     Tcl_Obj *objPtr;
  1456     char *line;
  1457     int i, hi, lo, numPages, symbol, fallback;
  1458     unsigned char used[256];
  1459     unsigned int size;
  1460     TableEncodingData *dataPtr;
  1461     unsigned short *pageMemPtr;
  1462     Tcl_EncodingType encType;
  1463 
  1464     /*
  1465      * Speed over memory. Use a full 256 character table to decode hex
  1466      * sequences in the encoding files.
  1467      */
  1468 
  1469     static char staticHex[] = {
  1470       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*   0 ...  15 */
  1471       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  16 ...  31 */
  1472       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  32 ...  47 */
  1473       0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /*  48 ...  63 */
  1474       0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  64 ...  79 */
  1475       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  80 ...  95 */
  1476       0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*  96 ... 111 */
  1477       0,  1,  2,  3,  4,  5,  6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */
  1478       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */
  1479       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */
  1480       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */
  1481       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */
  1482       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */
  1483       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */
  1484       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */
  1485       0,  0,  0,  0,  0,  0,  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */
  1486     };
  1487 
  1488     Tcl_DStringInit(&lineString);
  1489     Tcl_Gets(chan, &lineString);
  1490     line = Tcl_DStringValue(&lineString);
  1491 
  1492     fallback = (int) strtol(line, &line, 16);
  1493     symbol = (int) strtol(line, &line, 10);
  1494     numPages = (int) strtol(line, &line, 10);
  1495     Tcl_DStringFree(&lineString);
  1496 
  1497     if (numPages < 0) {
  1498 	numPages = 0;
  1499     } else if (numPages > 256) {
  1500 	numPages = 256;
  1501     }
  1502 
  1503     memset(used, 0, sizeof(used));
  1504 
  1505 #undef PAGESIZE
  1506 #define PAGESIZE    (256 * sizeof(unsigned short))
  1507 
  1508     dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData));
  1509     memset(dataPtr, 0, sizeof(TableEncodingData));
  1510 
  1511     dataPtr->fallback = fallback;
  1512 
  1513     /*
  1514      * Read the table that maps characters to Unicode.  Performs a single
  1515      * malloc to get the memory for the array and all the pages needed by
  1516      * the array.
  1517      */
  1518 
  1519     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
  1520     dataPtr->toUnicode = (unsigned short **) ckalloc(size);
  1521     memset(dataPtr->toUnicode, 0, size);
  1522     pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
  1523 
  1524     if (interp == NULL) {
  1525 	objPtr = Tcl_NewObj();
  1526     } else {
  1527 	objPtr = Tcl_GetObjResult(interp);
  1528     }
  1529     for (i = 0; i < numPages; i++) {
  1530 	int ch;
  1531 	char *p;
  1532 
  1533 	Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
  1534 	p = Tcl_GetString(objPtr);
  1535 	hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]];
  1536 	dataPtr->toUnicode[hi] = pageMemPtr;
  1537 	p += 2;
  1538 	for (lo = 0; lo < 256; lo++) {
  1539 	    if ((lo & 0x0f) == 0) {
  1540 		p++;
  1541 	    }
  1542 	    ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8)
  1543 		+ (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]];
  1544 	    if (ch != 0) {
  1545 		used[ch >> 8] = 1;
  1546 	    }
  1547 	    *pageMemPtr = (unsigned short) ch;
  1548 	    pageMemPtr++;
  1549 	    p += 4;
  1550 	}
  1551     }
  1552     if (interp == NULL) {
  1553 	Tcl_DecrRefCount(objPtr);
  1554     } else {
  1555 	Tcl_ResetResult(interp);
  1556     }
  1557 	
  1558     if (type == ENCODING_DOUBLEBYTE) {
  1559 	memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes));
  1560     } else {
  1561 	for (hi = 1; hi < 256; hi++) {
  1562 	    if (dataPtr->toUnicode[hi] != NULL) {
  1563 		dataPtr->prefixBytes[hi] = 1;
  1564 	    }
  1565 	}
  1566     }
  1567 
  1568     /*
  1569      * Invert toUnicode array to produce the fromUnicode array.  Performs a
  1570      * single malloc to get the memory for the array and all the pages
  1571      * needed by the array.  While reading in the toUnicode array, we
  1572      * remembered what pages that would be needed for the fromUnicode array.
  1573      */
  1574 
  1575     if (symbol) {
  1576 	used[0] = 1;
  1577     }
  1578     numPages = 0;
  1579     for (hi = 0; hi < 256; hi++) {
  1580 	if (used[hi]) {
  1581 	    numPages++;
  1582 	}
  1583     }
  1584     size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
  1585     dataPtr->fromUnicode = (unsigned short **) ckalloc(size);
  1586     memset(dataPtr->fromUnicode, 0, size);
  1587     pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
  1588 
  1589     for (hi = 0; hi < 256; hi++) {
  1590 	if (dataPtr->toUnicode[hi] == NULL) {
  1591 	    dataPtr->toUnicode[hi] = emptyPage;
  1592 	} else {
  1593 	    for (lo = 0; lo < 256; lo++) {
  1594 		int ch;
  1595 
  1596 		ch = dataPtr->toUnicode[hi][lo];
  1597 		if (ch != 0) {
  1598 		    unsigned short *page;
  1599 		    
  1600 		    page = dataPtr->fromUnicode[ch >> 8];
  1601 		    if (page == NULL) {
  1602 			page = pageMemPtr;
  1603 			pageMemPtr += 256;
  1604 			dataPtr->fromUnicode[ch >> 8] = page;
  1605 		    }
  1606 		    page[ch & 0xff] = (unsigned short) ((hi << 8) + lo);
  1607 		}
  1608 	    }
  1609 	}
  1610     }
  1611     if (type == ENCODING_MULTIBYTE) {
  1612 	/*
  1613 	 * If multibyte encodings don't have a backslash character, define
  1614 	 * one.  Otherwise, on Windows, native file names won't work because
  1615 	 * the backslash in the file name will map to the unknown character
  1616 	 * (question mark) when converting from UTF-8 to external encoding.
  1617 	 */
  1618 
  1619 	if (dataPtr->fromUnicode[0] != NULL) {
  1620 	    if (dataPtr->fromUnicode[0]['\\'] == '\0') {
  1621 		dataPtr->fromUnicode[0]['\\'] = '\\';
  1622 	    }
  1623 	}
  1624     }
  1625     if (symbol) {
  1626 	unsigned short *page;
  1627 	
  1628 	/*
  1629 	 * Make a special symbol encoding that not only maps the symbol
  1630 	 * characters from their Unicode code points down into page 0, but
  1631 	 * also ensure that the characters on page 0 map to themselves.
  1632 	 * This is so that a symbol font can be used to display a simple
  1633 	 * string like "abcd" and have alpha, beta, chi, delta show up,
  1634 	 * rather than have "unknown" chars show up because strictly
  1635 	 * speaking the symbol font doesn't have glyphs for those low ascii
  1636 	 * chars.
  1637 	 */
  1638 
  1639 	page = dataPtr->fromUnicode[0];
  1640 	if (page == NULL) {
  1641 	    page = pageMemPtr;
  1642 	    dataPtr->fromUnicode[0] = page;
  1643 	}
  1644 	for (lo = 0; lo < 256; lo++) {
  1645 	    if (dataPtr->toUnicode[0][lo] != 0) {
  1646 		page[lo] = (unsigned short) lo;
  1647 	    }
  1648 	}
  1649     }
  1650     for (hi = 0; hi < 256; hi++) {
  1651 	if (dataPtr->fromUnicode[hi] == NULL) {
  1652 	    dataPtr->fromUnicode[hi] = emptyPage;
  1653 	}
  1654     }
  1655     /*
  1656      * For trailing 'R'everse encoding, see [Patch #689341]
  1657      */
  1658     Tcl_DStringInit(&lineString);
  1659     do {
  1660 	int len;
  1661 	/* skip leading empty lines */
  1662 	while ((len = Tcl_Gets(chan, &lineString)) == 0)
  1663 	    ;
  1664 	if (len < 0) {
  1665 	    break;
  1666 	}
  1667 	line = Tcl_DStringValue(&lineString);
  1668 	if (line[0] != 'R') {
  1669 	    break;
  1670 	}
  1671 	for (Tcl_DStringSetLength(&lineString, 0);
  1672 	     (len = Tcl_Gets(chan, &lineString)) >= 0;
  1673 	     Tcl_DStringSetLength(&lineString, 0)) {
  1674 	    unsigned char* p;
  1675 	    int to, from;
  1676 	    if (len < 5) {
  1677 		continue;
  1678 	    }
  1679 	    p = (unsigned char*) Tcl_DStringValue(&lineString);
  1680 	    to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
  1681 		+ (staticHex[p[2]] << 4) + staticHex[p[3]];
  1682 	    if (to == 0) {
  1683 	    	continue;
  1684 	    }
  1685 	    for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) {
  1686 		from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
  1687 			+ (staticHex[p[2]] << 4) + staticHex[p[3]];
  1688 	    	if (from == 0) {
  1689 		    continue;
  1690 		}
  1691 		dataPtr->fromUnicode[from >> 8][from & 0xff] = to;
  1692 	    }
  1693 	}
  1694     } while (0);
  1695     Tcl_DStringFree(&lineString);
  1696 
  1697     encType.encodingName    = name;
  1698     encType.toUtfProc	    = TableToUtfProc;
  1699     encType.fromUtfProc	    = TableFromUtfProc;
  1700     encType.freeProc	    = TableFreeProc;
  1701     encType.nullSize	    = (type == ENCODING_DOUBLEBYTE) ? 2 : 1;
  1702     encType.clientData	    = (ClientData) dataPtr;
  1703     return Tcl_CreateEncoding(&encType);
  1704 }
  1705 
  1706 /*
  1707  *-------------------------------------------------------------------------
  1708  *
  1709  * LoadEscapeEncoding --
  1710  *
  1711  *	Helper function for LoadEncodingTable().  Loads a state machine
  1712  *	that converts between Unicode and some other encoding.  
  1713  *
  1714  *	File contains text data that describes the escape sequences that
  1715  *	are used to choose an encoding and the associated names for the 
  1716  *	sub-encodings.
  1717  *
  1718  * Results:
  1719  *	The return value is the new encoding, or NULL if the encoding 
  1720  *	could not be created (because the file contained invalid data).
  1721  *
  1722  * Side effects:
  1723  *	None.
  1724  *
  1725  *-------------------------------------------------------------------------
  1726  */
  1727 
  1728 static Tcl_Encoding
  1729 LoadEscapeEncoding(name, chan)
  1730     CONST char *name;		/* Name for new encoding. */
  1731     Tcl_Channel chan;		/* File containing new encoding. */
  1732 {
  1733     int i, missingSubEncoding = 0;
  1734     unsigned int size;
  1735     Tcl_DString escapeData;
  1736     char init[16], final[16];
  1737     EscapeEncodingData *dataPtr;
  1738     Tcl_EncodingType type;
  1739 
  1740     init[0] = '\0';
  1741     final[0] = '\0';
  1742     Tcl_DStringInit(&escapeData);
  1743 
  1744     while (1) {
  1745 	int argc;
  1746 	CONST char **argv;
  1747 	char *line;
  1748 	Tcl_DString lineString;
  1749 	
  1750 	Tcl_DStringInit(&lineString);
  1751 	if (Tcl_Gets(chan, &lineString) < 0) {
  1752 	    break;
  1753 	}
  1754 	line = Tcl_DStringValue(&lineString);
  1755         if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) {
  1756 	    continue;
  1757 	}
  1758 	if (argc >= 2) {
  1759 	    if (strcmp(argv[0], "name") == 0) {
  1760 		;
  1761 	    } else if (strcmp(argv[0], "init") == 0) {
  1762 		strncpy(init, argv[1], sizeof(init));
  1763 		init[sizeof(init) - 1] = '\0';
  1764 	    } else if (strcmp(argv[0], "final") == 0) {
  1765 		strncpy(final, argv[1], sizeof(final));
  1766 		final[sizeof(final) - 1] = '\0';
  1767 	    } else {
  1768 		EscapeSubTable est;
  1769 
  1770 		strncpy(est.sequence, argv[1], sizeof(est.sequence));
  1771 		est.sequence[sizeof(est.sequence) - 1] = '\0';
  1772 		est.sequenceLen = strlen(est.sequence);
  1773 
  1774 		strncpy(est.name, argv[0], sizeof(est.name));
  1775 		est.name[sizeof(est.name) - 1] = '\0';
  1776 
  1777 		/*
  1778 		 * Load the subencodings first so we're never stuck
  1779 		 * trying to use a half-loaded system encoding to
  1780 		 * open/read a *.enc file.
  1781 		 */
  1782 
  1783 		est.encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, est.name);
  1784 		if ((est.encodingPtr == NULL) 
  1785 			|| (est.encodingPtr->toUtfProc != TableToUtfProc)) {
  1786 		    missingSubEncoding = 1;
  1787 		}
  1788 		Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
  1789 	    }
  1790 	}
  1791 	ckfree((char *) argv);
  1792 	Tcl_DStringFree(&lineString);
  1793     }
  1794     if (missingSubEncoding) {
  1795 	Tcl_DStringFree(&escapeData);
  1796 	return NULL;
  1797     }
  1798 
  1799     size = sizeof(EscapeEncodingData)
  1800 	    - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData);
  1801     dataPtr = (EscapeEncodingData *) ckalloc(size);
  1802     dataPtr->initLen = strlen(init);
  1803     strcpy(dataPtr->init, init);
  1804     dataPtr->finalLen = strlen(final);
  1805     strcpy(dataPtr->final, final);
  1806     dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
  1807     memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData),
  1808 	    (size_t) Tcl_DStringLength(&escapeData));
  1809     Tcl_DStringFree(&escapeData);
  1810 
  1811     memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
  1812     for (i = 0; i < dataPtr->numSubTables; i++) {
  1813 	dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1;
  1814     }
  1815     if (dataPtr->init[0] != '\0') {
  1816 	dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1;
  1817     }
  1818     if (dataPtr->final[0] != '\0') {
  1819 	dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1;
  1820     }
  1821 
  1822     type.encodingName	= name;
  1823     type.toUtfProc	= EscapeToUtfProc;
  1824     type.fromUtfProc    = EscapeFromUtfProc;
  1825     type.freeProc	= EscapeFreeProc;
  1826     type.nullSize	= 1;
  1827     type.clientData	= (ClientData) dataPtr;
  1828 
  1829     return Tcl_CreateEncoding(&type);
  1830 }
  1831 
  1832 /*
  1833  *-------------------------------------------------------------------------
  1834  *
  1835  * BinaryProc --
  1836  *
  1837  *	The default conversion when no other conversion is specified.
  1838  *	No translation is done; source bytes are copied directly to 
  1839  *	destination bytes.
  1840  *
  1841  * Results:
  1842  *	Returns TCL_OK if conversion was successful.
  1843  *
  1844  * Side effects:
  1845  *	None.
  1846  *
  1847  *-------------------------------------------------------------------------
  1848  */
  1849 
  1850 static int
  1851 BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1852 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  1853     ClientData clientData;	/* Not used. */
  1854     CONST char *src;		/* Source string (unknown encoding). */
  1855     int srcLen;			/* Source string length in bytes. */
  1856     int flags;			/* Conversion control flags. */
  1857     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1858 				 * state information used during a piecewise
  1859 				 * conversion.  Contents of statePtr are
  1860 				 * initialized and/or reset by conversion
  1861 				 * routine under control of flags argument. */
  1862     char *dst;			/* Output buffer in which converted string
  1863 				 * is stored. */
  1864     int dstLen;			/* The maximum length of output buffer in
  1865 				 * bytes. */
  1866     int *srcReadPtr;		/* Filled with the number of bytes from the
  1867 				 * source string that were converted. */
  1868     int *dstWrotePtr;		/* Filled with the number of bytes that were
  1869 				 * stored in the output buffer as a result of
  1870 				 * the conversion. */
  1871     int *dstCharsPtr;		/* Filled with the number of characters that
  1872 				 * correspond to the bytes stored in the
  1873 				 * output buffer. */
  1874 {
  1875     int result;
  1876 
  1877     result = TCL_OK;
  1878     dstLen -= TCL_UTF_MAX - 1;
  1879     if (dstLen < 0) {
  1880 	dstLen = 0;
  1881     }
  1882     if (srcLen > dstLen) {
  1883 	srcLen = dstLen;
  1884 	result = TCL_CONVERT_NOSPACE;
  1885     }
  1886 
  1887     *srcReadPtr = srcLen;
  1888     *dstWrotePtr = srcLen;
  1889     *dstCharsPtr = srcLen;
  1890     memcpy((void *) dst, (void *) src, (size_t) srcLen);
  1891     return result;
  1892 }
  1893 
  1894 
  1895 /*
  1896  *-------------------------------------------------------------------------
  1897  *
  1898  * UtfExtToUtfIntProc --
  1899  *
  1900  *	Convert from UTF-8 to UTF-8. While converting null-bytes from
  1901  *	the Tcl's internal representation (0xc0, 0x80) to the official
  1902  *	representation (0x00). See UtfToUtfProc for details.
  1903  *
  1904  * Results:
  1905  *	Returns TCL_OK if conversion was successful.
  1906  *
  1907  * Side effects:
  1908  *	None.
  1909  *
  1910  *-------------------------------------------------------------------------
  1911  */
  1912 static int 
  1913 UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1914 	     srcReadPtr, dstWrotePtr, dstCharsPtr)
  1915     ClientData clientData;	/* Not used. */
  1916     CONST char *src;		/* Source string in UTF-8. */
  1917     int srcLen;			/* Source string length in bytes. */
  1918     int flags;			/* Conversion control flags. */
  1919     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1920 				 * state information used during a piecewise
  1921 				 * conversion.  Contents of statePtr are
  1922 				 * initialized and/or reset by conversion
  1923 				 * routine under control of flags argument. */
  1924     char *dst;			/* Output buffer in which converted string
  1925 				 * is stored. */
  1926     int dstLen;			/* The maximum length of output buffer in
  1927 				 * bytes. */
  1928     int *srcReadPtr;		/* Filled with the number of bytes from the
  1929 				 * source string that were converted.  This
  1930 				 * may be less than the original source length
  1931 				 * if there was a problem converting some
  1932 				 * source characters. */
  1933     int *dstWrotePtr;		/* Filled with the number of bytes that were
  1934 				 * stored in the output buffer as a result of
  1935 				 * the conversion. */
  1936     int *dstCharsPtr;		/* Filled with the number of characters that
  1937 				 * correspond to the bytes stored in the
  1938 				 * output buffer. */
  1939 {
  1940     return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1941 			srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
  1942 }
  1943 
  1944 /*
  1945  *-------------------------------------------------------------------------
  1946  *
  1947  * UtfExtToUtfIntProc --
  1948  *
  1949  *	Convert from UTF-8 to UTF-8 while converting null-bytes from
  1950  *	the official representation (0x00) to Tcl's internal
  1951  *	representation (0xc0, 0x80). See UtfToUtfProc for details.
  1952  *
  1953  * Results:
  1954  *	Returns TCL_OK if conversion was successful.
  1955  *
  1956  * Side effects:
  1957  *	None.
  1958  *
  1959  *-------------------------------------------------------------------------
  1960  */
  1961 static int 
  1962 UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1963 	     srcReadPtr, dstWrotePtr, dstCharsPtr)
  1964     ClientData clientData;	/* Not used. */
  1965     CONST char *src;		/* Source string in UTF-8. */
  1966     int srcLen;			/* Source string length in bytes. */
  1967     int flags;			/* Conversion control flags. */
  1968     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  1969 				 * state information used during a piecewise
  1970 				 * conversion.  Contents of statePtr are
  1971 				 * initialized and/or reset by conversion
  1972 				 * routine under control of flags argument. */
  1973     char *dst;			/* Output buffer in which converted string
  1974 				 * is stored. */
  1975     int dstLen;			/* The maximum length of output buffer in
  1976 				 * bytes. */
  1977     int *srcReadPtr;		/* Filled with the number of bytes from the
  1978 				 * source string that were converted.  This
  1979 				 * may be less than the original source length
  1980 				 * if there was a problem converting some
  1981 				 * source characters. */
  1982     int *dstWrotePtr;		/* Filled with the number of bytes that were
  1983 				 * stored in the output buffer as a result of
  1984 				 * the conversion. */
  1985     int *dstCharsPtr;		/* Filled with the number of characters that
  1986 				 * correspond to the bytes stored in the
  1987 				 * output buffer. */
  1988 {
  1989     return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  1990 			srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
  1991 }
  1992 
  1993 /*
  1994  *-------------------------------------------------------------------------
  1995  *
  1996  * UtfToUtfProc --
  1997  *
  1998  *	Convert from UTF-8 to UTF-8.  Note that the UTF-8 to UTF-8 
  1999  *	translation is not a no-op, because it will turn a stream of
  2000  *	improperly formed UTF-8 into a properly formed stream.
  2001  *
  2002  * Results:
  2003  *	Returns TCL_OK if conversion was successful.
  2004  *
  2005  * Side effects:
  2006  *	None.
  2007  *
  2008  *-------------------------------------------------------------------------
  2009  */
  2010 
  2011 static int 
  2012 UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2013 	     srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode)
  2014     ClientData clientData;	/* Not used. */
  2015     CONST char *src;		/* Source string in UTF-8. */
  2016     int srcLen;			/* Source string length in bytes. */
  2017     int flags;			/* Conversion control flags. */
  2018     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2019 				 * state information used during a piecewise
  2020 				 * conversion.  Contents of statePtr are
  2021 				 * initialized and/or reset by conversion
  2022 				 * routine under control of flags argument. */
  2023     char *dst;			/* Output buffer in which converted string
  2024 				 * is stored. */
  2025     int dstLen;			/* The maximum length of output buffer in
  2026 				 * bytes. */
  2027     int *srcReadPtr;		/* Filled with the number of bytes from the
  2028 				 * source string that were converted.  This
  2029 				 * may be less than the original source length
  2030 				 * if there was a problem converting some
  2031 				 * source characters. */
  2032     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2033 				 * stored in the output buffer as a result of
  2034 				 * the conversion. */
  2035     int *dstCharsPtr;		/* Filled with the number of characters that
  2036 				 * correspond to the bytes stored in the
  2037 				 * output buffer. */
  2038     int pureNullMode;		/* Convert embedded nulls from
  2039 				 * internal representation to real
  2040 				 * null-bytes or vice versa */
  2041 
  2042 {
  2043     CONST char *srcStart, *srcEnd, *srcClose;
  2044     char *dstStart, *dstEnd;
  2045     int result, numChars;
  2046     Tcl_UniChar ch;
  2047 
  2048     result = TCL_OK;
  2049     
  2050     srcStart = src;
  2051     srcEnd = src + srcLen;
  2052     srcClose = srcEnd;
  2053     if ((flags & TCL_ENCODING_END) == 0) {
  2054 	srcClose -= TCL_UTF_MAX;
  2055     }
  2056 
  2057     dstStart = dst;
  2058     dstEnd = dst + dstLen - TCL_UTF_MAX;
  2059 
  2060     for (numChars = 0; src < srcEnd; numChars++) {
  2061 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  2062 	    /*
  2063 	     * If there is more string to follow, this will ensure that the
  2064 	     * last UTF-8 character in the source buffer hasn't been cut off.
  2065 	     */
  2066 
  2067 	    result = TCL_CONVERT_MULTIBYTE;
  2068 	    break;
  2069 	}
  2070 	if (dst > dstEnd) {
  2071 	    result = TCL_CONVERT_NOSPACE;
  2072 	    break;
  2073 	}
  2074 	if (UCHAR(*src) < 0x80 &&
  2075 	    !(UCHAR(*src) == 0 && pureNullMode == 0)) {
  2076 	    /*
  2077 	     * Copy 7bit chatacters, but skip null-bytes when we are
  2078 	     * in input mode, so that they get converted to 0xc080.
  2079 	     */
  2080 	    *dst++ = *src++;
  2081 	} else if (pureNullMode == 1 &&
  2082 		   UCHAR(*src) == 0xc0 &&
  2083 		   UCHAR(*(src+1)) == 0x80) {
  2084 	    /* 
  2085 	     * Convert 0xc080 to real nulls when we are in output mode.
  2086 	     */
  2087 	    *dst++ = 0;
  2088 	    src += 2;
  2089 	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
  2090 	    /* Always check before using Tcl_UtfToUniChar. Not doing
  2091 	     * can so cause it run beyond the endof the buffer!  If we
  2092 	     * * happen such an incomplete char its byts are made to *
  2093 	     * represent themselves.
  2094 	     */
  2095 
  2096 	    ch = (Tcl_UniChar) *src;
  2097 	    src += 1;
  2098 	    dst += Tcl_UniCharToUtf(ch, dst);
  2099 	} else {
  2100 	    src += Tcl_UtfToUniChar(src, &ch);
  2101 	    dst += Tcl_UniCharToUtf(ch, dst);
  2102 	}
  2103     }
  2104 
  2105     *srcReadPtr  = src - srcStart;
  2106     *dstWrotePtr = dst - dstStart;
  2107     *dstCharsPtr = numChars;
  2108     return result;
  2109 }
  2110 
  2111 /*
  2112  *-------------------------------------------------------------------------
  2113  *
  2114  * UnicodeToUtfProc --
  2115  *
  2116  *	Convert from Unicode to UTF-8.
  2117  *
  2118  * Results:
  2119  *	Returns TCL_OK if conversion was successful.
  2120  *
  2121  * Side effects:
  2122  *	None.
  2123  *
  2124  *-------------------------------------------------------------------------
  2125  */
  2126 
  2127 static int 
  2128 UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2129 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2130     ClientData clientData;	/* Not used. */
  2131     CONST char *src;		/* Source string in Unicode. */
  2132     int srcLen;			/* Source string length in bytes. */
  2133     int flags;			/* Conversion control flags. */
  2134     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2135 				 * state information used during a piecewise
  2136 				 * conversion.  Contents of statePtr are
  2137 				 * initialized and/or reset by conversion
  2138 				 * routine under control of flags argument. */
  2139     char *dst;			/* Output buffer in which converted string
  2140 				 * is stored. */
  2141     int dstLen;			/* The maximum length of output buffer in
  2142 				 * bytes. */
  2143     int *srcReadPtr;		/* Filled with the number of bytes from the
  2144 				 * source string that were converted.  This
  2145 				 * may be less than the original source length
  2146 				 * if there was a problem converting some
  2147 				 * source characters. */
  2148     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2149 				 * stored in the output buffer as a result of
  2150 				 * the conversion. */
  2151     int *dstCharsPtr;		/* Filled with the number of characters that
  2152 				 * correspond to the bytes stored in the
  2153 				 * output buffer. */
  2154 {
  2155     CONST char *srcStart, *srcEnd;
  2156     char *dstEnd, *dstStart;
  2157     int result, numChars;
  2158     Tcl_UniChar ch;
  2159 
  2160     result = TCL_OK;
  2161     if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
  2162 	result = TCL_CONVERT_MULTIBYTE;
  2163 	srcLen /= sizeof(Tcl_UniChar);
  2164 	srcLen *= sizeof(Tcl_UniChar);
  2165     }
  2166 
  2167     srcStart = src;
  2168     srcEnd = src + srcLen;
  2169 
  2170     dstStart = dst;
  2171     dstEnd = dst + dstLen - TCL_UTF_MAX;
  2172 
  2173     for (numChars = 0; src < srcEnd; numChars++) {
  2174 	if (dst > dstEnd) {
  2175 	    result = TCL_CONVERT_NOSPACE;
  2176 	    break;
  2177 	}
  2178 	/*
  2179 	 * Special case for 1-byte utf chars for speed.  Make sure we
  2180 	 * work with Tcl_UniChar-size data.
  2181 	 */
  2182 	ch = *(Tcl_UniChar *)src;
  2183 	if (ch && ch < 0x80) {
  2184 	    *dst++ = (ch & 0xFF);
  2185 	} else {
  2186 	    dst += Tcl_UniCharToUtf(ch, dst);
  2187 	}
  2188 	src += sizeof(Tcl_UniChar);
  2189     }
  2190 
  2191     *srcReadPtr = src - srcStart;
  2192     *dstWrotePtr = dst - dstStart;
  2193     *dstCharsPtr = numChars;
  2194     return result;
  2195 }
  2196 
  2197 /*
  2198  *-------------------------------------------------------------------------
  2199  *
  2200  * UtfToUnicodeProc --
  2201  *
  2202  *	Convert from UTF-8 to Unicode.
  2203  *
  2204  * Results:
  2205  *	Returns TCL_OK if conversion was successful.
  2206  *
  2207  * Side effects:
  2208  *	None.
  2209  *
  2210  *-------------------------------------------------------------------------
  2211  */
  2212 
  2213 static int 
  2214 UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2215 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2216     ClientData clientData;	/* TableEncodingData that specifies encoding. */
  2217     CONST char *src;		/* Source string in UTF-8. */
  2218     int srcLen;			/* Source string length in bytes. */
  2219     int flags;			/* Conversion control flags. */
  2220     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2221 				 * state information used during a piecewise
  2222 				 * conversion.  Contents of statePtr are
  2223 				 * initialized and/or reset by conversion
  2224 				 * routine under control of flags argument. */
  2225     char *dst;			/* Output buffer in which converted string
  2226 				 * is stored. */
  2227     int dstLen;			/* The maximum length of output buffer in
  2228 				 * bytes. */
  2229     int *srcReadPtr;		/* Filled with the number of bytes from the
  2230 				 * source string that were converted.  This
  2231 				 * may be less than the original source length
  2232 				 * if there was a problem converting some
  2233 				 * source characters. */
  2234     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2235 				 * stored in the output buffer as a result of
  2236 				 * the conversion. */
  2237     int *dstCharsPtr;		/* Filled with the number of characters that
  2238 				 * correspond to the bytes stored in the
  2239 				 * output buffer. */
  2240 {
  2241     CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
  2242     int result, numChars;
  2243     Tcl_UniChar ch;
  2244 
  2245     srcStart = src;
  2246     srcEnd = src + srcLen;
  2247     srcClose = srcEnd;
  2248     if ((flags & TCL_ENCODING_END) == 0) {
  2249 	srcClose -= TCL_UTF_MAX;
  2250     }
  2251 
  2252     dstStart = dst;
  2253     dstEnd   = dst + dstLen - sizeof(Tcl_UniChar);
  2254 
  2255     result = TCL_OK;
  2256     for (numChars = 0; src < srcEnd; numChars++) {
  2257 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  2258 	    /*
  2259 	     * If there is more string to follow, this will ensure that the
  2260 	     * last UTF-8 character in the source buffer hasn't been cut off.
  2261 	     */
  2262 
  2263 	    result = TCL_CONVERT_MULTIBYTE;
  2264 	    break;
  2265 	}
  2266 	if (dst > dstEnd) {
  2267 	    result = TCL_CONVERT_NOSPACE;
  2268 	    break;
  2269         }
  2270 	src += TclUtfToUniChar(src, &ch);
  2271 	/*
  2272 	 * Need to handle this in a way that won't cause misalignment
  2273 	 * by casting dst to a Tcl_UniChar. [Bug 1122671]
  2274 	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
  2275 	 */
  2276 #ifdef WORDS_BIGENDIAN
  2277 	*dst++ = (ch >> 8);
  2278 	*dst++ = (ch & 0xFF);
  2279 #else
  2280 	*dst++ = (ch & 0xFF);
  2281 	*dst++ = (ch >> 8);
  2282 #endif
  2283     }
  2284     *srcReadPtr = src - srcStart;
  2285     *dstWrotePtr = dst - dstStart;
  2286     *dstCharsPtr = numChars;
  2287     return result;
  2288 }
  2289 
  2290 /*
  2291  *-------------------------------------------------------------------------
  2292  *
  2293  * TableToUtfProc --
  2294  *
  2295  *	Convert from the encoding specified by the TableEncodingData into
  2296  *	UTF-8.
  2297  *
  2298  * Results:
  2299  *	Returns TCL_OK if conversion was successful.
  2300  *
  2301  * Side effects:
  2302  *	None.
  2303  *
  2304  *-------------------------------------------------------------------------
  2305  */
  2306 
  2307 static int 
  2308 TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2309 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2310     ClientData clientData;	/* TableEncodingData that specifies
  2311 				 * encoding. */
  2312     CONST char *src;		/* Source string in specified encoding. */
  2313     int srcLen;			/* Source string length in bytes. */
  2314     int flags;			/* Conversion control flags. */
  2315     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2316 				 * state information used during a piecewise
  2317 				 * conversion.  Contents of statePtr are
  2318 				 * initialized and/or reset by conversion
  2319 				 * routine under control of flags argument. */
  2320     char *dst;			/* Output buffer in which converted string
  2321 				 * is stored. */
  2322     int dstLen;			/* The maximum length of output buffer in
  2323 				 * bytes. */
  2324     int *srcReadPtr;		/* Filled with the number of bytes from the
  2325 				 * source string that were converted.  This
  2326 				 * may be less than the original source length
  2327 				 * if there was a problem converting some
  2328 				 * source characters. */
  2329     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2330 				 * stored in the output buffer as a result of
  2331 				 * the conversion. */
  2332     int *dstCharsPtr;		/* Filled with the number of characters that
  2333 				 * correspond to the bytes stored in the
  2334 				 * output buffer. */
  2335 {
  2336     CONST char *srcStart, *srcEnd;
  2337     char *dstEnd, *dstStart, *prefixBytes;
  2338     int result, byte, numChars;
  2339     Tcl_UniChar ch;
  2340     unsigned short **toUnicode;
  2341     unsigned short *pageZero;
  2342     TableEncodingData *dataPtr;
  2343     
  2344     srcStart = src;
  2345     srcEnd = src + srcLen;
  2346 
  2347     dstStart = dst;
  2348     dstEnd = dst + dstLen - TCL_UTF_MAX;
  2349 
  2350     dataPtr = (TableEncodingData *) clientData;
  2351     toUnicode = dataPtr->toUnicode;
  2352     prefixBytes = dataPtr->prefixBytes;
  2353     pageZero = toUnicode[0];
  2354 
  2355     result = TCL_OK;
  2356     for (numChars = 0; src < srcEnd; numChars++) {
  2357         if (dst > dstEnd) {
  2358             result = TCL_CONVERT_NOSPACE;
  2359             break;
  2360         }
  2361 	byte = *((unsigned char *) src);
  2362 	if (prefixBytes[byte]) {
  2363 	    src++;
  2364 	    if (src >= srcEnd) {
  2365 		src--;
  2366 		result = TCL_CONVERT_MULTIBYTE;
  2367 		break;
  2368 	    }
  2369 	    ch = toUnicode[byte][*((unsigned char *) src)];
  2370 	} else {
  2371 	    ch = pageZero[byte];
  2372 	}
  2373 	if ((ch == 0) && (byte != 0)) {
  2374 	    if (flags & TCL_ENCODING_STOPONERROR) {
  2375 		result = TCL_CONVERT_SYNTAX;
  2376 		break;
  2377 	    }
  2378 	    if (prefixBytes[byte]) {
  2379 		src--;
  2380 	    }
  2381 	    ch = (Tcl_UniChar) byte;
  2382 	}
  2383 	/*
  2384 	 * Special case for 1-byte utf chars for speed.
  2385 	 */
  2386 	if (ch && ch < 0x80) {
  2387 	    *dst++ = (char) ch;
  2388 	} else {
  2389 	    dst += Tcl_UniCharToUtf(ch, dst);
  2390 	}
  2391         src++;
  2392     }
  2393     *srcReadPtr = src - srcStart;
  2394     *dstWrotePtr = dst - dstStart;
  2395     *dstCharsPtr = numChars;
  2396     return result;
  2397 }
  2398 
  2399 /*
  2400  *-------------------------------------------------------------------------
  2401  *
  2402  * TableFromUtfProc --
  2403  *
  2404  *	Convert from UTF-8 into the encoding specified by the
  2405  *	TableEncodingData.
  2406  *
  2407  * Results:
  2408  *	Returns TCL_OK if conversion was successful.
  2409  *
  2410  * Side effects:
  2411  *	None.
  2412  *
  2413  *-------------------------------------------------------------------------
  2414  */
  2415 
  2416 static int 
  2417 TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2418 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2419     ClientData clientData;	/* TableEncodingData that specifies
  2420 				 * encoding. */
  2421     CONST char *src;		/* Source string in UTF-8. */
  2422     int srcLen;			/* Source string length in bytes. */
  2423     int flags;			/* Conversion control flags. */
  2424     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2425 				 * state information used during a piecewise
  2426 				 * conversion.  Contents of statePtr are
  2427 				 * initialized and/or reset by conversion
  2428 				 * routine under control of flags argument. */
  2429     char *dst;			/* Output buffer in which converted string
  2430 				 * is stored. */
  2431     int dstLen;			/* The maximum length of output buffer in
  2432 				 * bytes. */
  2433     int *srcReadPtr;		/* Filled with the number of bytes from the
  2434 				 * source string that were converted.  This
  2435 				 * may be less than the original source length
  2436 				 * if there was a problem converting some
  2437 				 * source characters. */
  2438     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2439 				 * stored in the output buffer as a result of
  2440 				 * the conversion. */
  2441     int *dstCharsPtr;		/* Filled with the number of characters that
  2442 				 * correspond to the bytes stored in the
  2443 				 * output buffer. */
  2444 {
  2445     CONST char *srcStart, *srcEnd, *srcClose;
  2446     char *dstStart, *dstEnd, *prefixBytes;
  2447     Tcl_UniChar ch;
  2448     int result, len, word, numChars;
  2449     TableEncodingData *dataPtr;
  2450     unsigned short **fromUnicode;
  2451     
  2452     result = TCL_OK;    
  2453 
  2454     dataPtr = (TableEncodingData *) clientData;
  2455     prefixBytes = dataPtr->prefixBytes;
  2456     fromUnicode = dataPtr->fromUnicode;
  2457     
  2458     srcStart = src;
  2459     srcEnd = src + srcLen;
  2460     srcClose = srcEnd;
  2461     if ((flags & TCL_ENCODING_END) == 0) {
  2462 	srcClose -= TCL_UTF_MAX;
  2463     }
  2464 
  2465     dstStart = dst;
  2466     dstEnd = dst + dstLen - 1;
  2467 
  2468     for (numChars = 0; src < srcEnd; numChars++) {
  2469 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  2470 	    /*
  2471 	     * If there is more string to follow, this will ensure that the
  2472 	     * last UTF-8 character in the source buffer hasn't been cut off.
  2473 	     */
  2474 
  2475 	    result = TCL_CONVERT_MULTIBYTE;
  2476 	    break;
  2477 	}
  2478 	len = TclUtfToUniChar(src, &ch);
  2479 
  2480 #if TCL_UTF_MAX > 3
  2481 	/*
  2482 	 * This prevents a crash condition.  More evaluation is required
  2483 	 * for full support of int Tcl_UniChar. [Bug 1004065]
  2484 	 */
  2485 	if (ch & 0xffff0000) {
  2486 	    word = 0;
  2487 	} else
  2488 #endif
  2489 	    word = fromUnicode[(ch >> 8)][ch & 0xff];
  2490 
  2491 	if ((word == 0) && (ch != 0)) {
  2492 	    if (flags & TCL_ENCODING_STOPONERROR) {
  2493 		result = TCL_CONVERT_UNKNOWN;
  2494 		break;
  2495 	    }
  2496 	    word = dataPtr->fallback; 
  2497 	}
  2498 	if (prefixBytes[(word >> 8)] != 0) {
  2499 	    if (dst + 1 > dstEnd) {
  2500 		result = TCL_CONVERT_NOSPACE;
  2501 		break;
  2502 	    }
  2503 	    dst[0] = (char) (word >> 8);
  2504 	    dst[1] = (char) word;
  2505 	    dst += 2;
  2506 	} else {
  2507 	    if (dst > dstEnd) {
  2508 		result = TCL_CONVERT_NOSPACE;
  2509 		break;
  2510 	    }
  2511 	    dst[0] = (char) word;
  2512 	    dst++;
  2513 	} 
  2514 	src += len;
  2515     }
  2516     *srcReadPtr = src - srcStart;
  2517     *dstWrotePtr = dst - dstStart;
  2518     *dstCharsPtr = numChars;
  2519     return result;
  2520 }
  2521 
  2522 /*
  2523  *---------------------------------------------------------------------------
  2524  *
  2525  * TableFreeProc --
  2526  *
  2527  *	This procedure is invoked when an encoding is deleted.  It deletes
  2528  *	the memory used by the TableEncodingData.
  2529  *
  2530  * Results:
  2531  *	None.
  2532  *
  2533  * Side effects:
  2534  *	Memory freed.
  2535  *
  2536  *---------------------------------------------------------------------------
  2537  */
  2538 
  2539 static void
  2540 TableFreeProc(clientData)
  2541     ClientData clientData;	/* TableEncodingData that specifies
  2542 				 * encoding. */
  2543 {
  2544     TableEncodingData *dataPtr;
  2545 
  2546     /*
  2547      * Make sure we aren't freeing twice on shutdown.  [Bug #219314]
  2548      */
  2549 
  2550     dataPtr = (TableEncodingData *) clientData;
  2551     ckfree((char *) dataPtr->toUnicode);
  2552     ckfree((char *) dataPtr->fromUnicode);
  2553     ckfree((char *) dataPtr);
  2554 }
  2555 
  2556 /*
  2557  *-------------------------------------------------------------------------
  2558  *
  2559  * EscapeToUtfProc --
  2560  *
  2561  *	Convert from the encoding specified by the EscapeEncodingData into
  2562  *	UTF-8.
  2563  *
  2564  * Results:
  2565  *	Returns TCL_OK if conversion was successful.
  2566  *
  2567  * Side effects:
  2568  *	None.
  2569  *
  2570  *-------------------------------------------------------------------------
  2571  */
  2572 
  2573 static int 
  2574 EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2575 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2576     ClientData clientData;	/* EscapeEncodingData that specifies
  2577 				 * encoding. */
  2578     CONST char *src;		/* Source string in specified encoding. */
  2579     int srcLen;			/* Source string length in bytes. */
  2580     int flags;			/* Conversion control flags. */
  2581     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2582 				 * state information used during a piecewise
  2583 				 * conversion.  Contents of statePtr are
  2584 				 * initialized and/or reset by conversion
  2585 				 * routine under control of flags argument. */
  2586     char *dst;			/* Output buffer in which converted string
  2587 				 * is stored. */
  2588     int dstLen;			/* The maximum length of output buffer in
  2589 				 * bytes. */
  2590     int *srcReadPtr;		/* Filled with the number of bytes from the
  2591 				 * source string that were converted.  This
  2592 				 * may be less than the original source length
  2593 				 * if there was a problem converting some
  2594 				 * source characters. */
  2595     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2596 				 * stored in the output buffer as a result of
  2597 				 * the conversion. */
  2598     int *dstCharsPtr;		/* Filled with the number of characters that
  2599 				 * correspond to the bytes stored in the
  2600 				 * output buffer. */
  2601 {
  2602     EscapeEncodingData *dataPtr;
  2603     char *prefixBytes, *tablePrefixBytes;
  2604     unsigned short **tableToUnicode;
  2605     Encoding *encodingPtr;
  2606     int state, result, numChars;
  2607     CONST char *srcStart, *srcEnd;
  2608     char *dstStart, *dstEnd;
  2609 
  2610     result = TCL_OK;
  2611 
  2612     tablePrefixBytes = NULL;	/* lint. */
  2613     tableToUnicode = NULL;	/* lint. */
  2614 
  2615     dataPtr = (EscapeEncodingData *) clientData;
  2616     prefixBytes = dataPtr->prefixBytes;
  2617     encodingPtr = NULL;
  2618 
  2619     srcStart = src;
  2620     srcEnd = src + srcLen;
  2621 
  2622     dstStart = dst;
  2623     dstEnd = dst + dstLen - TCL_UTF_MAX;
  2624 
  2625     state = (int) *statePtr;
  2626     if (flags & TCL_ENCODING_START) {
  2627 	state = 0;
  2628     }
  2629 
  2630     for (numChars = 0; src < srcEnd; ) {
  2631 	int byte, hi, lo, ch;
  2632 
  2633         if (dst > dstEnd) {
  2634             result = TCL_CONVERT_NOSPACE;
  2635             break;
  2636         }
  2637 	byte = *((unsigned char *) src);
  2638 	if (prefixBytes[byte]) {
  2639 	    unsigned int left, len, longest;
  2640 	    int checked, i;
  2641 	    EscapeSubTable *subTablePtr;
  2642 	    
  2643 	    /*
  2644 	     * Saw the beginning of an escape sequence. 
  2645 	     */
  2646 	     
  2647 	    left = srcEnd - src;
  2648 	    len = dataPtr->initLen;
  2649 	    longest = len;
  2650 	    checked = 0;
  2651 	    if (len <= left) {
  2652 		checked++;
  2653 		if ((len > 0) && 
  2654 			(memcmp(src, dataPtr->init, len) == 0)) {
  2655 		    /*
  2656 		     * If we see initialization string, skip it, even if we're
  2657 		     * not at the beginning of the buffer. 
  2658 		     */
  2659 		     
  2660 		    src += len;
  2661 		    continue;
  2662 		}
  2663 	    }
  2664 	    len = dataPtr->finalLen;
  2665 	    if (len > longest) {
  2666 		longest = len;
  2667 	    }
  2668 	    if (len <= left) {
  2669 		checked++;
  2670 		if ((len > 0) && 
  2671 			(memcmp(src, dataPtr->final, len) == 0)) {
  2672 		    /*
  2673 		     * If we see finalization string, skip it, even if we're
  2674 		     * not at the end of the buffer. 
  2675 		     */
  2676 		     
  2677 		    src += len;
  2678 		    continue;
  2679 		}
  2680 	    }
  2681 	    subTablePtr = dataPtr->subTables;
  2682 	    for (i = 0; i < dataPtr->numSubTables; i++) {
  2683 		len = subTablePtr->sequenceLen;
  2684 		if (len > longest) {
  2685 		    longest = len;
  2686 		}
  2687 		if (len <= left) {
  2688 		    checked++;
  2689 		    if ((len > 0) && 
  2690 			    (memcmp(src, subTablePtr->sequence, len) == 0)) {
  2691 			state = i;
  2692 			encodingPtr = NULL;
  2693 			subTablePtr = NULL;
  2694 			src += len;
  2695 			break;
  2696 		    }
  2697 		}
  2698 		subTablePtr++;
  2699 	    }
  2700 	    if (subTablePtr == NULL) {
  2701 		/*
  2702 		 * A match was found, the escape sequence was consumed, and
  2703 		 * the state was updated.
  2704 		 */
  2705 
  2706 		continue;
  2707 	    }
  2708 
  2709 	    /*
  2710 	     * We have a split-up or unrecognized escape sequence.  If we
  2711 	     * checked all the sequences, then it's a syntax error,
  2712 	     * otherwise we need more bytes to determine a match.
  2713 	     */
  2714 
  2715 	    if ((checked == dataPtr->numSubTables + 2)
  2716 		    || (flags & TCL_ENCODING_END)) {
  2717 		if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
  2718 		    /*
  2719 		     * Skip the unknown escape sequence.
  2720 		     */
  2721 
  2722 		    src += longest;
  2723 		    continue;
  2724 		}
  2725 		result = TCL_CONVERT_SYNTAX;
  2726 	    } else {
  2727 		result = TCL_CONVERT_MULTIBYTE;
  2728 	    }
  2729 	    break;
  2730 	}
  2731 
  2732 	if (encodingPtr == NULL) {
  2733 	    TableEncodingData *tableDataPtr;
  2734 
  2735 	    encodingPtr = GetTableEncoding(dataPtr, state);
  2736 	    tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  2737 	    tablePrefixBytes = tableDataPtr->prefixBytes;
  2738 	    tableToUnicode = tableDataPtr->toUnicode;
  2739 	}
  2740 	if (tablePrefixBytes[byte]) {
  2741 	    src++;
  2742 	    if (src >= srcEnd) {
  2743 		src--;
  2744 		result = TCL_CONVERT_MULTIBYTE;
  2745 		break;
  2746 	    }
  2747 	    hi = byte;
  2748 	    lo = *((unsigned char *) src);
  2749 	} else {
  2750 	    hi = 0;
  2751 	    lo = byte;
  2752 	}
  2753 	ch = tableToUnicode[hi][lo];
  2754 	dst += Tcl_UniCharToUtf(ch, dst);
  2755 	src++;
  2756 	numChars++;
  2757     }
  2758 
  2759     *statePtr = (Tcl_EncodingState) state;
  2760     *srcReadPtr = src - srcStart;
  2761     *dstWrotePtr = dst - dstStart;
  2762     *dstCharsPtr = numChars;
  2763     return result;
  2764 }
  2765 
  2766 /*
  2767  *-------------------------------------------------------------------------
  2768  *
  2769  * EscapeFromUtfProc --
  2770  *
  2771  *	Convert from UTF-8 into the encoding specified by the
  2772  *	EscapeEncodingData.
  2773  *
  2774  * Results:
  2775  *	Returns TCL_OK if conversion was successful.
  2776  *
  2777  * Side effects:
  2778  *	None.
  2779  *
  2780  *-------------------------------------------------------------------------
  2781  */
  2782 
  2783 static int 
  2784 EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  2785 	srcReadPtr, dstWrotePtr, dstCharsPtr)
  2786     ClientData clientData;	/* EscapeEncodingData that specifies
  2787 				 * encoding. */
  2788     CONST char *src;		/* Source string in UTF-8. */
  2789     int srcLen;			/* Source string length in bytes. */
  2790     int flags;			/* Conversion control flags. */
  2791     Tcl_EncodingState *statePtr;/* Place for conversion routine to store
  2792 				 * state information used during a piecewise
  2793 				 * conversion.  Contents of statePtr are
  2794 				 * initialized and/or reset by conversion
  2795 				 * routine under control of flags argument. */
  2796     char *dst;			/* Output buffer in which converted string
  2797 				 * is stored. */
  2798     int dstLen;			/* The maximum length of output buffer in
  2799 				 * bytes. */
  2800     int *srcReadPtr;		/* Filled with the number of bytes from the
  2801 				 * source string that were converted.  This
  2802 				 * may be less than the original source length
  2803 				 * if there was a problem converting some
  2804 				 * source characters. */
  2805     int *dstWrotePtr;		/* Filled with the number of bytes that were
  2806 				 * stored in the output buffer as a result of
  2807 				 * the conversion. */
  2808     int *dstCharsPtr;		/* Filled with the number of characters that
  2809 				 * correspond to the bytes stored in the
  2810 				 * output buffer. */
  2811 {
  2812     EscapeEncodingData *dataPtr;
  2813     Encoding *encodingPtr;
  2814     CONST char *srcStart, *srcEnd, *srcClose;
  2815     char *dstStart, *dstEnd;
  2816     int state, result, numChars;
  2817     TableEncodingData *tableDataPtr;
  2818     char *tablePrefixBytes;
  2819     unsigned short **tableFromUnicode;
  2820     
  2821     result = TCL_OK;    
  2822 
  2823     dataPtr = (EscapeEncodingData *) clientData;
  2824 
  2825     srcStart = src;
  2826     srcEnd = src + srcLen;
  2827     srcClose = srcEnd;
  2828     if ((flags & TCL_ENCODING_END) == 0) {
  2829 	srcClose -= TCL_UTF_MAX;
  2830     }
  2831 
  2832     dstStart = dst;
  2833     dstEnd = dst + dstLen - 1;
  2834 
  2835     /*
  2836      * RFC1468 states that the text starts in ASCII, and switches to Japanese
  2837      * characters, and that the text must end in ASCII. [Patch #474358]
  2838      */
  2839 
  2840     if (flags & TCL_ENCODING_START) {
  2841 	state = 0;
  2842 	if ((dst + dataPtr->initLen) > dstEnd) {
  2843 	    *srcReadPtr = 0;
  2844 	    *dstWrotePtr = 0;
  2845 	    return TCL_CONVERT_NOSPACE;
  2846 	}
  2847 	memcpy((VOID *) dst, (VOID *) dataPtr->init,
  2848 		(size_t) dataPtr->initLen);
  2849 	dst += dataPtr->initLen;
  2850     } else {
  2851         state = (int) *statePtr;
  2852     }
  2853 
  2854     encodingPtr = GetTableEncoding(dataPtr, state);
  2855     tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  2856     tablePrefixBytes = tableDataPtr->prefixBytes;
  2857     tableFromUnicode = tableDataPtr->fromUnicode;
  2858 
  2859     for (numChars = 0; src < srcEnd; numChars++) {
  2860 	unsigned int len;
  2861 	int word;
  2862 	Tcl_UniChar ch;
  2863 	
  2864 	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
  2865 	    /*
  2866 	     * If there is more string to follow, this will ensure that the
  2867 	     * last UTF-8 character in the source buffer hasn't been cut off.
  2868 	     */
  2869 
  2870 	    result = TCL_CONVERT_MULTIBYTE;
  2871 	    break;
  2872 	}
  2873 	len = TclUtfToUniChar(src, &ch);
  2874 	word = tableFromUnicode[(ch >> 8)][ch & 0xff];
  2875 
  2876 	if ((word == 0) && (ch != 0)) {
  2877 	    int oldState;
  2878 	    EscapeSubTable *subTablePtr;
  2879 	    
  2880 	    oldState = state;
  2881 	    for (state = 0; state < dataPtr->numSubTables; state++) {
  2882 		encodingPtr = GetTableEncoding(dataPtr, state);
  2883 		tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  2884 	    	word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff];
  2885 		if (word != 0) {
  2886 		    break;
  2887 		}
  2888 	    }
  2889 
  2890 	    if (word == 0) {
  2891 		state = oldState;
  2892 		if (flags & TCL_ENCODING_STOPONERROR) {
  2893 		    result = TCL_CONVERT_UNKNOWN;
  2894 		    break;
  2895 		}
  2896 		encodingPtr = GetTableEncoding(dataPtr, state);
  2897 		tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
  2898 		word = tableDataPtr->fallback;
  2899 	    } 
  2900 	    
  2901 	    tablePrefixBytes = tableDataPtr->prefixBytes;
  2902 	    tableFromUnicode = tableDataPtr->fromUnicode;
  2903 
  2904 	    /*
  2905 	     * The state variable has the value of oldState when word is 0.
  2906 	     * In this case, the escape sequense should not be copied to dst 
  2907 	     * because the current character set is not changed.
  2908 	     */
  2909 	    if (state != oldState) {
  2910 		subTablePtr = &dataPtr->subTables[state];
  2911 		if ((dst + subTablePtr->sequenceLen) > dstEnd) {
  2912 		    /*
  2913 		     * If there is no space to write the escape sequence, the
  2914 		     * state variable must be changed to the value of oldState
  2915 		     * variable because this escape sequence must be written
  2916 		     * in the next conversion.
  2917 		     */
  2918 		    state = oldState;
  2919 		    result = TCL_CONVERT_NOSPACE;
  2920 		    break;
  2921 		}
  2922 		memcpy((VOID *) dst, (VOID *) subTablePtr->sequence,
  2923 			(size_t) subTablePtr->sequenceLen);
  2924 		dst += subTablePtr->sequenceLen;
  2925 	    }
  2926 	}
  2927 
  2928 	if (tablePrefixBytes[(word >> 8)] != 0) {
  2929 	    if (dst + 1 > dstEnd) {
  2930 		result = TCL_CONVERT_NOSPACE;
  2931 		break;
  2932 	    }
  2933 	    dst[0] = (char) (word >> 8);
  2934 	    dst[1] = (char) word;
  2935 	    dst += 2;
  2936 	} else {
  2937 	    if (dst > dstEnd) {
  2938 		result = TCL_CONVERT_NOSPACE;
  2939 		break;
  2940 	    }
  2941 	    dst[0] = (char) word;
  2942 	    dst++;
  2943 	} 
  2944 	src += len;
  2945     }
  2946 
  2947     if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) {
  2948 	unsigned int len = dataPtr->subTables[0].sequenceLen;
  2949 	/*
  2950 	 * [Bug 1516109].
  2951 	 * Certain encodings like iso2022-jp need to write
  2952 	 * an escape sequence after all characters have
  2953 	 * been converted. This logic checks that enough
  2954 	 * room is available in the buffer for the escape bytes.
  2955 	 * The TCL_ENCODING_END flag is cleared after a final
  2956 	 * escape sequence has been added to the buffer so
  2957 	 * that another call to this method does not attempt
  2958 	 * to append escape bytes a second time.
  2959 	 */
  2960 	if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) {
  2961 	    result = TCL_CONVERT_NOSPACE;
  2962 	} else {
  2963 	    if (state) {
  2964 		memcpy((VOID *) dst, (VOID *) dataPtr->subTables[0].sequence,
  2965 			(size_t) len);
  2966 		dst += len;
  2967 	    }
  2968 	    memcpy((VOID *) dst, (VOID *) dataPtr->final,
  2969 		    (size_t) dataPtr->finalLen);
  2970 	    dst += dataPtr->finalLen;
  2971 	    state &= ~TCL_ENCODING_END;
  2972 	}
  2973     }
  2974 
  2975     *statePtr = (Tcl_EncodingState) state;
  2976     *srcReadPtr = src - srcStart;
  2977     *dstWrotePtr = dst - dstStart;
  2978     *dstCharsPtr = numChars;
  2979     return result;
  2980 }
  2981 
  2982 /*
  2983  *---------------------------------------------------------------------------
  2984  *
  2985  * EscapeFreeProc --
  2986  *
  2987  *	This procedure is invoked when an EscapeEncodingData encoding is 
  2988  *	deleted.  It deletes the memory used by the encoding.
  2989  *
  2990  * Results:
  2991  *	None.
  2992  *
  2993  * Side effects:
  2994  *	Memory freed.
  2995  *
  2996  *---------------------------------------------------------------------------
  2997  */
  2998 
  2999 static void
  3000 EscapeFreeProc(clientData)
  3001     ClientData clientData;	/* EscapeEncodingData that specifies encoding. */
  3002 {
  3003     EscapeEncodingData *dataPtr;
  3004     EscapeSubTable *subTablePtr;
  3005     int i;
  3006 
  3007     dataPtr = (EscapeEncodingData *) clientData;
  3008     if (dataPtr == NULL) {
  3009 	return;
  3010     }
  3011     subTablePtr = dataPtr->subTables;
  3012     for (i = 0; i < dataPtr->numSubTables; i++) {
  3013 	FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr);
  3014 	subTablePtr++;
  3015     }
  3016     ckfree((char *) dataPtr);
  3017 }
  3018 
  3019 /*
  3020  *---------------------------------------------------------------------------
  3021  *
  3022  * GetTableEncoding --
  3023  *
  3024  *	Helper function for the EscapeEncodingData conversions.  Gets the
  3025  *	encoding (of type TextEncodingData) that represents the specified
  3026  *	state.
  3027  *
  3028  * Results:
  3029  *	The return value is the encoding.
  3030  *
  3031  * Side effects:
  3032  *	If the encoding that represents the specified state has not
  3033  *	already been used by this EscapeEncoding, it will be loaded
  3034  *	and cached in the dataPtr.
  3035  *
  3036  *---------------------------------------------------------------------------
  3037  */
  3038 
  3039 static Encoding *
  3040 GetTableEncoding(dataPtr, state)
  3041     EscapeEncodingData *dataPtr;/* Contains names of encodings. */
  3042     int state;			/* Index in dataPtr of desired Encoding. */
  3043 {
  3044     EscapeSubTable *subTablePtr;
  3045     Encoding *encodingPtr;
  3046     
  3047     subTablePtr = &dataPtr->subTables[state];
  3048     encodingPtr = subTablePtr->encodingPtr;
  3049     if (encodingPtr == NULL) {
  3050 	/*
  3051 	 * Now that escape encodings load their sub-encodings first, and
  3052 	 * fail to load if any sub-encodings are missing, this branch should
  3053 	 * never happen.  
  3054 	 */
  3055 	encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
  3056 	if ((encodingPtr == NULL) 
  3057 		|| (encodingPtr->toUtfProc != TableToUtfProc)) {
  3058 	    panic("EscapeToUtfProc: invalid sub table");
  3059 	}
  3060 	subTablePtr->encodingPtr = encodingPtr;
  3061     }
  3062     return encodingPtr;
  3063 }
  3064 
  3065 /*
  3066  *---------------------------------------------------------------------------
  3067  *
  3068  * unilen --
  3069  *
  3070  *	A helper function for the Tcl_ExternalToUtf functions.  This
  3071  *	function is similar to strlen for double-byte characters: it
  3072  *	returns the number of bytes in a 0x0000 terminated string.
  3073  *
  3074  * Results:
  3075  *	As above.
  3076  *
  3077  * Side effects:
  3078  *	None.
  3079  *
  3080  *---------------------------------------------------------------------------
  3081  */
  3082 
  3083 static size_t
  3084 unilen(src)
  3085     CONST char *src;
  3086 {
  3087     unsigned short *p;
  3088 
  3089     p = (unsigned short *) src;
  3090     while (*p != 0x0000) {
  3091 	p++;
  3092     }
  3093     return (char *) p - src;
  3094 }
  3095 
  3096 /*
  3097  *-------------------------------------------------------------------------
  3098  *
  3099  * TclFindEncodings --
  3100  *
  3101  *	Find and load the encoding file for this operating system.
  3102  *	Before this is called, Tcl makes assumptions about the
  3103  *	native string representation, but the true encoding is not
  3104  *	assured.
  3105  *
  3106  * Results:
  3107  *	Return result of TclpInitLibraryPath, which reports whether the
  3108  *	path is clean (0) or dirty (1) UTF.
  3109  *
  3110  * Side effects:
  3111  *	Varied, see the respective initialization routines.
  3112  *
  3113  *-------------------------------------------------------------------------
  3114  */
  3115 
  3116 static int
  3117 TclFindEncodings(argv0)
  3118     CONST char *argv0;		/* Name of executable from argv[0] to main()
  3119 				 * in native multi-byte encoding. */
  3120 {
  3121     int mustCleanUtf = 0;
  3122 
  3123     if (encodingsInitialized == 0) {
  3124 	/* 
  3125 	 * Double check inside the mutex.  There may be calls
  3126 	 * back into this routine from some of the procedures below.
  3127 	 */
  3128 
  3129 	TclpInitLock();
  3130 	if (encodingsInitialized == 0) {
  3131 	    char *native;
  3132 	    Tcl_Obj *pathPtr;
  3133 	    Tcl_DString libPath, buffer;
  3134 
  3135 	    /*
  3136 	     * Have to set this bit here to avoid deadlock with the
  3137 	     * routines below us that call into TclInitSubsystems.
  3138 	     */
  3139 
  3140 	    encodingsInitialized = 1;
  3141 
  3142 	    native = TclpFindExecutable(argv0);
  3143 	    mustCleanUtf = TclpInitLibraryPath(native);
  3144 
  3145 	    /*
  3146 	     * The library path was set in the TclpInitLibraryPath routine.
  3147 	     * The string set is a dirty UTF string.  To preserve the value
  3148 	     * convert the UTF string back to native before setting the new
  3149 	     * default encoding.
  3150 	     */
  3151 
  3152 	    pathPtr = TclGetLibraryPath();
  3153 	    if ((pathPtr != NULL) && mustCleanUtf) {
  3154 		Tcl_UtfToExternalDString(NULL, Tcl_GetString(pathPtr), -1,
  3155 			&libPath);
  3156 	    }
  3157 
  3158 	    TclpSetInitialEncodings();
  3159 
  3160 	    /*
  3161 	     * Now convert the native string back to UTF.
  3162 	     */
  3163 
  3164 	    if ((pathPtr != NULL) && mustCleanUtf) {
  3165 		Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&libPath), -1,
  3166 			&buffer);
  3167 		pathPtr = Tcl_NewStringObj(Tcl_DStringValue(&buffer), -1);
  3168 		TclSetLibraryPath(pathPtr);
  3169 
  3170 		Tcl_DStringFree(&libPath);
  3171 		Tcl_DStringFree(&buffer);
  3172 	    }
  3173 	}
  3174 	TclpInitUnlock();
  3175     }
  3176 
  3177     return mustCleanUtf;
  3178 }