Symaptic: os/persistentdata/persistentstorage/sqlite3api/TEST/TCL/tcldistribution/generic/tclEncoding.c@bde4ae8d615e (annotated)

sl@0	1	/*
sl@0	2	* tclEncoding.c --
sl@0	3	*
sl@0	4	* Contains the implementation of the encoding conversion package.
sl@0	5	*
sl@0	6	* Copyright (c) 1996-1998 Sun Microsystems, Inc.
sl@0	7	* Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved.
sl@0	8	*
sl@0	9	* See the file "license.terms" for information on usage and redistribution
sl@0	10	* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
sl@0	11	*
sl@0	12	* RCS: @(#) $Id: tclEncoding.c,v 1.16.2.14 2007/02/12 19:25:42 andreas_kupries Exp $
sl@0	13	*/
sl@0	14
sl@0	15	#include "tclInt.h"
sl@0	16	#include "tclPort.h"
sl@0	17	#if defined(__SYMBIAN32__)
sl@0	18	#include "tclSymbianGlobals.h"
sl@0	19	#endif
sl@0	20
sl@0	21	typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));
sl@0	22
sl@0	23	/*
sl@0	24	* The following data structure represents an encoding, which describes how
sl@0	25	* to convert between various character sets and UTF-8.
sl@0	26	*/
sl@0	27
sl@0	28	typedef struct Encoding {
sl@0	29	char name; / Name of encoding. Malloced because (1)
sl@0	30	* hash table entry that owns this encoding
sl@0	31	* may be freed prior to this encoding being
sl@0	32	* freed, (2) string passed in the
sl@0	33	* Tcl_EncodingType structure may not be
sl@0	34	* persistent. */
sl@0	35	Tcl_EncodingConvertProc *toUtfProc;
sl@0	36	/* Procedure to convert from external
sl@0	37	* encoding into UTF-8. */
sl@0	38	Tcl_EncodingConvertProc *fromUtfProc;
sl@0	39	/* Procedure to convert from UTF-8 into
sl@0	40	* external encoding. */
sl@0	41	Tcl_EncodingFreeProc *freeProc;
sl@0	42	/* If non-NULL, procedure to call when this
sl@0	43	* encoding is deleted. */
sl@0	44	int nullSize; /* Number of 0x00 bytes that signify
sl@0	45	* end-of-string in this encoding. This
sl@0	46	* number is used to determine the source
sl@0	47	* string length when the srcLen argument is
sl@0	48	* negative. This number can be 1 or 2. */
sl@0	49	ClientData clientData; /* Arbitrary value associated with encoding
sl@0	50	* type. Passed to conversion procedures. */
sl@0	51	LengthProc lengthProc; / Function to compute length of
sl@0	52	* null-terminated strings in this encoding.
sl@0	53	* If nullSize is 1, this is strlen; if
sl@0	54	* nullSize is 2, this is a function that
sl@0	55	* returns the number of bytes in a 0x0000
sl@0	56	* terminated string. */
sl@0	57	int refCount; /* Number of uses of this structure. */
sl@0	58	Tcl_HashEntry hPtr; / Hash table entry that owns this encoding. */
sl@0	59	} Encoding;
sl@0	60
sl@0	61	/*
sl@0	62	* The following structure is the clientData for a dynamically-loaded,
sl@0	63	* table-driven encoding created by LoadTableEncoding(). It maps between
sl@0	64	* Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only)
sl@0	65	* encoding.
sl@0	66	*/
sl@0	67
sl@0	68	typedef struct TableEncodingData {
sl@0	69	int fallback; /* Character (in this encoding) to
sl@0	70	* substitute when this encoding cannot
sl@0	71	* represent a UTF-8 character. */
sl@0	72	char prefixBytes[256]; /* If a byte in the input stream is a lead
sl@0	73	* byte for a 2-byte sequence, the
sl@0	74	* corresponding entry in this array is 1,
sl@0	75	* otherwise it is 0. */
sl@0	76	unsigned short *toUnicode; / Two dimensional sparse matrix to map
sl@0	77	* characters from the encoding to Unicode.
sl@0	78	* Each element of the toUnicode array points
sl@0	79	* to an array of 256 shorts. If there is no
sl@0	80	* corresponding character in Unicode, the
sl@0	81	* value in the matrix is 0x0000. malloc'd. */
sl@0	82	unsigned short **fromUnicode;
sl@0	83	/* Two dimensional sparse matrix to map
sl@0	84	* characters from Unicode to the encoding.
sl@0	85	* Each element of the fromUnicode array
sl@0	86	* points to an array of 256 shorts. If there
sl@0	87	* is no corresponding character the encoding,
sl@0	88	* the value in the matrix is 0x0000.
sl@0	89	* malloc'd. */
sl@0	90	} TableEncodingData;
sl@0	91
sl@0	92	/*
sl@0	93	* The following structures is the clientData for a dynamically-loaded,
sl@0	94	* escape-driven encoding that is itself comprised of other simpler
sl@0	95	* encodings. An example is "iso-2022-jp", which uses escape sequences to
sl@0	96	* switch between ascii, jis0208, jis0212, gb2312, and ksc5601. Note that
sl@0	97	* "escape-driven" does not necessarily mean that the ESCAPE character is
sl@0	98	* the character used for switching character sets.
sl@0	99	*/
sl@0	100
sl@0	101	typedef struct EscapeSubTable {
sl@0	102	unsigned int sequenceLen; /* Length of following string. */
sl@0	103	char sequence[16]; /* Escape code that marks this encoding. */
sl@0	104	char name[32]; /* Name for encoding. */
sl@0	105	Encoding encodingPtr; / Encoding loaded using above name, or NULL
sl@0	106	* if this sub-encoding has not been needed
sl@0	107	* yet. */
sl@0	108	} EscapeSubTable;
sl@0	109
sl@0	110	typedef struct EscapeEncodingData {
sl@0	111	int fallback; /* Character (in this encoding) to
sl@0	112	* substitute when this encoding cannot
sl@0	113	* represent a UTF-8 character. */
sl@0	114	unsigned int initLen; /* Length of following string. */
sl@0	115	char init[16]; /* String to emit or expect before first char
sl@0	116	* in conversion. */
sl@0	117	unsigned int finalLen; /* Length of following string. */
sl@0	118	char final[16]; /* String to emit or expect after last char
sl@0	119	* in conversion. */
sl@0	120	char prefixBytes[256]; /* If a byte in the input stream is the
sl@0	121	* first character of one of the escape
sl@0	122	* sequences in the following array, the
sl@0	123	* corresponding entry in this array is 1,
sl@0	124	* otherwise it is 0. */
sl@0	125	int numSubTables; /* Length of following array. */
sl@0	126	EscapeSubTable subTables[1];/* Information about each EscapeSubTable
sl@0	127	* used by this encoding type. The actual
sl@0	128	* size will be as large as necessary to
sl@0	129	* hold all EscapeSubTables. */
sl@0	130	} EscapeEncodingData;
sl@0	131
sl@0	132	/*
sl@0	133	* Constants used when loading an encoding file to identify the type of the
sl@0	134	* file.
sl@0	135	*/
sl@0	136
sl@0	137	#define ENCODING_SINGLEBYTE 0
sl@0	138	#define ENCODING_DOUBLEBYTE 1
sl@0	139	#define ENCODING_MULTIBYTE 2
sl@0	140	#define ENCODING_ESCAPE 3
sl@0	141
sl@0	142	#if !defined(__SYMBIAN32__) \|\| !defined(__WINSCW__)
sl@0	143	/*
sl@0	144	* Initialize the default encoding directory. If this variable contains
sl@0	145	* a non NULL value, it will be the first path used to locate the
sl@0	146	* system encoding files.
sl@0	147	*/
sl@0	148
sl@0	149	char *tclDefaultEncodingDir = NULL;
sl@0	150
sl@0	151	static int encodingsInitialized = 0;
sl@0	152
sl@0	153	/*
sl@0	154	* Hash table that keeps track of all loaded Encodings. Keys are
sl@0	155	* the string names that represent the encoding, values are (Encoding *).
sl@0	156	*/
sl@0	157
sl@0	158	static Tcl_HashTable encodingTable;
sl@0	159	TCL_DECLARE_MUTEX(encodingMutex)
sl@0	160
sl@0	161	/*
sl@0	162	* The following are used to hold the default and current system encodings.
sl@0	163	* If NULL is passed to one of the conversion routines, the current setting
sl@0	164	* of the system encoding will be used to perform the conversion.
sl@0	165	*/
sl@0	166
sl@0	167	static Tcl_Encoding defaultEncoding;
sl@0	168	static Tcl_Encoding systemEncoding;
sl@0	169	#endif
sl@0	170	/*
sl@0	171	* The following variable is used in the sparse matrix code for a
sl@0	172	* TableEncoding to represent a page in the table that has no entries.
sl@0	173	*/
sl@0	174
sl@0	175	static unsigned short emptyPage[256];
sl@0	176
sl@0	177	/*
sl@0	178	* Procedures used only in this module.
sl@0	179	*/
sl@0	180
sl@0	181	static int BinaryProc _ANSI_ARGS_((ClientData clientData,
sl@0	182	CONST char *src, int srcLen, int flags,
sl@0	183	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	184	int srcReadPtr, int dstWrotePtr,
sl@0	185	int *dstCharsPtr));
sl@0	186	static void DupEncodingIntRep _ANSI_ARGS_((Tcl_Obj *srcPtr,
sl@0	187	Tcl_Obj *dupPtr));
sl@0	188	static void EscapeFreeProc _ANSI_ARGS_((ClientData clientData));
sl@0	189	static int EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	190	CONST char *src, int srcLen, int flags,
sl@0	191	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	192	int srcReadPtr, int dstWrotePtr,
sl@0	193	int *dstCharsPtr));
sl@0	194	static int EscapeToUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	195	CONST char *src, int srcLen, int flags,
sl@0	196	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	197	int srcReadPtr, int dstWrotePtr,
sl@0	198	int *dstCharsPtr));
sl@0	199	static void FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding));
sl@0	200	static void FreeEncodingIntRep _ANSI_ARGS_((Tcl_Obj *objPtr));
sl@0	201	static Encoding * GetTableEncoding _ANSI_ARGS_((
sl@0	202	EscapeEncodingData *dataPtr, int state));
sl@0	203	static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp,
sl@0	204	CONST char *name));
sl@0	205	static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp,
sl@0	206	CONST char *name, int type, Tcl_Channel chan));
sl@0	207	static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name,
sl@0	208	Tcl_Channel chan));
sl@0	209	static Tcl_Channel OpenEncodingFile _ANSI_ARGS_((CONST char *dir,
sl@0	210	CONST char *name));
sl@0	211	static void TableFreeProc _ANSI_ARGS_((ClientData clientData));
sl@0	212	static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	213	CONST char *src, int srcLen, int flags,
sl@0	214	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	215	int srcReadPtr, int dstWrotePtr,
sl@0	216	int *dstCharsPtr));
sl@0	217	static int TableToUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	218	CONST char *src, int srcLen, int flags,
sl@0	219	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	220	int srcReadPtr, int dstWrotePtr,
sl@0	221	int *dstCharsPtr));
sl@0	222	static size_t unilen _ANSI_ARGS_((CONST char *src));
sl@0	223	static int UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	224	CONST char *src, int srcLen, int flags,
sl@0	225	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	226	int srcReadPtr, int dstWrotePtr,
sl@0	227	int *dstCharsPtr));
sl@0	228	static int UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData,
sl@0	229	CONST char *src, int srcLen, int flags,
sl@0	230	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	231	int srcReadPtr, int dstWrotePtr,
sl@0	232	int *dstCharsPtr));
sl@0	233	static int UtfToUtfProc _ANSI_ARGS_((ClientData clientData,
sl@0	234	CONST char *src, int srcLen, int flags,
sl@0	235	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	236	int srcReadPtr, int dstWrotePtr,
sl@0	237	int *dstCharsPtr, int pureNullMode));
sl@0	238	static int UtfIntToUtfExtProc _ANSI_ARGS_((ClientData clientData,
sl@0	239	CONST char *src, int srcLen, int flags,
sl@0	240	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	241	int srcReadPtr, int dstWrotePtr,
sl@0	242	int *dstCharsPtr));
sl@0	243	static int UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData,
sl@0	244	CONST char *src, int srcLen, int flags,
sl@0	245	Tcl_EncodingState statePtr, char dst, int dstLen,
sl@0	246	int srcReadPtr, int dstWrotePtr,
sl@0	247	int *dstCharsPtr));
sl@0	248	static int TclFindEncodings _ANSI_ARGS_((CONST char *argv0));
sl@0	249
sl@0	250	/*
sl@0	251	* A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep.
sl@0	252	* This should help the lifetime of encodings be more useful.
sl@0	253	* See concerns raised in [Bug 1077262].
sl@0	254	*/
sl@0	255
sl@0	256	static Tcl_ObjType EncodingType = {
sl@0	257	"encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
sl@0	258	};
sl@0	259
sl@0	260
sl@0	261	/*
sl@0	262	*----------------------------------------------------------------------
sl@0	263	*
sl@0	264	* TclGetEncodingFromObj --
sl@0	265	*
sl@0	266	* Writes to (encodingPtr) the Tcl_Encoding value of (objPtr),
sl@0	267	* if possible, and returns TCL_OK. If no such encoding exists,
sl@0	268	* TCL_ERROR is returned, and if interp is non-NULL, an error message
sl@0	269	* is written there.
sl@0	270	*
sl@0	271	* Results:
sl@0	272	* Standard Tcl return code.
sl@0	273	*
sl@0	274	* Side effects:
sl@0	275	* Caches the Tcl_Encoding value as the internal rep of (*objPtr).
sl@0	276	*
sl@0	277	*----------------------------------------------------------------------
sl@0	278	*/
sl@0	279	int
sl@0	280	TclGetEncodingFromObj(interp, objPtr, encodingPtr)
sl@0	281	Tcl_Interp *interp;
sl@0	282	Tcl_Obj *objPtr;
sl@0	283	Tcl_Encoding *encodingPtr;
sl@0	284	{
sl@0	285	CONST char *name = Tcl_GetString(objPtr);
sl@0	286	if (objPtr->typePtr != &EncodingType) {
sl@0	287	Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
sl@0	288
sl@0	289	if (encoding == NULL) {
sl@0	290	return TCL_ERROR;
sl@0	291	}
sl@0	292	if (objPtr->typePtr && objPtr->typePtr->freeIntRepProc) {
sl@0	293	objPtr->typePtr->freeIntRepProc(objPtr);
sl@0	294	}
sl@0	295	objPtr->internalRep.otherValuePtr = (VOID *) encoding;
sl@0	296	objPtr->typePtr = &EncodingType;
sl@0	297	}
sl@0	298	*encodingPtr = Tcl_GetEncoding(NULL, name);
sl@0	299	return TCL_OK;
sl@0	300	}
sl@0	301
sl@0	302	/*
sl@0	303	*----------------------------------------------------------------------
sl@0	304	*
sl@0	305	* FreeEncodingIntRep --
sl@0	306	*
sl@0	307	* The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType.
sl@0	308	*
sl@0	309	*----------------------------------------------------------------------
sl@0	310	*/
sl@0	311	static void
sl@0	312	FreeEncodingIntRep(objPtr)
sl@0	313	Tcl_Obj *objPtr;
sl@0	314	{
sl@0	315	Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr);
sl@0	316	}
sl@0	317
sl@0	318	/*
sl@0	319	*----------------------------------------------------------------------
sl@0	320	*
sl@0	321	* DupEncodingIntRep --
sl@0	322	*
sl@0	323	* The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.
sl@0	324	*
sl@0	325	*----------------------------------------------------------------------
sl@0	326	*/
sl@0	327	static void
sl@0	328	DupEncodingIntRep(srcPtr, dupPtr)
sl@0	329	Tcl_Obj *srcPtr;
sl@0	330	Tcl_Obj *dupPtr;
sl@0	331	{
sl@0	332	dupPtr->internalRep.otherValuePtr = (VOID *)
sl@0	333	Tcl_GetEncoding(NULL, srcPtr->bytes);
sl@0	334	}
sl@0	335
sl@0	336	/*
sl@0	337	*---------------------------------------------------------------------------
sl@0	338	*
sl@0	339	* TclInitEncodingSubsystem --
sl@0	340	*
sl@0	341	* Initialize all resources used by this subsystem on a per-process
sl@0	342	* basis.
sl@0	343	*
sl@0	344	* Results:
sl@0	345	* None.
sl@0	346	*
sl@0	347	* Side effects:
sl@0	348	* Depends on the memory, object, and IO subsystems.
sl@0	349	*
sl@0	350	*---------------------------------------------------------------------------
sl@0	351	*/
sl@0	352
sl@0	353	void
sl@0	354	TclInitEncodingSubsystem()
sl@0	355	{
sl@0	356	Tcl_EncodingType type;
sl@0	357
sl@0	358	Tcl_MutexLock(&encodingMutex);
sl@0	359	Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
sl@0	360	Tcl_MutexUnlock(&encodingMutex);
sl@0	361
sl@0	362	/*
sl@0	363	* Create a few initial encodings. Note that the UTF-8 to UTF-8
sl@0	364	* translation is not a no-op, because it will turn a stream of
sl@0	365	* improperly formed UTF-8 into a properly formed stream.
sl@0	366	*/
sl@0	367
sl@0	368	type.encodingName = "identity";
sl@0	369	type.toUtfProc = BinaryProc;
sl@0	370	type.fromUtfProc = BinaryProc;
sl@0	371	type.freeProc = NULL;
sl@0	372	type.nullSize = 1;
sl@0	373	type.clientData = NULL;
sl@0	374
sl@0	375	defaultEncoding = Tcl_CreateEncoding(&type);
sl@0	376	systemEncoding = Tcl_GetEncoding(NULL, type.encodingName);
sl@0	377
sl@0	378	type.encodingName = "utf-8";
sl@0	379	type.toUtfProc = UtfExtToUtfIntProc;
sl@0	380	type.fromUtfProc = UtfIntToUtfExtProc;
sl@0	381	type.freeProc = NULL;
sl@0	382	type.nullSize = 1;
sl@0	383	type.clientData = NULL;
sl@0	384	Tcl_CreateEncoding(&type);
sl@0	385
sl@0	386	type.encodingName = "unicode";
sl@0	387	type.toUtfProc = UnicodeToUtfProc;
sl@0	388	type.fromUtfProc = UtfToUnicodeProc;
sl@0	389	type.freeProc = NULL;
sl@0	390	type.nullSize = 2;
sl@0	391	type.clientData = NULL;
sl@0	392	Tcl_CreateEncoding(&type);
sl@0	393	}
sl@0	394
sl@0	395
sl@0	396	/*
sl@0	397	*----------------------------------------------------------------------
sl@0	398	*
sl@0	399	* TclFinalizeEncodingSubsystem --
sl@0	400	*
sl@0	401	* Release the state associated with the encoding subsystem.
sl@0	402	*
sl@0	403	* Results:
sl@0	404	* None.
sl@0	405	*
sl@0	406	* Side effects:
sl@0	407	* Frees all of the encodings.
sl@0	408	*
sl@0	409	*----------------------------------------------------------------------
sl@0	410	*/
sl@0	411
sl@0	412	void
sl@0	413	TclFinalizeEncodingSubsystem()
sl@0	414	{
sl@0	415	Tcl_HashSearch search;
sl@0	416	Tcl_HashEntry *hPtr;
sl@0	417
sl@0	418	Tcl_MutexLock(&encodingMutex);
sl@0	419	encodingsInitialized = 0;
sl@0	420	FreeEncoding(systemEncoding);
sl@0	421	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
sl@0	422	while (hPtr != NULL) {
sl@0	423	/*
sl@0	424	* Call FreeEncoding instead of doing it directly to handle refcounts
sl@0	425	* like escape encodings use. [Bug #524674]
sl@0	426	* Make sure to call Tcl_FirstHashEntry repeatedly so that all
sl@0	427	* encodings are eventually cleaned up.
sl@0	428	*/
sl@0	429	FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr));
sl@0	430	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
sl@0	431	}
sl@0	432	Tcl_DeleteHashTable(&encodingTable);
sl@0	433	Tcl_MutexUnlock(&encodingMutex);
sl@0	434	}
sl@0	435
sl@0	436	/*
sl@0	437	*-------------------------------------------------------------------------
sl@0	438	*
sl@0	439	* Tcl_GetDefaultEncodingDir --
sl@0	440	*
sl@0	441	*
sl@0	442	* Results:
sl@0	443	*
sl@0	444	* Side effects:
sl@0	445	*
sl@0	446	*-------------------------------------------------------------------------
sl@0	447	*/
sl@0	448
sl@0	449	EXPORT_C CONST char *
sl@0	450	Tcl_GetDefaultEncodingDir()
sl@0	451	{
sl@0	452	return tclDefaultEncodingDir;
sl@0	453	}
sl@0	454
sl@0	455	/*
sl@0	456	*-------------------------------------------------------------------------
sl@0	457	*
sl@0	458	* Tcl_SetDefaultEncodingDir --
sl@0	459	*
sl@0	460	*
sl@0	461	* Results:
sl@0	462	*
sl@0	463	* Side effects:
sl@0	464	*
sl@0	465	*-------------------------------------------------------------------------
sl@0	466	*/
sl@0	467
sl@0	468	EXPORT_C void
sl@0	469	Tcl_SetDefaultEncodingDir(path)
sl@0	470	CONST char *path;
sl@0	471	{
sl@0	472	tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1);
sl@0	473	strcpy(tclDefaultEncodingDir, path);
sl@0	474	}
sl@0	475
sl@0	476	/*
sl@0	477	*-------------------------------------------------------------------------
sl@0	478	*
sl@0	479	* Tcl_GetEncoding --
sl@0	480	*
sl@0	481	* Given the name of a encoding, find the corresponding Tcl_Encoding
sl@0	482	* token. If the encoding did not already exist, Tcl attempts to
sl@0	483	* dynamically load an encoding by that name.
sl@0	484	*
sl@0	485	* Results:
sl@0	486	* Returns a token that represents the encoding. If the name didn't
sl@0	487	* refer to any known or loadable encoding, NULL is returned. If
sl@0	488	* NULL was returned, an error message is left in interp's result
sl@0	489	* object, unless interp was NULL.
sl@0	490	*
sl@0	491	* Side effects:
sl@0	492	* The new encoding type is entered into a table visible to all
sl@0	493	* interpreters, keyed off the encoding's name. For each call to
sl@0	494	* this procedure, there should eventually be a call to
sl@0	495	* Tcl_FreeEncoding, so that the database can be cleaned up when
sl@0	496	* encodings aren't needed anymore.
sl@0	497	*
sl@0	498	*-------------------------------------------------------------------------
sl@0	499	*/
sl@0	500
sl@0	501	EXPORT_C Tcl_Encoding
sl@0	502	Tcl_GetEncoding(interp, name)
sl@0	503	Tcl_Interp interp; / Interp for error reporting, if not NULL. */
sl@0	504	CONST char name; / The name of the desired encoding. */
sl@0	505	{
sl@0	506	Tcl_HashEntry *hPtr;
sl@0	507	Encoding *encodingPtr;
sl@0	508
sl@0	509	Tcl_MutexLock(&encodingMutex);
sl@0	510	if (name == NULL) {
sl@0	511	encodingPtr = (Encoding *) systemEncoding;
sl@0	512	encodingPtr->refCount++;
sl@0	513	Tcl_MutexUnlock(&encodingMutex);
sl@0	514	return systemEncoding;
sl@0	515	}
sl@0	516
sl@0	517	hPtr = Tcl_FindHashEntry(&encodingTable, name);
sl@0	518	if (hPtr != NULL) {
sl@0	519	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
sl@0	520	encodingPtr->refCount++;
sl@0	521	Tcl_MutexUnlock(&encodingMutex);
sl@0	522	return (Tcl_Encoding) encodingPtr;
sl@0	523	}
sl@0	524	Tcl_MutexUnlock(&encodingMutex);
sl@0	525	return LoadEncodingFile(interp, name);
sl@0	526	}
sl@0	527
sl@0	528	/*
sl@0	529	*---------------------------------------------------------------------------
sl@0	530	*
sl@0	531	* Tcl_FreeEncoding --
sl@0	532	*
sl@0	533	* This procedure is called to release an encoding allocated by
sl@0	534	* Tcl_CreateEncoding() or Tcl_GetEncoding().
sl@0	535	*
sl@0	536	* Results:
sl@0	537	* None.
sl@0	538	*
sl@0	539	* Side effects:
sl@0	540	* The reference count associated with the encoding is decremented
sl@0	541	* and the encoding may be deleted if nothing is using it anymore.
sl@0	542	*
sl@0	543	*---------------------------------------------------------------------------
sl@0	544	*/
sl@0	545
sl@0	546	EXPORT_C void
sl@0	547	Tcl_FreeEncoding(encoding)
sl@0	548	Tcl_Encoding encoding;
sl@0	549	{
sl@0	550	Tcl_MutexLock(&encodingMutex);
sl@0	551	FreeEncoding(encoding);
sl@0	552	Tcl_MutexUnlock(&encodingMutex);
sl@0	553	}
sl@0	554
sl@0	555	/*
sl@0	556	*----------------------------------------------------------------------
sl@0	557	*
sl@0	558	* FreeEncoding --
sl@0	559	*
sl@0	560	* This procedure is called to release an encoding by procedures
sl@0	561	* that already have the encodingMutex.
sl@0	562	*
sl@0	563	* Results:
sl@0	564	* None.
sl@0	565	*
sl@0	566	* Side effects:
sl@0	567	* The reference count associated with the encoding is decremented
sl@0	568	* and the encoding may be deleted if nothing is using it anymore.
sl@0	569	*
sl@0	570	*----------------------------------------------------------------------
sl@0	571	*/
sl@0	572
sl@0	573	static void
sl@0	574	FreeEncoding(encoding)
sl@0	575	Tcl_Encoding encoding;
sl@0	576	{
sl@0	577	Encoding *encodingPtr;
sl@0	578
sl@0	579	encodingPtr = (Encoding *) encoding;
sl@0	580	if (encodingPtr == NULL) {
sl@0	581	return;
sl@0	582	}
sl@0	583	encodingPtr->refCount--;
sl@0	584	if (encodingPtr->refCount == 0) {
sl@0	585	if (encodingPtr->freeProc != NULL) {
sl@0	586	(*encodingPtr->freeProc)(encodingPtr->clientData);
sl@0	587	}
sl@0	588	if (encodingPtr->hPtr != NULL) {
sl@0	589	Tcl_DeleteHashEntry(encodingPtr->hPtr);
sl@0	590	}
sl@0	591	ckfree((char *) encodingPtr->name);
sl@0	592	ckfree((char *) encodingPtr);
sl@0	593	}
sl@0	594	}
sl@0	595
sl@0	596	/*
sl@0	597	*-------------------------------------------------------------------------
sl@0	598	*
sl@0	599	* Tcl_GetEncodingName --
sl@0	600	*
sl@0	601	* Given an encoding, return the name that was used to constuct
sl@0	602	* the encoding.
sl@0	603	*
sl@0	604	* Results:
sl@0	605	* The name of the encoding.
sl@0	606	*
sl@0	607	* Side effects:
sl@0	608	* None.
sl@0	609	*
sl@0	610	*---------------------------------------------------------------------------
sl@0	611	*/
sl@0	612
sl@0	613	EXPORT_C CONST char *
sl@0	614	Tcl_GetEncodingName(encoding)
sl@0	615	Tcl_Encoding encoding; /* The encoding whose name to fetch. */
sl@0	616	{
sl@0	617	Encoding *encodingPtr;
sl@0	618
sl@0	619	if (encoding == NULL) {
sl@0	620	encoding = systemEncoding;
sl@0	621	}
sl@0	622	encodingPtr = (Encoding *) encoding;
sl@0	623	return encodingPtr->name;
sl@0	624	}
sl@0	625
sl@0	626	/*
sl@0	627	*-------------------------------------------------------------------------
sl@0	628	*
sl@0	629	* Tcl_GetEncodingNames --
sl@0	630	*
sl@0	631	* Get the list of all known encodings, including the ones stored
sl@0	632	* as files on disk in the encoding path.
sl@0	633	*
sl@0	634	* Results:
sl@0	635	* Modifies interp's result object to hold a list of all the available
sl@0	636	* encodings.
sl@0	637	*
sl@0	638	* Side effects:
sl@0	639	* None.
sl@0	640	*
sl@0	641	*-------------------------------------------------------------------------
sl@0	642	*/
sl@0	643
sl@0	644	EXPORT_C void
sl@0	645	Tcl_GetEncodingNames(interp)
sl@0	646	Tcl_Interp interp; / Interp to hold result. */
sl@0	647	{
sl@0	648	Tcl_HashSearch search;
sl@0	649	Tcl_HashEntry *hPtr;
sl@0	650	Tcl_Obj pathPtr, resultPtr;
sl@0	651	int dummy;
sl@0	652
sl@0	653	Tcl_HashTable table;
sl@0	654
sl@0	655	Tcl_MutexLock(&encodingMutex);
sl@0	656	Tcl_InitHashTable(&table, TCL_STRING_KEYS);
sl@0	657	hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
sl@0	658	while (hPtr != NULL) {
sl@0	659	Encoding *encodingPtr;
sl@0	660
sl@0	661	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
sl@0	662	Tcl_CreateHashEntry(&table, encodingPtr->name, &dummy);
sl@0	663	hPtr = Tcl_NextHashEntry(&search);
sl@0	664	}
sl@0	665	Tcl_MutexUnlock(&encodingMutex);
sl@0	666
sl@0	667	pathPtr = TclGetLibraryPath();
sl@0	668	if (pathPtr != NULL) {
sl@0	669	int i, objc;
sl@0	670	Tcl_Obj **objv;
sl@0	671	char globArgString[10];
sl@0	672	Tcl_Obj* encodingObj = Tcl_NewStringObj("encoding",-1);
sl@0	673	Tcl_IncrRefCount(encodingObj);
sl@0	674
sl@0	675	objc = 0;
sl@0	676	Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
sl@0	677
sl@0	678	for (i = 0; i < objc; i++) {
sl@0	679	Tcl_Obj *searchIn;
sl@0	680
sl@0	681	/*
sl@0	682	* Construct the path from the element of pathPtr,
sl@0	683	* joined with 'encoding'.
sl@0	684	*/
sl@0	685	searchIn = Tcl_FSJoinToPath(objv[i],1,&encodingObj);
sl@0	686	Tcl_IncrRefCount(searchIn);
sl@0	687	Tcl_ResetResult(interp);
sl@0	688
sl@0	689	/*
sl@0	690	* TclGlob() changes the contents of globArgString, which causes
sl@0	691	* a segfault if we pass in a pointer to non-writeable memory.
sl@0	692	* TclGlob() puts its results directly into interp.
sl@0	693	*/
sl@0	694
sl@0	695	strcpy(globArgString, "*.enc");
sl@0	696	/*
sl@0	697	* The GLOBMODE_TAILS flag returns just the tail of each file
sl@0	698	* which is the encoding name with a .enc extension
sl@0	699	*/
sl@0	700	if ((TclGlob(interp, globArgString, searchIn,
sl@0	701	TCL_GLOBMODE_TAILS, NULL) == TCL_OK)) {
sl@0	702	int objc2 = 0;
sl@0	703	Tcl_Obj **objv2;
sl@0	704	int j;
sl@0	705
sl@0	706	Tcl_ListObjGetElements(NULL, Tcl_GetObjResult(interp), &objc2,
sl@0	707	&objv2);
sl@0	708
sl@0	709	for (j = 0; j < objc2; j++) {
sl@0	710	int length;
sl@0	711	char *string;
sl@0	712	string = Tcl_GetStringFromObj(objv2[j], &length);
sl@0	713	length -= 4;
sl@0	714	if (length > 0) {
sl@0	715	string[length] = '\0';
sl@0	716	Tcl_CreateHashEntry(&table, string, &dummy);
sl@0	717	string[length] = '.';
sl@0	718	}
sl@0	719	}
sl@0	720	}
sl@0	721	Tcl_DecrRefCount(searchIn);
sl@0	722	}
sl@0	723	Tcl_DecrRefCount(encodingObj);
sl@0	724	}
sl@0	725
sl@0	726	/*
sl@0	727	* Clear any values placed in the result by globbing.
sl@0	728	*/
sl@0	729
sl@0	730	Tcl_ResetResult(interp);
sl@0	731	resultPtr = Tcl_GetObjResult(interp);
sl@0	732
sl@0	733	hPtr = Tcl_FirstHashEntry(&table, &search);
sl@0	734	while (hPtr != NULL) {
sl@0	735	Tcl_Obj *strPtr;
sl@0	736
sl@0	737	strPtr = Tcl_NewStringObj(Tcl_GetHashKey(&table, hPtr), -1);
sl@0	738	Tcl_ListObjAppendElement(NULL, resultPtr, strPtr);
sl@0	739	hPtr = Tcl_NextHashEntry(&search);
sl@0	740	}
sl@0	741	Tcl_DeleteHashTable(&table);
sl@0	742	}
sl@0	743
sl@0	744	/*
sl@0	745	*------------------------------------------------------------------------
sl@0	746	*
sl@0	747	* Tcl_SetSystemEncoding --
sl@0	748	*
sl@0	749	* Sets the default encoding that should be used whenever the user
sl@0	750	* passes a NULL value in to one of the conversion routines.
sl@0	751	* If the supplied name is NULL, the system encoding is reset to the
sl@0	752	* default system encoding.
sl@0	753	*
sl@0	754	* Results:
sl@0	755	* The return value is TCL_OK if the system encoding was successfully
sl@0	756	* set to the encoding specified by name, TCL_ERROR otherwise. If
sl@0	757	* TCL_ERROR is returned, an error message is left in interp's result
sl@0	758	* object, unless interp was NULL.
sl@0	759	*
sl@0	760	* Side effects:
sl@0	761	* The reference count of the new system encoding is incremented.
sl@0	762	* The reference count of the old system encoding is decremented and
sl@0	763	* it may be freed.
sl@0	764	*
sl@0	765	*------------------------------------------------------------------------
sl@0	766	*/
sl@0	767
sl@0	768	EXPORT_C int
sl@0	769	Tcl_SetSystemEncoding(interp, name)
sl@0	770	Tcl_Interp interp; / Interp for error reporting, if not NULL. */
sl@0	771	CONST char name; / The name of the desired encoding, or NULL
sl@0	772	* to reset to default encoding. */
sl@0	773	{
sl@0	774	Tcl_Encoding encoding;
sl@0	775	Encoding *encodingPtr;
sl@0	776
sl@0	777	if (name == NULL) {
sl@0	778	Tcl_MutexLock(&encodingMutex);
sl@0	779	encoding = defaultEncoding;
sl@0	780	encodingPtr = (Encoding *) encoding;
sl@0	781	encodingPtr->refCount++;
sl@0	782	Tcl_MutexUnlock(&encodingMutex);
sl@0	783	} else {
sl@0	784	encoding = Tcl_GetEncoding(interp, name);
sl@0	785	if (encoding == NULL) {
sl@0	786	return TCL_ERROR;
sl@0	787	}
sl@0	788	}
sl@0	789
sl@0	790	Tcl_MutexLock(&encodingMutex);
sl@0	791	FreeEncoding(systemEncoding);
sl@0	792	systemEncoding = encoding;
sl@0	793	Tcl_MutexUnlock(&encodingMutex);
sl@0	794
sl@0	795	return TCL_OK;
sl@0	796	}
sl@0	797
sl@0	798	/*
sl@0	799	*---------------------------------------------------------------------------
sl@0	800	*
sl@0	801	* Tcl_CreateEncoding --
sl@0	802	*
sl@0	803	* This procedure is called to define a new encoding and the procedures
sl@0	804	* that are used to convert between the specified encoding and Unicode.
sl@0	805	*
sl@0	806	* Results:
sl@0	807	* Returns a token that represents the encoding. If an encoding with
sl@0	808	* the same name already existed, the old encoding token remains
sl@0	809	* valid and continues to behave as it used to, and will eventually
sl@0	810	* be garbage collected when the last reference to it goes away. Any
sl@0	811	* subsequent calls to Tcl_GetEncoding with the specified name will
sl@0	812	* retrieve the most recent encoding token.
sl@0	813	*
sl@0	814	* Side effects:
sl@0	815	* The new encoding type is entered into a table visible to all
sl@0	816	* interpreters, keyed off the encoding's name. For each call to
sl@0	817	* this procedure, there should eventually be a call to
sl@0	818	* Tcl_FreeEncoding, so that the database can be cleaned up when
sl@0	819	* encodings aren't needed anymore.
sl@0	820	*
sl@0	821	*---------------------------------------------------------------------------
sl@0	822	*/
sl@0	823
sl@0	824	EXPORT_C Tcl_Encoding
sl@0	825	Tcl_CreateEncoding(typePtr)
sl@0	826	Tcl_EncodingType typePtr; / The encoding type. */
sl@0	827	{
sl@0	828	Tcl_HashEntry *hPtr;
sl@0	829	int new;
sl@0	830	Encoding *encodingPtr;
sl@0	831	char *name;
sl@0	832
sl@0	833	Tcl_MutexLock(&encodingMutex);
sl@0	834	hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new);
sl@0	835	if (new == 0) {
sl@0	836	/*
sl@0	837	* Remove old encoding from hash table, but don't delete it until
sl@0	838	* last reference goes away.
sl@0	839	*/
sl@0	840
sl@0	841	encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
sl@0	842	encodingPtr->hPtr = NULL;
sl@0	843	}
sl@0	844
sl@0	845	name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1);
sl@0	846
sl@0	847	encodingPtr = (Encoding *) ckalloc(sizeof(Encoding));
sl@0	848	encodingPtr->name = strcpy(name, typePtr->encodingName);
sl@0	849	encodingPtr->toUtfProc = typePtr->toUtfProc;
sl@0	850	encodingPtr->fromUtfProc = typePtr->fromUtfProc;
sl@0	851	encodingPtr->freeProc = typePtr->freeProc;
sl@0	852	encodingPtr->nullSize = typePtr->nullSize;
sl@0	853	encodingPtr->clientData = typePtr->clientData;
sl@0	854	if (typePtr->nullSize == 1) {
sl@0	855	encodingPtr->lengthProc = (LengthProc *) strlen;
sl@0	856	} else {
sl@0	857	encodingPtr->lengthProc = (LengthProc *) unilen;
sl@0	858	}
sl@0	859	encodingPtr->refCount = 1;
sl@0	860	encodingPtr->hPtr = hPtr;
sl@0	861	Tcl_SetHashValue(hPtr, encodingPtr);
sl@0	862
sl@0	863	Tcl_MutexUnlock(&encodingMutex);
sl@0	864
sl@0	865	return (Tcl_Encoding) encodingPtr;
sl@0	866	}
sl@0	867
sl@0	868	/*
sl@0	869	*-------------------------------------------------------------------------
sl@0	870	*
sl@0	871	* Tcl_ExternalToUtfDString --
sl@0	872	*
sl@0	873	* Convert a source buffer from the specified encoding into UTF-8.
sl@0	874	* If any of the bytes in the source buffer are invalid or cannot
sl@0	875	* be represented in the target encoding, a default fallback
sl@0	876	* character will be substituted.
sl@0	877	*
sl@0	878	* Results:
sl@0	879	* The converted bytes are stored in the DString, which is then NULL
sl@0	880	* terminated. The return value is a pointer to the value stored
sl@0	881	* in the DString.
sl@0	882	*
sl@0	883	* Side effects:
sl@0	884	* None.
sl@0	885	*
sl@0	886	*-------------------------------------------------------------------------
sl@0	887	*/
sl@0	888
sl@0	889	EXPORT_C char *
sl@0	890	Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
sl@0	891	Tcl_Encoding encoding; /* The encoding for the source string, or
sl@0	892	* NULL for the default system encoding. */
sl@0	893	CONST char src; / Source string in specified encoding. */
sl@0	894	int srcLen; /* Source string length in bytes, or < 0 for
sl@0	895	* encoding-specific string length. */
sl@0	896	Tcl_DString dstPtr; / Uninitialized or free DString in which
sl@0	897	* the converted string is stored. */
sl@0	898	{
sl@0	899	char *dst;
sl@0	900	Tcl_EncodingState state;
sl@0	901	Encoding *encodingPtr;
sl@0	902	int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
sl@0	903
sl@0	904	Tcl_DStringInit(dstPtr);
sl@0	905	dst = Tcl_DStringValue(dstPtr);
sl@0	906	dstLen = dstPtr->spaceAvl - 1;
sl@0	907
sl@0	908	if (encoding == NULL) {
sl@0	909	encoding = systemEncoding;
sl@0	910	}
sl@0	911	encodingPtr = (Encoding *) encoding;
sl@0	912
sl@0	913	if (src == NULL) {
sl@0	914	srcLen = 0;
sl@0	915	} else if (srcLen < 0) {
sl@0	916	srcLen = (*encodingPtr->lengthProc)(src);
sl@0	917	}
sl@0	918	flags = TCL_ENCODING_START \| TCL_ENCODING_END;
sl@0	919	while (1) {
sl@0	920	result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src,
sl@0	921	srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
sl@0	922	&dstChars);
sl@0	923	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
sl@0	924	if (result != TCL_CONVERT_NOSPACE) {
sl@0	925	Tcl_DStringSetLength(dstPtr, soFar);
sl@0	926	return Tcl_DStringValue(dstPtr);
sl@0	927	}
sl@0	928	flags &= ~TCL_ENCODING_START;
sl@0	929	src += srcRead;
sl@0	930	srcLen -= srcRead;
sl@0	931	if (Tcl_DStringLength(dstPtr) == 0) {
sl@0	932	Tcl_DStringSetLength(dstPtr, dstLen);
sl@0	933	}
sl@0	934	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
sl@0	935	dst = Tcl_DStringValue(dstPtr) + soFar;
sl@0	936	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
sl@0	937	}
sl@0	938	}
sl@0	939
sl@0	940	/*
sl@0	941	*-------------------------------------------------------------------------
sl@0	942	*
sl@0	943	* Tcl_ExternalToUtf --
sl@0	944	*
sl@0	945	* Convert a source buffer from the specified encoding into UTF-8.
sl@0	946	*
sl@0	947	* Results:
sl@0	948	* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
sl@0	949	* TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
sl@0	950	* as documented in tcl.h.
sl@0	951	*
sl@0	952	* Side effects:
sl@0	953	* The converted bytes are stored in the output buffer.
sl@0	954	*
sl@0	955	*-------------------------------------------------------------------------
sl@0	956	*/
sl@0	957
sl@0	958	EXPORT_C int
sl@0	959	Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
sl@0	960	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	961	Tcl_Interp interp; / Interp for error return, if not NULL. */
sl@0	962	Tcl_Encoding encoding; /* The encoding for the source string, or
sl@0	963	* NULL for the default system encoding. */
sl@0	964	CONST char src; / Source string in specified encoding. */
sl@0	965	int srcLen; /* Source string length in bytes, or < 0 for
sl@0	966	* encoding-specific string length. */
sl@0	967	int flags; /* Conversion control flags. */
sl@0	968	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	969	* state information used during a piecewise
sl@0	970	* conversion. Contents of statePtr are
sl@0	971	* initialized and/or reset by conversion
sl@0	972	* routine under control of flags argument. */
sl@0	973	char dst; / Output buffer in which converted string
sl@0	974	* is stored. */
sl@0	975	int dstLen; /* The maximum length of output buffer in
sl@0	976	* bytes. */
sl@0	977	int srcReadPtr; / Filled with the number of bytes from the
sl@0	978	* source string that were converted. This
sl@0	979	* may be less than the original source length
sl@0	980	* if there was a problem converting some
sl@0	981	* source characters. */
sl@0	982	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	983	* stored in the output buffer as a result of
sl@0	984	* the conversion. */
sl@0	985	int dstCharsPtr; / Filled with the number of characters that
sl@0	986	* correspond to the bytes stored in the
sl@0	987	* output buffer. */
sl@0	988	{
sl@0	989	Encoding *encodingPtr;
sl@0	990	int result, srcRead, dstWrote, dstChars;
sl@0	991	Tcl_EncodingState state;
sl@0	992
sl@0	993	if (encoding == NULL) {
sl@0	994	encoding = systemEncoding;
sl@0	995	}
sl@0	996	encodingPtr = (Encoding *) encoding;
sl@0	997
sl@0	998	if (src == NULL) {
sl@0	999	srcLen = 0;
sl@0	1000	} else if (srcLen < 0) {
sl@0	1001	srcLen = (*encodingPtr->lengthProc)(src);
sl@0	1002	}
sl@0	1003	if (statePtr == NULL) {
sl@0	1004	flags \|= TCL_ENCODING_START \| TCL_ENCODING_END;
sl@0	1005	statePtr = &state;
sl@0	1006	}
sl@0	1007	if (srcReadPtr == NULL) {
sl@0	1008	srcReadPtr = &srcRead;
sl@0	1009	}
sl@0	1010	if (dstWrotePtr == NULL) {
sl@0	1011	dstWrotePtr = &dstWrote;
sl@0	1012	}
sl@0	1013	if (dstCharsPtr == NULL) {
sl@0	1014	dstCharsPtr = &dstChars;
sl@0	1015	}
sl@0	1016
sl@0	1017	/*
sl@0	1018	* If there are any null characters in the middle of the buffer, they will
sl@0	1019	* converted to the UTF-8 null character (\xC080). To get the actual
sl@0	1020	* \0 at the end of the destination buffer, we need to append it manually.
sl@0	1021	*/
sl@0	1022
sl@0	1023	dstLen--;
sl@0	1024	result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen,
sl@0	1025	flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
sl@0	1026	dstCharsPtr);
sl@0	1027	dst[*dstWrotePtr] = '\0';
sl@0	1028	return result;
sl@0	1029	}
sl@0	1030
sl@0	1031	/*
sl@0	1032	*-------------------------------------------------------------------------
sl@0	1033	*
sl@0	1034	* Tcl_UtfToExternalDString --
sl@0	1035	*
sl@0	1036	* Convert a source buffer from UTF-8 into the specified encoding.
sl@0	1037	* If any of the bytes in the source buffer are invalid or cannot
sl@0	1038	* be represented in the target encoding, a default fallback
sl@0	1039	* character will be substituted.
sl@0	1040	*
sl@0	1041	* Results:
sl@0	1042	* The converted bytes are stored in the DString, which is then
sl@0	1043	* NULL terminated in an encoding-specific manner. The return value
sl@0	1044	* is a pointer to the value stored in the DString.
sl@0	1045	*
sl@0	1046	* Side effects:
sl@0	1047	* None.
sl@0	1048	*
sl@0	1049	*-------------------------------------------------------------------------
sl@0	1050	*/
sl@0	1051
sl@0	1052	EXPORT_C char *
sl@0	1053	Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
sl@0	1054	Tcl_Encoding encoding; /* The encoding for the converted string,
sl@0	1055	* or NULL for the default system encoding. */
sl@0	1056	CONST char src; / Source string in UTF-8. */
sl@0	1057	int srcLen; /* Source string length in bytes, or < 0 for
sl@0	1058	* strlen(). */
sl@0	1059	Tcl_DString dstPtr; / Uninitialized or free DString in which
sl@0	1060	* the converted string is stored. */
sl@0	1061	{
sl@0	1062	char *dst;
sl@0	1063	Tcl_EncodingState state;
sl@0	1064	Encoding *encodingPtr;
sl@0	1065	int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
sl@0	1066
sl@0	1067	Tcl_DStringInit(dstPtr);
sl@0	1068	dst = Tcl_DStringValue(dstPtr);
sl@0	1069	dstLen = dstPtr->spaceAvl - 1;
sl@0	1070
sl@0	1071	if (encoding == NULL) {
sl@0	1072	encoding = systemEncoding;
sl@0	1073	}
sl@0	1074	encodingPtr = (Encoding *) encoding;
sl@0	1075
sl@0	1076	if (src == NULL) {
sl@0	1077	srcLen = 0;
sl@0	1078	} else if (srcLen < 0) {
sl@0	1079	srcLen = strlen(src);
sl@0	1080	}
sl@0	1081	flags = TCL_ENCODING_START \| TCL_ENCODING_END;
sl@0	1082	while (1) {
sl@0	1083	result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src,
sl@0	1084	srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
sl@0	1085	&dstChars);
sl@0	1086	soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
sl@0	1087	if (result != TCL_CONVERT_NOSPACE) {
sl@0	1088	if (encodingPtr->nullSize == 2) {
sl@0	1089	Tcl_DStringSetLength(dstPtr, soFar + 1);
sl@0	1090	}
sl@0	1091	Tcl_DStringSetLength(dstPtr, soFar);
sl@0	1092	return Tcl_DStringValue(dstPtr);
sl@0	1093	}
sl@0	1094	flags &= ~TCL_ENCODING_START;
sl@0	1095	src += srcRead;
sl@0	1096	srcLen -= srcRead;
sl@0	1097	if (Tcl_DStringLength(dstPtr) == 0) {
sl@0	1098	Tcl_DStringSetLength(dstPtr, dstLen);
sl@0	1099	}
sl@0	1100	Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
sl@0	1101	dst = Tcl_DStringValue(dstPtr) + soFar;
sl@0	1102	dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
sl@0	1103	}
sl@0	1104	}
sl@0	1105
sl@0	1106	/*
sl@0	1107	*-------------------------------------------------------------------------
sl@0	1108	*
sl@0	1109	* Tcl_UtfToExternal --
sl@0	1110	*
sl@0	1111	* Convert a buffer from UTF-8 into the specified encoding.
sl@0	1112	*
sl@0	1113	* Results:
sl@0	1114	* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
sl@0	1115	* TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
sl@0	1116	* as documented in tcl.h.
sl@0	1117	*
sl@0	1118	* Side effects:
sl@0	1119	* The converted bytes are stored in the output buffer.
sl@0	1120	*
sl@0	1121	*-------------------------------------------------------------------------
sl@0	1122	*/
sl@0	1123
sl@0	1124	EXPORT_C int
sl@0	1125	Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
sl@0	1126	dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	1127	Tcl_Interp interp; / Interp for error return, if not NULL. */
sl@0	1128	Tcl_Encoding encoding; /* The encoding for the converted string,
sl@0	1129	* or NULL for the default system encoding. */
sl@0	1130	CONST char src; / Source string in UTF-8. */
sl@0	1131	int srcLen; /* Source string length in bytes, or < 0 for
sl@0	1132	* strlen(). */
sl@0	1133	int flags; /* Conversion control flags. */
sl@0	1134	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	1135	* state information used during a piecewise
sl@0	1136	* conversion. Contents of statePtr are
sl@0	1137	* initialized and/or reset by conversion
sl@0	1138	* routine under control of flags argument. */
sl@0	1139	char dst; / Output buffer in which converted string
sl@0	1140	* is stored. */
sl@0	1141	int dstLen; /* The maximum length of output buffer in
sl@0	1142	* bytes. */
sl@0	1143	int srcReadPtr; / Filled with the number of bytes from the
sl@0	1144	* source string that were converted. This
sl@0	1145	* may be less than the original source length
sl@0	1146	* if there was a problem converting some
sl@0	1147	* source characters. */
sl@0	1148	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	1149	* stored in the output buffer as a result of
sl@0	1150	* the conversion. */
sl@0	1151	int dstCharsPtr; / Filled with the number of characters that
sl@0	1152	* correspond to the bytes stored in the
sl@0	1153	* output buffer. */
sl@0	1154	{
sl@0	1155	Encoding *encodingPtr;
sl@0	1156	int result, srcRead, dstWrote, dstChars;
sl@0	1157	Tcl_EncodingState state;
sl@0	1158
sl@0	1159	if (encoding == NULL) {
sl@0	1160	encoding = systemEncoding;
sl@0	1161	}
sl@0	1162	encodingPtr = (Encoding *) encoding;
sl@0	1163
sl@0	1164	if (src == NULL) {
sl@0	1165	srcLen = 0;
sl@0	1166	} else if (srcLen < 0) {
sl@0	1167	srcLen = strlen(src);
sl@0	1168	}
sl@0	1169	if (statePtr == NULL) {
sl@0	1170	flags \|= TCL_ENCODING_START \| TCL_ENCODING_END;
sl@0	1171	statePtr = &state;
sl@0	1172	}
sl@0	1173	if (srcReadPtr == NULL) {
sl@0	1174	srcReadPtr = &srcRead;
sl@0	1175	}
sl@0	1176	if (dstWrotePtr == NULL) {
sl@0	1177	dstWrotePtr = &dstWrote;
sl@0	1178	}
sl@0	1179	if (dstCharsPtr == NULL) {
sl@0	1180	dstCharsPtr = &dstChars;
sl@0	1181	}
sl@0	1182
sl@0	1183	dstLen -= encodingPtr->nullSize;
sl@0	1184	result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen,
sl@0	1185	flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
sl@0	1186	dstCharsPtr);
sl@0	1187	if (encodingPtr->nullSize == 2) {
sl@0	1188	dst[*dstWrotePtr + 1] = '\0';
sl@0	1189	}
sl@0	1190	dst[*dstWrotePtr] = '\0';
sl@0	1191
sl@0	1192	return result;
sl@0	1193	}
sl@0	1194
sl@0	1195	/*
sl@0	1196	*---------------------------------------------------------------------------
sl@0	1197	*
sl@0	1198	* Tcl_FindExecutable --
sl@0	1199	*
sl@0	1200	* This procedure computes the absolute path name of the current
sl@0	1201	* application, given its argv[0] value.
sl@0	1202	*
sl@0	1203	* Results:
sl@0	1204	* None.
sl@0	1205	*
sl@0	1206	* Side effects:
sl@0	1207	* The variable tclExecutableName gets filled in with the file
sl@0	1208	* name for the application, if we figured it out. If we couldn't
sl@0	1209	* figure it out, tclExecutableName is set to NULL.
sl@0	1210	*
sl@0	1211	*---------------------------------------------------------------------------
sl@0	1212	*/
sl@0	1213
sl@0	1214	EXPORT_C void
sl@0	1215	Tcl_FindExecutable(argv0)
sl@0	1216	CONST char argv0; / The value of the application's argv[0]
sl@0	1217	* (native). */
sl@0	1218	{
sl@0	1219	int mustCleanUtf;
sl@0	1220	CONST char *name;
sl@0	1221	Tcl_DString buffer, nameString;
sl@0	1222
sl@0	1223	TclInitSubsystems(argv0);
sl@0	1224
sl@0	1225	if (argv0 == NULL) {
sl@0	1226	goto done;
sl@0	1227	}
sl@0	1228	if (tclExecutableName != NULL) {
sl@0	1229	ckfree(tclExecutableName);
sl@0	1230	tclExecutableName = NULL;
sl@0	1231	}
sl@0	1232	if ((name = TclpFindExecutable(argv0)) == NULL) {
sl@0	1233	goto done;
sl@0	1234	}
sl@0	1235
sl@0	1236	/*
sl@0	1237	* The value returned from TclpNameOfExecutable is a UTF string that
sl@0	1238	* is possibly dirty depending on when it was initialized.
sl@0	1239	* TclFindEncodings will indicate whether we must "clean" the UTF (as
sl@0	1240	* reported by the underlying system). To assure that the UTF string
sl@0	1241	* is a properly encoded native string for this system, convert the
sl@0	1242	* UTF string to the default native encoding before the default
sl@0	1243	* encoding is initialized. Then, convert it back to UTF after the
sl@0	1244	* system encoding is loaded.
sl@0	1245	*/
sl@0	1246
sl@0	1247	Tcl_UtfToExternalDString(NULL, name, -1, &buffer);
sl@0	1248	mustCleanUtf = TclFindEncodings(argv0);
sl@0	1249
sl@0	1250	/*
sl@0	1251	* Now it is OK to convert the native string back to UTF and set
sl@0	1252	* the value of the tclExecutableName.
sl@0	1253	*/
sl@0	1254
sl@0	1255	if (mustCleanUtf) {
sl@0	1256	Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&buffer), -1,
sl@0	1257	&nameString);
sl@0	1258	tclExecutableName = (char *)
sl@0	1259	ckalloc((unsigned) (Tcl_DStringLength(&nameString) + 1));
sl@0	1260	strcpy(tclExecutableName, Tcl_DStringValue(&nameString));
sl@0	1261
sl@0	1262	Tcl_DStringFree(&nameString);
sl@0	1263	} else {
sl@0	1264	tclExecutableName = (char *) ckalloc((unsigned) (strlen(name) + 1));
sl@0	1265	strcpy(tclExecutableName, name);
sl@0	1266	}
sl@0	1267	Tcl_DStringFree(&buffer);
sl@0	1268	return;
sl@0	1269
sl@0	1270	done:
sl@0	1271	(void) TclFindEncodings(argv0);
sl@0	1272	}
sl@0	1273
sl@0	1274	/*
sl@0	1275	*---------------------------------------------------------------------------
sl@0	1276	*
sl@0	1277	* LoadEncodingFile --
sl@0	1278	*
sl@0	1279	* Read a file that describes an encoding and create a new Encoding
sl@0	1280	* from the data.
sl@0	1281	*
sl@0	1282	* Results:
sl@0	1283	* The return value is the newly loaded Encoding, or NULL if
sl@0	1284	* the file didn't exist of was in the incorrect format. If NULL was
sl@0	1285	* returned, an error message is left in interp's result object,
sl@0	1286	* unless interp was NULL.
sl@0	1287	*
sl@0	1288	* Side effects:
sl@0	1289	* File read from disk.
sl@0	1290	*
sl@0	1291	*---------------------------------------------------------------------------
sl@0	1292	*/
sl@0	1293
sl@0	1294	static Tcl_Encoding
sl@0	1295	LoadEncodingFile(interp, name)
sl@0	1296	Tcl_Interp interp; / Interp for error reporting, if not NULL. */
sl@0	1297	CONST char name; / The name of the encoding file on disk
sl@0	1298	* and also the name for new encoding. */
sl@0	1299	{
sl@0	1300	int objc, i, ch;
sl@0	1301	Tcl_Obj **objv;
sl@0	1302	Tcl_Obj *pathPtr;
sl@0	1303	Tcl_Channel chan;
sl@0	1304	Tcl_Encoding encoding;
sl@0	1305
sl@0	1306	pathPtr = TclGetLibraryPath();
sl@0	1307	if (pathPtr == NULL) {
sl@0	1308	goto unknown;
sl@0	1309	}
sl@0	1310	objc = 0;
sl@0	1311	Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv);
sl@0	1312
sl@0	1313	chan = NULL;
sl@0	1314	for (i = 0; i < objc; i++) {
sl@0	1315	chan = OpenEncodingFile(Tcl_GetString(objv[i]), name);
sl@0	1316	if (chan != NULL) {
sl@0	1317	break;
sl@0	1318	}
sl@0	1319	}
sl@0	1320
sl@0	1321	if (chan == NULL) {
sl@0	1322	goto unknown;
sl@0	1323	}
sl@0	1324
sl@0	1325	Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8");
sl@0	1326
sl@0	1327	while (1) {
sl@0	1328	Tcl_DString ds;
sl@0	1329
sl@0	1330	Tcl_DStringInit(&ds);
sl@0	1331	Tcl_Gets(chan, &ds);
sl@0	1332	ch = Tcl_DStringValue(&ds)[0];
sl@0	1333	Tcl_DStringFree(&ds);
sl@0	1334	if (ch != '#') {
sl@0	1335	break;
sl@0	1336	}
sl@0	1337	}
sl@0	1338
sl@0	1339	encoding = NULL;
sl@0	1340	switch (ch) {
sl@0	1341	case 'S': {
sl@0	1342	encoding = LoadTableEncoding(interp, name, ENCODING_SINGLEBYTE,
sl@0	1343	chan);
sl@0	1344	break;
sl@0	1345	}
sl@0	1346	case 'D': {
sl@0	1347	encoding = LoadTableEncoding(interp, name, ENCODING_DOUBLEBYTE,
sl@0	1348	chan);
sl@0	1349	break;
sl@0	1350	}
sl@0	1351	case 'M': {
sl@0	1352	encoding = LoadTableEncoding(interp, name, ENCODING_MULTIBYTE,
sl@0	1353	chan);
sl@0	1354	break;
sl@0	1355	}
sl@0	1356	case 'E': {
sl@0	1357	encoding = LoadEscapeEncoding(name, chan);
sl@0	1358	break;
sl@0	1359	}
sl@0	1360	}
sl@0	1361	if ((encoding == NULL) && (interp != NULL)) {
sl@0	1362	Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL);
sl@0	1363	if (ch == 'E') {
sl@0	1364	Tcl_AppendResult(interp, " or missing sub-encoding", NULL);
sl@0	1365	}
sl@0	1366	}
sl@0	1367	Tcl_Close(NULL, chan);
sl@0	1368	return encoding;
sl@0	1369
sl@0	1370	unknown:
sl@0	1371	if (interp != NULL) {
sl@0	1372	Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
sl@0	1373	}
sl@0	1374	return NULL;
sl@0	1375	}
sl@0	1376
sl@0	1377	/*
sl@0	1378	*----------------------------------------------------------------------
sl@0	1379	*
sl@0	1380	* OpenEncodingFile --
sl@0	1381	*
sl@0	1382	* Look for the file encoding/<name>.enc in the specified
sl@0	1383	* directory.
sl@0	1384	*
sl@0	1385	* Results:
sl@0	1386	* Returns an open file channel if the file exists.
sl@0	1387	*
sl@0	1388	* Side effects:
sl@0	1389	* None.
sl@0	1390	*
sl@0	1391	*----------------------------------------------------------------------
sl@0	1392	*/
sl@0	1393
sl@0	1394	static Tcl_Channel
sl@0	1395	OpenEncodingFile(dir, name)
sl@0	1396	CONST char *dir;
sl@0	1397	CONST char *name;
sl@0	1398
sl@0	1399	{
sl@0	1400	CONST char *argv[3];
sl@0	1401	Tcl_DString pathString;
sl@0	1402	CONST char *path;
sl@0	1403	Tcl_Channel chan;
sl@0	1404	Tcl_Obj *pathPtr;
sl@0	1405
sl@0	1406	argv[0] = dir;
sl@0	1407	argv[1] = "encoding";
sl@0	1408	argv[2] = name;
sl@0	1409
sl@0	1410	Tcl_DStringInit(&pathString);
sl@0	1411	Tcl_JoinPath(3, argv, &pathString);
sl@0	1412	path = Tcl_DStringAppend(&pathString, ".enc", -1);
sl@0	1413	pathPtr = Tcl_NewStringObj(path,-1);
sl@0	1414
sl@0	1415	Tcl_IncrRefCount(pathPtr);
sl@0	1416	chan = Tcl_FSOpenFileChannel(NULL, pathPtr, "r", 0);
sl@0	1417	Tcl_DecrRefCount(pathPtr);
sl@0	1418
sl@0	1419	Tcl_DStringFree(&pathString);
sl@0	1420
sl@0	1421	return chan;
sl@0	1422	}
sl@0	1423
sl@0	1424	/*
sl@0	1425	*-------------------------------------------------------------------------
sl@0	1426	*
sl@0	1427	* LoadTableEncoding --
sl@0	1428	*
sl@0	1429	* Helper function for LoadEncodingTable(). Loads a table to that
sl@0	1430	* converts between Unicode and some other encoding and creates an
sl@0	1431	* encoding (using a TableEncoding structure) from that information.
sl@0	1432	*
sl@0	1433	* File contains binary data, but begins with a marker to indicate
sl@0	1434	* byte-ordering, so that same binary file can be read on either
sl@0	1435	* endian platforms.
sl@0	1436	*
sl@0	1437	* Results:
sl@0	1438	* The return value is the new encoding, or NULL if the encoding
sl@0	1439	* could not be created (because the file contained invalid data).
sl@0	1440	*
sl@0	1441	* Side effects:
sl@0	1442	* None.
sl@0	1443	*
sl@0	1444	*-------------------------------------------------------------------------
sl@0	1445	*/
sl@0	1446
sl@0	1447	static Tcl_Encoding
sl@0	1448	LoadTableEncoding(interp, name, type, chan)
sl@0	1449	Tcl_Interp interp; / Interp for temporary obj while reading. */
sl@0	1450	CONST char name; / Name for new encoding. */
sl@0	1451	int type; /* Type of encoding (ENCODING_?????). */
sl@0	1452	Tcl_Channel chan; /* File containing new encoding. */
sl@0	1453	{
sl@0	1454	Tcl_DString lineString;
sl@0	1455	Tcl_Obj *objPtr;
sl@0	1456	char *line;
sl@0	1457	int i, hi, lo, numPages, symbol, fallback;
sl@0	1458	unsigned char used[256];
sl@0	1459	unsigned int size;
sl@0	1460	TableEncodingData *dataPtr;
sl@0	1461	unsigned short *pageMemPtr;
sl@0	1462	Tcl_EncodingType encType;
sl@0	1463
sl@0	1464	/*
sl@0	1465	* Speed over memory. Use a full 256 character table to decode hex
sl@0	1466	* sequences in the encoding files.
sl@0	1467	*/
sl@0	1468
sl@0	1469	static char staticHex[] = {
sl@0	1470	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 ... 15 */
sl@0	1471	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 ... 31 */
sl@0	1472	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 ... 47 */
sl@0	1473	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 48 ... 63 */
sl@0	1474	0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 64 ... 79 */
sl@0	1475	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 80 ... 95 */
sl@0	1476	0, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 96 ... 111 */
sl@0	1477	0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, /* 112 ... 127 */
sl@0	1478	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128 ... 143 */
sl@0	1479	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144 ... 159 */
sl@0	1480	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 160 ... 175 */
sl@0	1481	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 176 ... 191 */
sl@0	1482	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 192 ... 207 */
sl@0	1483	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 208 ... 223 */
sl@0	1484	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 224 ... 239 */
sl@0	1485	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 240 ... 255 */
sl@0	1486	};
sl@0	1487
sl@0	1488	Tcl_DStringInit(&lineString);
sl@0	1489	Tcl_Gets(chan, &lineString);
sl@0	1490	line = Tcl_DStringValue(&lineString);
sl@0	1491
sl@0	1492	fallback = (int) strtol(line, &line, 16);
sl@0	1493	symbol = (int) strtol(line, &line, 10);
sl@0	1494	numPages = (int) strtol(line, &line, 10);
sl@0	1495	Tcl_DStringFree(&lineString);
sl@0	1496
sl@0	1497	if (numPages < 0) {
sl@0	1498	numPages = 0;
sl@0	1499	} else if (numPages > 256) {
sl@0	1500	numPages = 256;
sl@0	1501	}
sl@0	1502
sl@0	1503	memset(used, 0, sizeof(used));
sl@0	1504
sl@0	1505	#undef PAGESIZE
sl@0	1506	#define PAGESIZE (256 * sizeof(unsigned short))
sl@0	1507
sl@0	1508	dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData));
sl@0	1509	memset(dataPtr, 0, sizeof(TableEncodingData));
sl@0	1510
sl@0	1511	dataPtr->fallback = fallback;
sl@0	1512
sl@0	1513	/*
sl@0	1514	* Read the table that maps characters to Unicode. Performs a single
sl@0	1515	* malloc to get the memory for the array and all the pages needed by
sl@0	1516	* the array.
sl@0	1517	*/
sl@0	1518
sl@0	1519	size = 256 * sizeof(unsigned short ) + numPages PAGESIZE;
sl@0	1520	dataPtr->toUnicode = (unsigned short **) ckalloc(size);
sl@0	1521	memset(dataPtr->toUnicode, 0, size);
sl@0	1522	pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
sl@0	1523
sl@0	1524	if (interp == NULL) {
sl@0	1525	objPtr = Tcl_NewObj();
sl@0	1526	} else {
sl@0	1527	objPtr = Tcl_GetObjResult(interp);
sl@0	1528	}
sl@0	1529	for (i = 0; i < numPages; i++) {
sl@0	1530	int ch;
sl@0	1531	char *p;
sl@0	1532
sl@0	1533	Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
sl@0	1534	p = Tcl_GetString(objPtr);
sl@0	1535	hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]];
sl@0	1536	dataPtr->toUnicode[hi] = pageMemPtr;
sl@0	1537	p += 2;
sl@0	1538	for (lo = 0; lo < 256; lo++) {
sl@0	1539	if ((lo & 0x0f) == 0) {
sl@0	1540	p++;
sl@0	1541	}
sl@0	1542	ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8)
sl@0	1543	+ (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]];
sl@0	1544	if (ch != 0) {
sl@0	1545	used[ch >> 8] = 1;
sl@0	1546	}
sl@0	1547	*pageMemPtr = (unsigned short) ch;
sl@0	1548	pageMemPtr++;
sl@0	1549	p += 4;
sl@0	1550	}
sl@0	1551	}
sl@0	1552	if (interp == NULL) {
sl@0	1553	Tcl_DecrRefCount(objPtr);
sl@0	1554	} else {
sl@0	1555	Tcl_ResetResult(interp);
sl@0	1556	}
sl@0	1557
sl@0	1558	if (type == ENCODING_DOUBLEBYTE) {
sl@0	1559	memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes));
sl@0	1560	} else {
sl@0	1561	for (hi = 1; hi < 256; hi++) {
sl@0	1562	if (dataPtr->toUnicode[hi] != NULL) {
sl@0	1563	dataPtr->prefixBytes[hi] = 1;
sl@0	1564	}
sl@0	1565	}
sl@0	1566	}
sl@0	1567
sl@0	1568	/*
sl@0	1569	* Invert toUnicode array to produce the fromUnicode array. Performs a
sl@0	1570	* single malloc to get the memory for the array and all the pages
sl@0	1571	* needed by the array. While reading in the toUnicode array, we
sl@0	1572	* remembered what pages that would be needed for the fromUnicode array.
sl@0	1573	*/
sl@0	1574
sl@0	1575	if (symbol) {
sl@0	1576	used[0] = 1;
sl@0	1577	}
sl@0	1578	numPages = 0;
sl@0	1579	for (hi = 0; hi < 256; hi++) {
sl@0	1580	if (used[hi]) {
sl@0	1581	numPages++;
sl@0	1582	}
sl@0	1583	}
sl@0	1584	size = 256 * sizeof(unsigned short ) + numPages PAGESIZE;
sl@0	1585	dataPtr->fromUnicode = (unsigned short **) ckalloc(size);
sl@0	1586	memset(dataPtr->fromUnicode, 0, size);
sl@0	1587	pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
sl@0	1588
sl@0	1589	for (hi = 0; hi < 256; hi++) {
sl@0	1590	if (dataPtr->toUnicode[hi] == NULL) {
sl@0	1591	dataPtr->toUnicode[hi] = emptyPage;
sl@0	1592	} else {
sl@0	1593	for (lo = 0; lo < 256; lo++) {
sl@0	1594	int ch;
sl@0	1595
sl@0	1596	ch = dataPtr->toUnicode[hi][lo];
sl@0	1597	if (ch != 0) {
sl@0	1598	unsigned short *page;
sl@0	1599
sl@0	1600	page = dataPtr->fromUnicode[ch >> 8];
sl@0	1601	if (page == NULL) {
sl@0	1602	page = pageMemPtr;
sl@0	1603	pageMemPtr += 256;
sl@0	1604	dataPtr->fromUnicode[ch >> 8] = page;
sl@0	1605	}
sl@0	1606	page[ch & 0xff] = (unsigned short) ((hi << 8) + lo);
sl@0	1607	}
sl@0	1608	}
sl@0	1609	}
sl@0	1610	}
sl@0	1611	if (type == ENCODING_MULTIBYTE) {
sl@0	1612	/*
sl@0	1613	* If multibyte encodings don't have a backslash character, define
sl@0	1614	* one. Otherwise, on Windows, native file names won't work because
sl@0	1615	* the backslash in the file name will map to the unknown character
sl@0	1616	* (question mark) when converting from UTF-8 to external encoding.
sl@0	1617	*/
sl@0	1618
sl@0	1619	if (dataPtr->fromUnicode[0] != NULL) {
sl@0	1620	if (dataPtr->fromUnicode[0]['\\'] == '\0') {
sl@0	1621	dataPtr->fromUnicode[0]['\\'] = '\\';
sl@0	1622	}
sl@0	1623	}
sl@0	1624	}
sl@0	1625	if (symbol) {
sl@0	1626	unsigned short *page;
sl@0	1627
sl@0	1628	/*
sl@0	1629	* Make a special symbol encoding that not only maps the symbol
sl@0	1630	* characters from their Unicode code points down into page 0, but
sl@0	1631	* also ensure that the characters on page 0 map to themselves.
sl@0	1632	* This is so that a symbol font can be used to display a simple
sl@0	1633	* string like "abcd" and have alpha, beta, chi, delta show up,
sl@0	1634	* rather than have "unknown" chars show up because strictly
sl@0	1635	* speaking the symbol font doesn't have glyphs for those low ascii
sl@0	1636	* chars.
sl@0	1637	*/
sl@0	1638
sl@0	1639	page = dataPtr->fromUnicode[0];
sl@0	1640	if (page == NULL) {
sl@0	1641	page = pageMemPtr;
sl@0	1642	dataPtr->fromUnicode[0] = page;
sl@0	1643	}
sl@0	1644	for (lo = 0; lo < 256; lo++) {
sl@0	1645	if (dataPtr->toUnicode[0][lo] != 0) {
sl@0	1646	page[lo] = (unsigned short) lo;
sl@0	1647	}
sl@0	1648	}
sl@0	1649	}
sl@0	1650	for (hi = 0; hi < 256; hi++) {
sl@0	1651	if (dataPtr->fromUnicode[hi] == NULL) {
sl@0	1652	dataPtr->fromUnicode[hi] = emptyPage;
sl@0	1653	}
sl@0	1654	}
sl@0	1655	/*
sl@0	1656	* For trailing 'R'everse encoding, see [Patch #689341]
sl@0	1657	*/
sl@0	1658	Tcl_DStringInit(&lineString);
sl@0	1659	do {
sl@0	1660	int len;
sl@0	1661	/* skip leading empty lines */
sl@0	1662	while ((len = Tcl_Gets(chan, &lineString)) == 0)
sl@0	1663	;
sl@0	1664	if (len < 0) {
sl@0	1665	break;
sl@0	1666	}
sl@0	1667	line = Tcl_DStringValue(&lineString);
sl@0	1668	if (line[0] != 'R') {
sl@0	1669	break;
sl@0	1670	}
sl@0	1671	for (Tcl_DStringSetLength(&lineString, 0);
sl@0	1672	(len = Tcl_Gets(chan, &lineString)) >= 0;
sl@0	1673	Tcl_DStringSetLength(&lineString, 0)) {
sl@0	1674	unsigned char* p;
sl@0	1675	int to, from;
sl@0	1676	if (len < 5) {
sl@0	1677	continue;
sl@0	1678	}
sl@0	1679	p = (unsigned char*) Tcl_DStringValue(&lineString);
sl@0	1680	to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
sl@0	1681	+ (staticHex[p[2]] << 4) + staticHex[p[3]];
sl@0	1682	if (to == 0) {
sl@0	1683	continue;
sl@0	1684	}
sl@0	1685	for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) {
sl@0	1686	from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
sl@0	1687	+ (staticHex[p[2]] << 4) + staticHex[p[3]];
sl@0	1688	if (from == 0) {
sl@0	1689	continue;
sl@0	1690	}
sl@0	1691	dataPtr->fromUnicode[from >> 8][from & 0xff] = to;
sl@0	1692	}
sl@0	1693	}
sl@0	1694	} while (0);
sl@0	1695	Tcl_DStringFree(&lineString);
sl@0	1696
sl@0	1697	encType.encodingName = name;
sl@0	1698	encType.toUtfProc = TableToUtfProc;
sl@0	1699	encType.fromUtfProc = TableFromUtfProc;
sl@0	1700	encType.freeProc = TableFreeProc;
sl@0	1701	encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1;
sl@0	1702	encType.clientData = (ClientData) dataPtr;
sl@0	1703	return Tcl_CreateEncoding(&encType);
sl@0	1704	}
sl@0	1705
sl@0	1706	/*
sl@0	1707	*-------------------------------------------------------------------------
sl@0	1708	*
sl@0	1709	* LoadEscapeEncoding --
sl@0	1710	*
sl@0	1711	* Helper function for LoadEncodingTable(). Loads a state machine
sl@0	1712	* that converts between Unicode and some other encoding.
sl@0	1713	*
sl@0	1714	* File contains text data that describes the escape sequences that
sl@0	1715	* are used to choose an encoding and the associated names for the
sl@0	1716	* sub-encodings.
sl@0	1717	*
sl@0	1718	* Results:
sl@0	1719	* The return value is the new encoding, or NULL if the encoding
sl@0	1720	* could not be created (because the file contained invalid data).
sl@0	1721	*
sl@0	1722	* Side effects:
sl@0	1723	* None.
sl@0	1724	*
sl@0	1725	*-------------------------------------------------------------------------
sl@0	1726	*/
sl@0	1727
sl@0	1728	static Tcl_Encoding
sl@0	1729	LoadEscapeEncoding(name, chan)
sl@0	1730	CONST char name; / Name for new encoding. */
sl@0	1731	Tcl_Channel chan; /* File containing new encoding. */
sl@0	1732	{
sl@0	1733	int i, missingSubEncoding = 0;
sl@0	1734	unsigned int size;
sl@0	1735	Tcl_DString escapeData;
sl@0	1736	char init[16], final[16];
sl@0	1737	EscapeEncodingData *dataPtr;
sl@0	1738	Tcl_EncodingType type;
sl@0	1739
sl@0	1740	init[0] = '\0';
sl@0	1741	final[0] = '\0';
sl@0	1742	Tcl_DStringInit(&escapeData);
sl@0	1743
sl@0	1744	while (1) {
sl@0	1745	int argc;
sl@0	1746	CONST char **argv;
sl@0	1747	char *line;
sl@0	1748	Tcl_DString lineString;
sl@0	1749
sl@0	1750	Tcl_DStringInit(&lineString);
sl@0	1751	if (Tcl_Gets(chan, &lineString) < 0) {
sl@0	1752	break;
sl@0	1753	}
sl@0	1754	line = Tcl_DStringValue(&lineString);
sl@0	1755	if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) {
sl@0	1756	continue;
sl@0	1757	}
sl@0	1758	if (argc >= 2) {
sl@0	1759	if (strcmp(argv[0], "name") == 0) {
sl@0	1760	;
sl@0	1761	} else if (strcmp(argv[0], "init") == 0) {
sl@0	1762	strncpy(init, argv[1], sizeof(init));
sl@0	1763	init[sizeof(init) - 1] = '\0';
sl@0	1764	} else if (strcmp(argv[0], "final") == 0) {
sl@0	1765	strncpy(final, argv[1], sizeof(final));
sl@0	1766	final[sizeof(final) - 1] = '\0';
sl@0	1767	} else {
sl@0	1768	EscapeSubTable est;
sl@0	1769
sl@0	1770	strncpy(est.sequence, argv[1], sizeof(est.sequence));
sl@0	1771	est.sequence[sizeof(est.sequence) - 1] = '\0';
sl@0	1772	est.sequenceLen = strlen(est.sequence);
sl@0	1773
sl@0	1774	strncpy(est.name, argv[0], sizeof(est.name));
sl@0	1775	est.name[sizeof(est.name) - 1] = '\0';
sl@0	1776
sl@0	1777	/*
sl@0	1778	* Load the subencodings first so we're never stuck
sl@0	1779	* trying to use a half-loaded system encoding to
sl@0	1780	* open/read a *.enc file.
sl@0	1781	*/
sl@0	1782
sl@0	1783	est.encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, est.name);
sl@0	1784	if ((est.encodingPtr == NULL)
sl@0	1785	\|\| (est.encodingPtr->toUtfProc != TableToUtfProc)) {
sl@0	1786	missingSubEncoding = 1;
sl@0	1787	}
sl@0	1788	Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
sl@0	1789	}
sl@0	1790	}
sl@0	1791	ckfree((char *) argv);
sl@0	1792	Tcl_DStringFree(&lineString);
sl@0	1793	}
sl@0	1794	if (missingSubEncoding) {
sl@0	1795	Tcl_DStringFree(&escapeData);
sl@0	1796	return NULL;
sl@0	1797	}
sl@0	1798
sl@0	1799	size = sizeof(EscapeEncodingData)
sl@0	1800	- sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData);
sl@0	1801	dataPtr = (EscapeEncodingData *) ckalloc(size);
sl@0	1802	dataPtr->initLen = strlen(init);
sl@0	1803	strcpy(dataPtr->init, init);
sl@0	1804	dataPtr->finalLen = strlen(final);
sl@0	1805	strcpy(dataPtr->final, final);
sl@0	1806	dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
sl@0	1807	memcpy((VOID ) dataPtr->subTables, (VOID ) Tcl_DStringValue(&escapeData),
sl@0	1808	(size_t) Tcl_DStringLength(&escapeData));
sl@0	1809	Tcl_DStringFree(&escapeData);
sl@0	1810
sl@0	1811	memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
sl@0	1812	for (i = 0; i < dataPtr->numSubTables; i++) {
sl@0	1813	dataPtr->prefixBytes[UCHAR(dataPtr->subTables[i].sequence[0])] = 1;
sl@0	1814	}
sl@0	1815	if (dataPtr->init[0] != '\0') {
sl@0	1816	dataPtr->prefixBytes[UCHAR(dataPtr->init[0])] = 1;
sl@0	1817	}
sl@0	1818	if (dataPtr->final[0] != '\0') {
sl@0	1819	dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1;
sl@0	1820	}
sl@0	1821
sl@0	1822	type.encodingName = name;
sl@0	1823	type.toUtfProc = EscapeToUtfProc;
sl@0	1824	type.fromUtfProc = EscapeFromUtfProc;
sl@0	1825	type.freeProc = EscapeFreeProc;
sl@0	1826	type.nullSize = 1;
sl@0	1827	type.clientData = (ClientData) dataPtr;
sl@0	1828
sl@0	1829	return Tcl_CreateEncoding(&type);
sl@0	1830	}
sl@0	1831
sl@0	1832	/*
sl@0	1833	*-------------------------------------------------------------------------
sl@0	1834	*
sl@0	1835	* BinaryProc --
sl@0	1836	*
sl@0	1837	* The default conversion when no other conversion is specified.
sl@0	1838	* No translation is done; source bytes are copied directly to
sl@0	1839	* destination bytes.
sl@0	1840	*
sl@0	1841	* Results:
sl@0	1842	* Returns TCL_OK if conversion was successful.
sl@0	1843	*
sl@0	1844	* Side effects:
sl@0	1845	* None.
sl@0	1846	*
sl@0	1847	*-------------------------------------------------------------------------
sl@0	1848	*/
sl@0	1849
sl@0	1850	static int
sl@0	1851	BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	1852	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	1853	ClientData clientData; /* Not used. */
sl@0	1854	CONST char src; / Source string (unknown encoding). */
sl@0	1855	int srcLen; /* Source string length in bytes. */
sl@0	1856	int flags; /* Conversion control flags. */
sl@0	1857	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	1858	* state information used during a piecewise
sl@0	1859	* conversion. Contents of statePtr are
sl@0	1860	* initialized and/or reset by conversion
sl@0	1861	* routine under control of flags argument. */
sl@0	1862	char dst; / Output buffer in which converted string
sl@0	1863	* is stored. */
sl@0	1864	int dstLen; /* The maximum length of output buffer in
sl@0	1865	* bytes. */
sl@0	1866	int srcReadPtr; / Filled with the number of bytes from the
sl@0	1867	* source string that were converted. */
sl@0	1868	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	1869	* stored in the output buffer as a result of
sl@0	1870	* the conversion. */
sl@0	1871	int dstCharsPtr; / Filled with the number of characters that
sl@0	1872	* correspond to the bytes stored in the
sl@0	1873	* output buffer. */
sl@0	1874	{
sl@0	1875	int result;
sl@0	1876
sl@0	1877	result = TCL_OK;
sl@0	1878	dstLen -= TCL_UTF_MAX - 1;
sl@0	1879	if (dstLen < 0) {
sl@0	1880	dstLen = 0;
sl@0	1881	}
sl@0	1882	if (srcLen > dstLen) {
sl@0	1883	srcLen = dstLen;
sl@0	1884	result = TCL_CONVERT_NOSPACE;
sl@0	1885	}
sl@0	1886
sl@0	1887	*srcReadPtr = srcLen;
sl@0	1888	*dstWrotePtr = srcLen;
sl@0	1889	*dstCharsPtr = srcLen;
sl@0	1890	memcpy((void ) dst, (void ) src, (size_t) srcLen);
sl@0	1891	return result;
sl@0	1892	}
sl@0	1893
sl@0	1894
sl@0	1895	/*
sl@0	1896	*-------------------------------------------------------------------------
sl@0	1897	*
sl@0	1898	* UtfExtToUtfIntProc --
sl@0	1899	*
sl@0	1900	* Convert from UTF-8 to UTF-8. While converting null-bytes from
sl@0	1901	* the Tcl's internal representation (0xc0, 0x80) to the official
sl@0	1902	* representation (0x00). See UtfToUtfProc for details.
sl@0	1903	*
sl@0	1904	* Results:
sl@0	1905	* Returns TCL_OK if conversion was successful.
sl@0	1906	*
sl@0	1907	* Side effects:
sl@0	1908	* None.
sl@0	1909	*
sl@0	1910	*-------------------------------------------------------------------------
sl@0	1911	*/
sl@0	1912	static int
sl@0	1913	UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	1914	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	1915	ClientData clientData; /* Not used. */
sl@0	1916	CONST char src; / Source string in UTF-8. */
sl@0	1917	int srcLen; /* Source string length in bytes. */
sl@0	1918	int flags; /* Conversion control flags. */
sl@0	1919	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	1920	* state information used during a piecewise
sl@0	1921	* conversion. Contents of statePtr are
sl@0	1922	* initialized and/or reset by conversion
sl@0	1923	* routine under control of flags argument. */
sl@0	1924	char dst; / Output buffer in which converted string
sl@0	1925	* is stored. */
sl@0	1926	int dstLen; /* The maximum length of output buffer in
sl@0	1927	* bytes. */
sl@0	1928	int srcReadPtr; / Filled with the number of bytes from the
sl@0	1929	* source string that were converted. This
sl@0	1930	* may be less than the original source length
sl@0	1931	* if there was a problem converting some
sl@0	1932	* source characters. */
sl@0	1933	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	1934	* stored in the output buffer as a result of
sl@0	1935	* the conversion. */
sl@0	1936	int dstCharsPtr; / Filled with the number of characters that
sl@0	1937	* correspond to the bytes stored in the
sl@0	1938	* output buffer. */
sl@0	1939	{
sl@0	1940	return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	1941	srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
sl@0	1942	}
sl@0	1943
sl@0	1944	/*
sl@0	1945	*-------------------------------------------------------------------------
sl@0	1946	*
sl@0	1947	* UtfExtToUtfIntProc --
sl@0	1948	*
sl@0	1949	* Convert from UTF-8 to UTF-8 while converting null-bytes from
sl@0	1950	* the official representation (0x00) to Tcl's internal
sl@0	1951	* representation (0xc0, 0x80). See UtfToUtfProc for details.
sl@0	1952	*
sl@0	1953	* Results:
sl@0	1954	* Returns TCL_OK if conversion was successful.
sl@0	1955	*
sl@0	1956	* Side effects:
sl@0	1957	* None.
sl@0	1958	*
sl@0	1959	*-------------------------------------------------------------------------
sl@0	1960	*/
sl@0	1961	static int
sl@0	1962	UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	1963	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	1964	ClientData clientData; /* Not used. */
sl@0	1965	CONST char src; / Source string in UTF-8. */
sl@0	1966	int srcLen; /* Source string length in bytes. */
sl@0	1967	int flags; /* Conversion control flags. */
sl@0	1968	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	1969	* state information used during a piecewise
sl@0	1970	* conversion. Contents of statePtr are
sl@0	1971	* initialized and/or reset by conversion
sl@0	1972	* routine under control of flags argument. */
sl@0	1973	char dst; / Output buffer in which converted string
sl@0	1974	* is stored. */
sl@0	1975	int dstLen; /* The maximum length of output buffer in
sl@0	1976	* bytes. */
sl@0	1977	int srcReadPtr; / Filled with the number of bytes from the
sl@0	1978	* source string that were converted. This
sl@0	1979	* may be less than the original source length
sl@0	1980	* if there was a problem converting some
sl@0	1981	* source characters. */
sl@0	1982	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	1983	* stored in the output buffer as a result of
sl@0	1984	* the conversion. */
sl@0	1985	int dstCharsPtr; / Filled with the number of characters that
sl@0	1986	* correspond to the bytes stored in the
sl@0	1987	* output buffer. */
sl@0	1988	{
sl@0	1989	return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	1990	srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
sl@0	1991	}
sl@0	1992
sl@0	1993	/*
sl@0	1994	*-------------------------------------------------------------------------
sl@0	1995	*
sl@0	1996	* UtfToUtfProc --
sl@0	1997	*
sl@0	1998	* Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8
sl@0	1999	* translation is not a no-op, because it will turn a stream of
sl@0	2000	* improperly formed UTF-8 into a properly formed stream.
sl@0	2001	*
sl@0	2002	* Results:
sl@0	2003	* Returns TCL_OK if conversion was successful.
sl@0	2004	*
sl@0	2005	* Side effects:
sl@0	2006	* None.
sl@0	2007	*
sl@0	2008	*-------------------------------------------------------------------------
sl@0	2009	*/
sl@0	2010
sl@0	2011	static int
sl@0	2012	UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2013	srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode)
sl@0	2014	ClientData clientData; /* Not used. */
sl@0	2015	CONST char src; / Source string in UTF-8. */
sl@0	2016	int srcLen; /* Source string length in bytes. */
sl@0	2017	int flags; /* Conversion control flags. */
sl@0	2018	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2019	* state information used during a piecewise
sl@0	2020	* conversion. Contents of statePtr are
sl@0	2021	* initialized and/or reset by conversion
sl@0	2022	* routine under control of flags argument. */
sl@0	2023	char dst; / Output buffer in which converted string
sl@0	2024	* is stored. */
sl@0	2025	int dstLen; /* The maximum length of output buffer in
sl@0	2026	* bytes. */
sl@0	2027	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2028	* source string that were converted. This
sl@0	2029	* may be less than the original source length
sl@0	2030	* if there was a problem converting some
sl@0	2031	* source characters. */
sl@0	2032	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2033	* stored in the output buffer as a result of
sl@0	2034	* the conversion. */
sl@0	2035	int dstCharsPtr; / Filled with the number of characters that
sl@0	2036	* correspond to the bytes stored in the
sl@0	2037	* output buffer. */
sl@0	2038	int pureNullMode; /* Convert embedded nulls from
sl@0	2039	* internal representation to real
sl@0	2040	* null-bytes or vice versa */
sl@0	2041
sl@0	2042	{
sl@0	2043	CONST char srcStart, srcEnd, *srcClose;
sl@0	2044	char dstStart, dstEnd;
sl@0	2045	int result, numChars;
sl@0	2046	Tcl_UniChar ch;
sl@0	2047
sl@0	2048	result = TCL_OK;
sl@0	2049
sl@0	2050	srcStart = src;
sl@0	2051	srcEnd = src + srcLen;
sl@0	2052	srcClose = srcEnd;
sl@0	2053	if ((flags & TCL_ENCODING_END) == 0) {
sl@0	2054	srcClose -= TCL_UTF_MAX;
sl@0	2055	}
sl@0	2056
sl@0	2057	dstStart = dst;
sl@0	2058	dstEnd = dst + dstLen - TCL_UTF_MAX;
sl@0	2059
sl@0	2060	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2061	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
sl@0	2062	/*
sl@0	2063	* If there is more string to follow, this will ensure that the
sl@0	2064	* last UTF-8 character in the source buffer hasn't been cut off.
sl@0	2065	*/
sl@0	2066
sl@0	2067	result = TCL_CONVERT_MULTIBYTE;
sl@0	2068	break;
sl@0	2069	}
sl@0	2070	if (dst > dstEnd) {
sl@0	2071	result = TCL_CONVERT_NOSPACE;
sl@0	2072	break;
sl@0	2073	}
sl@0	2074	if (UCHAR(*src) < 0x80 &&
sl@0	2075	!(UCHAR(*src) == 0 && pureNullMode == 0)) {
sl@0	2076	/*
sl@0	2077	* Copy 7bit chatacters, but skip null-bytes when we are
sl@0	2078	* in input mode, so that they get converted to 0xc080.
sl@0	2079	*/
sl@0	2080	dst++ = src++;
sl@0	2081	} else if (pureNullMode == 1 &&
sl@0	2082	UCHAR(*src) == 0xc0 &&
sl@0	2083	UCHAR(*(src+1)) == 0x80) {
sl@0	2084	/*
sl@0	2085	* Convert 0xc080 to real nulls when we are in output mode.
sl@0	2086	*/
sl@0	2087	*dst++ = 0;
sl@0	2088	src += 2;
sl@0	2089	} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
sl@0	2090	/* Always check before using Tcl_UtfToUniChar. Not doing
sl@0	2091	* can so cause it run beyond the endof the buffer! If we
sl@0	2092	* * happen such an incomplete char its byts are made to *
sl@0	2093	* represent themselves.
sl@0	2094	*/
sl@0	2095
sl@0	2096	ch = (Tcl_UniChar) *src;
sl@0	2097	src += 1;
sl@0	2098	dst += Tcl_UniCharToUtf(ch, dst);
sl@0	2099	} else {
sl@0	2100	src += Tcl_UtfToUniChar(src, &ch);
sl@0	2101	dst += Tcl_UniCharToUtf(ch, dst);
sl@0	2102	}
sl@0	2103	}
sl@0	2104
sl@0	2105	*srcReadPtr = src - srcStart;
sl@0	2106	*dstWrotePtr = dst - dstStart;
sl@0	2107	*dstCharsPtr = numChars;
sl@0	2108	return result;
sl@0	2109	}
sl@0	2110
sl@0	2111	/*
sl@0	2112	*-------------------------------------------------------------------------
sl@0	2113	*
sl@0	2114	* UnicodeToUtfProc --
sl@0	2115	*
sl@0	2116	* Convert from Unicode to UTF-8.
sl@0	2117	*
sl@0	2118	* Results:
sl@0	2119	* Returns TCL_OK if conversion was successful.
sl@0	2120	*
sl@0	2121	* Side effects:
sl@0	2122	* None.
sl@0	2123	*
sl@0	2124	*-------------------------------------------------------------------------
sl@0	2125	*/
sl@0	2126
sl@0	2127	static int
sl@0	2128	UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2129	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2130	ClientData clientData; /* Not used. */
sl@0	2131	CONST char src; / Source string in Unicode. */
sl@0	2132	int srcLen; /* Source string length in bytes. */
sl@0	2133	int flags; /* Conversion control flags. */
sl@0	2134	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2135	* state information used during a piecewise
sl@0	2136	* conversion. Contents of statePtr are
sl@0	2137	* initialized and/or reset by conversion
sl@0	2138	* routine under control of flags argument. */
sl@0	2139	char dst; / Output buffer in which converted string
sl@0	2140	* is stored. */
sl@0	2141	int dstLen; /* The maximum length of output buffer in
sl@0	2142	* bytes. */
sl@0	2143	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2144	* source string that were converted. This
sl@0	2145	* may be less than the original source length
sl@0	2146	* if there was a problem converting some
sl@0	2147	* source characters. */
sl@0	2148	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2149	* stored in the output buffer as a result of
sl@0	2150	* the conversion. */
sl@0	2151	int dstCharsPtr; / Filled with the number of characters that
sl@0	2152	* correspond to the bytes stored in the
sl@0	2153	* output buffer. */
sl@0	2154	{
sl@0	2155	CONST char srcStart, srcEnd;
sl@0	2156	char dstEnd, dstStart;
sl@0	2157	int result, numChars;
sl@0	2158	Tcl_UniChar ch;
sl@0	2159
sl@0	2160	result = TCL_OK;
sl@0	2161	if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
sl@0	2162	result = TCL_CONVERT_MULTIBYTE;
sl@0	2163	srcLen /= sizeof(Tcl_UniChar);
sl@0	2164	srcLen *= sizeof(Tcl_UniChar);
sl@0	2165	}
sl@0	2166
sl@0	2167	srcStart = src;
sl@0	2168	srcEnd = src + srcLen;
sl@0	2169
sl@0	2170	dstStart = dst;
sl@0	2171	dstEnd = dst + dstLen - TCL_UTF_MAX;
sl@0	2172
sl@0	2173	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2174	if (dst > dstEnd) {
sl@0	2175	result = TCL_CONVERT_NOSPACE;
sl@0	2176	break;
sl@0	2177	}
sl@0	2178	/*
sl@0	2179	* Special case for 1-byte utf chars for speed. Make sure we
sl@0	2180	* work with Tcl_UniChar-size data.
sl@0	2181	*/
sl@0	2182	ch = (Tcl_UniChar )src;
sl@0	2183	if (ch && ch < 0x80) {
sl@0	2184	*dst++ = (ch & 0xFF);
sl@0	2185	} else {
sl@0	2186	dst += Tcl_UniCharToUtf(ch, dst);
sl@0	2187	}
sl@0	2188	src += sizeof(Tcl_UniChar);
sl@0	2189	}
sl@0	2190
sl@0	2191	*srcReadPtr = src - srcStart;
sl@0	2192	*dstWrotePtr = dst - dstStart;
sl@0	2193	*dstCharsPtr = numChars;
sl@0	2194	return result;
sl@0	2195	}
sl@0	2196
sl@0	2197	/*
sl@0	2198	*-------------------------------------------------------------------------
sl@0	2199	*
sl@0	2200	* UtfToUnicodeProc --
sl@0	2201	*
sl@0	2202	* Convert from UTF-8 to Unicode.
sl@0	2203	*
sl@0	2204	* Results:
sl@0	2205	* Returns TCL_OK if conversion was successful.
sl@0	2206	*
sl@0	2207	* Side effects:
sl@0	2208	* None.
sl@0	2209	*
sl@0	2210	*-------------------------------------------------------------------------
sl@0	2211	*/
sl@0	2212
sl@0	2213	static int
sl@0	2214	UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2215	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2216	ClientData clientData; /* TableEncodingData that specifies encoding. */
sl@0	2217	CONST char src; / Source string in UTF-8. */
sl@0	2218	int srcLen; /* Source string length in bytes. */
sl@0	2219	int flags; /* Conversion control flags. */
sl@0	2220	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2221	* state information used during a piecewise
sl@0	2222	* conversion. Contents of statePtr are
sl@0	2223	* initialized and/or reset by conversion
sl@0	2224	* routine under control of flags argument. */
sl@0	2225	char dst; / Output buffer in which converted string
sl@0	2226	* is stored. */
sl@0	2227	int dstLen; /* The maximum length of output buffer in
sl@0	2228	* bytes. */
sl@0	2229	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2230	* source string that were converted. This
sl@0	2231	* may be less than the original source length
sl@0	2232	* if there was a problem converting some
sl@0	2233	* source characters. */
sl@0	2234	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2235	* stored in the output buffer as a result of
sl@0	2236	* the conversion. */
sl@0	2237	int dstCharsPtr; / Filled with the number of characters that
sl@0	2238	* correspond to the bytes stored in the
sl@0	2239	* output buffer. */
sl@0	2240	{
sl@0	2241	CONST char srcStart, srcEnd, srcClose, dstStart, *dstEnd;
sl@0	2242	int result, numChars;
sl@0	2243	Tcl_UniChar ch;
sl@0	2244
sl@0	2245	srcStart = src;
sl@0	2246	srcEnd = src + srcLen;
sl@0	2247	srcClose = srcEnd;
sl@0	2248	if ((flags & TCL_ENCODING_END) == 0) {
sl@0	2249	srcClose -= TCL_UTF_MAX;
sl@0	2250	}
sl@0	2251
sl@0	2252	dstStart = dst;
sl@0	2253	dstEnd = dst + dstLen - sizeof(Tcl_UniChar);
sl@0	2254
sl@0	2255	result = TCL_OK;
sl@0	2256	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2257	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
sl@0	2258	/*
sl@0	2259	* If there is more string to follow, this will ensure that the
sl@0	2260	* last UTF-8 character in the source buffer hasn't been cut off.
sl@0	2261	*/
sl@0	2262
sl@0	2263	result = TCL_CONVERT_MULTIBYTE;
sl@0	2264	break;
sl@0	2265	}
sl@0	2266	if (dst > dstEnd) {
sl@0	2267	result = TCL_CONVERT_NOSPACE;
sl@0	2268	break;
sl@0	2269	}
sl@0	2270	src += TclUtfToUniChar(src, &ch);
sl@0	2271	/*
sl@0	2272	* Need to handle this in a way that won't cause misalignment
sl@0	2273	* by casting dst to a Tcl_UniChar. [Bug 1122671]
sl@0	2274	* XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
sl@0	2275	*/
sl@0	2276	#ifdef WORDS_BIGENDIAN
sl@0	2277	*dst++ = (ch >> 8);
sl@0	2278	*dst++ = (ch & 0xFF);
sl@0	2279	#else
sl@0	2280	*dst++ = (ch & 0xFF);
sl@0	2281	*dst++ = (ch >> 8);
sl@0	2282	#endif
sl@0	2283	}
sl@0	2284	*srcReadPtr = src - srcStart;
sl@0	2285	*dstWrotePtr = dst - dstStart;
sl@0	2286	*dstCharsPtr = numChars;
sl@0	2287	return result;
sl@0	2288	}
sl@0	2289
sl@0	2290	/*
sl@0	2291	*-------------------------------------------------------------------------
sl@0	2292	*
sl@0	2293	* TableToUtfProc --
sl@0	2294	*
sl@0	2295	* Convert from the encoding specified by the TableEncodingData into
sl@0	2296	* UTF-8.
sl@0	2297	*
sl@0	2298	* Results:
sl@0	2299	* Returns TCL_OK if conversion was successful.
sl@0	2300	*
sl@0	2301	* Side effects:
sl@0	2302	* None.
sl@0	2303	*
sl@0	2304	*-------------------------------------------------------------------------
sl@0	2305	*/
sl@0	2306
sl@0	2307	static int
sl@0	2308	TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2309	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2310	ClientData clientData; /* TableEncodingData that specifies
sl@0	2311	* encoding. */
sl@0	2312	CONST char src; / Source string in specified encoding. */
sl@0	2313	int srcLen; /* Source string length in bytes. */
sl@0	2314	int flags; /* Conversion control flags. */
sl@0	2315	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2316	* state information used during a piecewise
sl@0	2317	* conversion. Contents of statePtr are
sl@0	2318	* initialized and/or reset by conversion
sl@0	2319	* routine under control of flags argument. */
sl@0	2320	char dst; / Output buffer in which converted string
sl@0	2321	* is stored. */
sl@0	2322	int dstLen; /* The maximum length of output buffer in
sl@0	2323	* bytes. */
sl@0	2324	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2325	* source string that were converted. This
sl@0	2326	* may be less than the original source length
sl@0	2327	* if there was a problem converting some
sl@0	2328	* source characters. */
sl@0	2329	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2330	* stored in the output buffer as a result of
sl@0	2331	* the conversion. */
sl@0	2332	int dstCharsPtr; / Filled with the number of characters that
sl@0	2333	* correspond to the bytes stored in the
sl@0	2334	* output buffer. */
sl@0	2335	{
sl@0	2336	CONST char srcStart, srcEnd;
sl@0	2337	char dstEnd, dstStart, *prefixBytes;
sl@0	2338	int result, byte, numChars;
sl@0	2339	Tcl_UniChar ch;
sl@0	2340	unsigned short **toUnicode;
sl@0	2341	unsigned short *pageZero;
sl@0	2342	TableEncodingData *dataPtr;
sl@0	2343
sl@0	2344	srcStart = src;
sl@0	2345	srcEnd = src + srcLen;
sl@0	2346
sl@0	2347	dstStart = dst;
sl@0	2348	dstEnd = dst + dstLen - TCL_UTF_MAX;
sl@0	2349
sl@0	2350	dataPtr = (TableEncodingData *) clientData;
sl@0	2351	toUnicode = dataPtr->toUnicode;
sl@0	2352	prefixBytes = dataPtr->prefixBytes;
sl@0	2353	pageZero = toUnicode[0];
sl@0	2354
sl@0	2355	result = TCL_OK;
sl@0	2356	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2357	if (dst > dstEnd) {
sl@0	2358	result = TCL_CONVERT_NOSPACE;
sl@0	2359	break;
sl@0	2360	}
sl@0	2361	byte = ((unsigned char ) src);
sl@0	2362	if (prefixBytes[byte]) {
sl@0	2363	src++;
sl@0	2364	if (src >= srcEnd) {
sl@0	2365	src--;
sl@0	2366	result = TCL_CONVERT_MULTIBYTE;
sl@0	2367	break;
sl@0	2368	}
sl@0	2369	ch = toUnicode[byte][((unsigned char ) src)];
sl@0	2370	} else {
sl@0	2371	ch = pageZero[byte];
sl@0	2372	}
sl@0	2373	if ((ch == 0) && (byte != 0)) {
sl@0	2374	if (flags & TCL_ENCODING_STOPONERROR) {
sl@0	2375	result = TCL_CONVERT_SYNTAX;
sl@0	2376	break;
sl@0	2377	}
sl@0	2378	if (prefixBytes[byte]) {
sl@0	2379	src--;
sl@0	2380	}
sl@0	2381	ch = (Tcl_UniChar) byte;
sl@0	2382	}
sl@0	2383	/*
sl@0	2384	* Special case for 1-byte utf chars for speed.
sl@0	2385	*/
sl@0	2386	if (ch && ch < 0x80) {
sl@0	2387	*dst++ = (char) ch;
sl@0	2388	} else {
sl@0	2389	dst += Tcl_UniCharToUtf(ch, dst);
sl@0	2390	}
sl@0	2391	src++;
sl@0	2392	}
sl@0	2393	*srcReadPtr = src - srcStart;
sl@0	2394	*dstWrotePtr = dst - dstStart;
sl@0	2395	*dstCharsPtr = numChars;
sl@0	2396	return result;
sl@0	2397	}
sl@0	2398
sl@0	2399	/*
sl@0	2400	*-------------------------------------------------------------------------
sl@0	2401	*
sl@0	2402	* TableFromUtfProc --
sl@0	2403	*
sl@0	2404	* Convert from UTF-8 into the encoding specified by the
sl@0	2405	* TableEncodingData.
sl@0	2406	*
sl@0	2407	* Results:
sl@0	2408	* Returns TCL_OK if conversion was successful.
sl@0	2409	*
sl@0	2410	* Side effects:
sl@0	2411	* None.
sl@0	2412	*
sl@0	2413	*-------------------------------------------------------------------------
sl@0	2414	*/
sl@0	2415
sl@0	2416	static int
sl@0	2417	TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2418	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2419	ClientData clientData; /* TableEncodingData that specifies
sl@0	2420	* encoding. */
sl@0	2421	CONST char src; / Source string in UTF-8. */
sl@0	2422	int srcLen; /* Source string length in bytes. */
sl@0	2423	int flags; /* Conversion control flags. */
sl@0	2424	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2425	* state information used during a piecewise
sl@0	2426	* conversion. Contents of statePtr are
sl@0	2427	* initialized and/or reset by conversion
sl@0	2428	* routine under control of flags argument. */
sl@0	2429	char dst; / Output buffer in which converted string
sl@0	2430	* is stored. */
sl@0	2431	int dstLen; /* The maximum length of output buffer in
sl@0	2432	* bytes. */
sl@0	2433	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2434	* source string that were converted. This
sl@0	2435	* may be less than the original source length
sl@0	2436	* if there was a problem converting some
sl@0	2437	* source characters. */
sl@0	2438	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2439	* stored in the output buffer as a result of
sl@0	2440	* the conversion. */
sl@0	2441	int dstCharsPtr; / Filled with the number of characters that
sl@0	2442	* correspond to the bytes stored in the
sl@0	2443	* output buffer. */
sl@0	2444	{
sl@0	2445	CONST char srcStart, srcEnd, *srcClose;
sl@0	2446	char dstStart, dstEnd, *prefixBytes;
sl@0	2447	Tcl_UniChar ch;
sl@0	2448	int result, len, word, numChars;
sl@0	2449	TableEncodingData *dataPtr;
sl@0	2450	unsigned short **fromUnicode;
sl@0	2451
sl@0	2452	result = TCL_OK;
sl@0	2453
sl@0	2454	dataPtr = (TableEncodingData *) clientData;
sl@0	2455	prefixBytes = dataPtr->prefixBytes;
sl@0	2456	fromUnicode = dataPtr->fromUnicode;
sl@0	2457
sl@0	2458	srcStart = src;
sl@0	2459	srcEnd = src + srcLen;
sl@0	2460	srcClose = srcEnd;
sl@0	2461	if ((flags & TCL_ENCODING_END) == 0) {
sl@0	2462	srcClose -= TCL_UTF_MAX;
sl@0	2463	}
sl@0	2464
sl@0	2465	dstStart = dst;
sl@0	2466	dstEnd = dst + dstLen - 1;
sl@0	2467
sl@0	2468	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2469	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
sl@0	2470	/*
sl@0	2471	* If there is more string to follow, this will ensure that the
sl@0	2472	* last UTF-8 character in the source buffer hasn't been cut off.
sl@0	2473	*/
sl@0	2474
sl@0	2475	result = TCL_CONVERT_MULTIBYTE;
sl@0	2476	break;
sl@0	2477	}
sl@0	2478	len = TclUtfToUniChar(src, &ch);
sl@0	2479
sl@0	2480	#if TCL_UTF_MAX > 3
sl@0	2481	/*
sl@0	2482	* This prevents a crash condition. More evaluation is required
sl@0	2483	* for full support of int Tcl_UniChar. [Bug 1004065]
sl@0	2484	*/
sl@0	2485	if (ch & 0xffff0000) {
sl@0	2486	word = 0;
sl@0	2487	} else
sl@0	2488	#endif
sl@0	2489	word = fromUnicode[(ch >> 8)][ch & 0xff];
sl@0	2490
sl@0	2491	if ((word == 0) && (ch != 0)) {
sl@0	2492	if (flags & TCL_ENCODING_STOPONERROR) {
sl@0	2493	result = TCL_CONVERT_UNKNOWN;
sl@0	2494	break;
sl@0	2495	}
sl@0	2496	word = dataPtr->fallback;
sl@0	2497	}
sl@0	2498	if (prefixBytes[(word >> 8)] != 0) {
sl@0	2499	if (dst + 1 > dstEnd) {
sl@0	2500	result = TCL_CONVERT_NOSPACE;
sl@0	2501	break;
sl@0	2502	}
sl@0	2503	dst[0] = (char) (word >> 8);
sl@0	2504	dst[1] = (char) word;
sl@0	2505	dst += 2;
sl@0	2506	} else {
sl@0	2507	if (dst > dstEnd) {
sl@0	2508	result = TCL_CONVERT_NOSPACE;
sl@0	2509	break;
sl@0	2510	}
sl@0	2511	dst[0] = (char) word;
sl@0	2512	dst++;
sl@0	2513	}
sl@0	2514	src += len;
sl@0	2515	}
sl@0	2516	*srcReadPtr = src - srcStart;
sl@0	2517	*dstWrotePtr = dst - dstStart;
sl@0	2518	*dstCharsPtr = numChars;
sl@0	2519	return result;
sl@0	2520	}
sl@0	2521
sl@0	2522	/*
sl@0	2523	*---------------------------------------------------------------------------
sl@0	2524	*
sl@0	2525	* TableFreeProc --
sl@0	2526	*
sl@0	2527	* This procedure is invoked when an encoding is deleted. It deletes
sl@0	2528	* the memory used by the TableEncodingData.
sl@0	2529	*
sl@0	2530	* Results:
sl@0	2531	* None.
sl@0	2532	*
sl@0	2533	* Side effects:
sl@0	2534	* Memory freed.
sl@0	2535	*
sl@0	2536	*---------------------------------------------------------------------------
sl@0	2537	*/
sl@0	2538
sl@0	2539	static void
sl@0	2540	TableFreeProc(clientData)
sl@0	2541	ClientData clientData; /* TableEncodingData that specifies
sl@0	2542	* encoding. */
sl@0	2543	{
sl@0	2544	TableEncodingData *dataPtr;
sl@0	2545
sl@0	2546	/*
sl@0	2547	* Make sure we aren't freeing twice on shutdown. [Bug #219314]
sl@0	2548	*/
sl@0	2549
sl@0	2550	dataPtr = (TableEncodingData *) clientData;
sl@0	2551	ckfree((char *) dataPtr->toUnicode);
sl@0	2552	ckfree((char *) dataPtr->fromUnicode);
sl@0	2553	ckfree((char *) dataPtr);
sl@0	2554	}
sl@0	2555
sl@0	2556	/*
sl@0	2557	*-------------------------------------------------------------------------
sl@0	2558	*
sl@0	2559	* EscapeToUtfProc --
sl@0	2560	*
sl@0	2561	* Convert from the encoding specified by the EscapeEncodingData into
sl@0	2562	* UTF-8.
sl@0	2563	*
sl@0	2564	* Results:
sl@0	2565	* Returns TCL_OK if conversion was successful.
sl@0	2566	*
sl@0	2567	* Side effects:
sl@0	2568	* None.
sl@0	2569	*
sl@0	2570	*-------------------------------------------------------------------------
sl@0	2571	*/
sl@0	2572
sl@0	2573	static int
sl@0	2574	EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2575	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2576	ClientData clientData; /* EscapeEncodingData that specifies
sl@0	2577	* encoding. */
sl@0	2578	CONST char src; / Source string in specified encoding. */
sl@0	2579	int srcLen; /* Source string length in bytes. */
sl@0	2580	int flags; /* Conversion control flags. */
sl@0	2581	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2582	* state information used during a piecewise
sl@0	2583	* conversion. Contents of statePtr are
sl@0	2584	* initialized and/or reset by conversion
sl@0	2585	* routine under control of flags argument. */
sl@0	2586	char dst; / Output buffer in which converted string
sl@0	2587	* is stored. */
sl@0	2588	int dstLen; /* The maximum length of output buffer in
sl@0	2589	* bytes. */
sl@0	2590	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2591	* source string that were converted. This
sl@0	2592	* may be less than the original source length
sl@0	2593	* if there was a problem converting some
sl@0	2594	* source characters. */
sl@0	2595	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2596	* stored in the output buffer as a result of
sl@0	2597	* the conversion. */
sl@0	2598	int dstCharsPtr; / Filled with the number of characters that
sl@0	2599	* correspond to the bytes stored in the
sl@0	2600	* output buffer. */
sl@0	2601	{
sl@0	2602	EscapeEncodingData *dataPtr;
sl@0	2603	char prefixBytes, tablePrefixBytes;
sl@0	2604	unsigned short **tableToUnicode;
sl@0	2605	Encoding *encodingPtr;
sl@0	2606	int state, result, numChars;
sl@0	2607	CONST char srcStart, srcEnd;
sl@0	2608	char dstStart, dstEnd;
sl@0	2609
sl@0	2610	result = TCL_OK;
sl@0	2611
sl@0	2612	tablePrefixBytes = NULL; /* lint. */
sl@0	2613	tableToUnicode = NULL; /* lint. */
sl@0	2614
sl@0	2615	dataPtr = (EscapeEncodingData *) clientData;
sl@0	2616	prefixBytes = dataPtr->prefixBytes;
sl@0	2617	encodingPtr = NULL;
sl@0	2618
sl@0	2619	srcStart = src;
sl@0	2620	srcEnd = src + srcLen;
sl@0	2621
sl@0	2622	dstStart = dst;
sl@0	2623	dstEnd = dst + dstLen - TCL_UTF_MAX;
sl@0	2624
sl@0	2625	state = (int) *statePtr;
sl@0	2626	if (flags & TCL_ENCODING_START) {
sl@0	2627	state = 0;
sl@0	2628	}
sl@0	2629
sl@0	2630	for (numChars = 0; src < srcEnd; ) {
sl@0	2631	int byte, hi, lo, ch;
sl@0	2632
sl@0	2633	if (dst > dstEnd) {
sl@0	2634	result = TCL_CONVERT_NOSPACE;
sl@0	2635	break;
sl@0	2636	}
sl@0	2637	byte = ((unsigned char ) src);
sl@0	2638	if (prefixBytes[byte]) {
sl@0	2639	unsigned int left, len, longest;
sl@0	2640	int checked, i;
sl@0	2641	EscapeSubTable *subTablePtr;
sl@0	2642
sl@0	2643	/*
sl@0	2644	* Saw the beginning of an escape sequence.
sl@0	2645	*/
sl@0	2646
sl@0	2647	left = srcEnd - src;
sl@0	2648	len = dataPtr->initLen;
sl@0	2649	longest = len;
sl@0	2650	checked = 0;
sl@0	2651	if (len <= left) {
sl@0	2652	checked++;
sl@0	2653	if ((len > 0) &&
sl@0	2654	(memcmp(src, dataPtr->init, len) == 0)) {
sl@0	2655	/*
sl@0	2656	* If we see initialization string, skip it, even if we're
sl@0	2657	* not at the beginning of the buffer.
sl@0	2658	*/
sl@0	2659
sl@0	2660	src += len;
sl@0	2661	continue;
sl@0	2662	}
sl@0	2663	}
sl@0	2664	len = dataPtr->finalLen;
sl@0	2665	if (len > longest) {
sl@0	2666	longest = len;
sl@0	2667	}
sl@0	2668	if (len <= left) {
sl@0	2669	checked++;
sl@0	2670	if ((len > 0) &&
sl@0	2671	(memcmp(src, dataPtr->final, len) == 0)) {
sl@0	2672	/*
sl@0	2673	* If we see finalization string, skip it, even if we're
sl@0	2674	* not at the end of the buffer.
sl@0	2675	*/
sl@0	2676
sl@0	2677	src += len;
sl@0	2678	continue;
sl@0	2679	}
sl@0	2680	}
sl@0	2681	subTablePtr = dataPtr->subTables;
sl@0	2682	for (i = 0; i < dataPtr->numSubTables; i++) {
sl@0	2683	len = subTablePtr->sequenceLen;
sl@0	2684	if (len > longest) {
sl@0	2685	longest = len;
sl@0	2686	}
sl@0	2687	if (len <= left) {
sl@0	2688	checked++;
sl@0	2689	if ((len > 0) &&
sl@0	2690	(memcmp(src, subTablePtr->sequence, len) == 0)) {
sl@0	2691	state = i;
sl@0	2692	encodingPtr = NULL;
sl@0	2693	subTablePtr = NULL;
sl@0	2694	src += len;
sl@0	2695	break;
sl@0	2696	}
sl@0	2697	}
sl@0	2698	subTablePtr++;
sl@0	2699	}
sl@0	2700	if (subTablePtr == NULL) {
sl@0	2701	/*
sl@0	2702	* A match was found, the escape sequence was consumed, and
sl@0	2703	* the state was updated.
sl@0	2704	*/
sl@0	2705
sl@0	2706	continue;
sl@0	2707	}
sl@0	2708
sl@0	2709	/*
sl@0	2710	* We have a split-up or unrecognized escape sequence. If we
sl@0	2711	* checked all the sequences, then it's a syntax error,
sl@0	2712	* otherwise we need more bytes to determine a match.
sl@0	2713	*/
sl@0	2714
sl@0	2715	if ((checked == dataPtr->numSubTables + 2)
sl@0	2716	\|\| (flags & TCL_ENCODING_END)) {
sl@0	2717	if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
sl@0	2718	/*
sl@0	2719	* Skip the unknown escape sequence.
sl@0	2720	*/
sl@0	2721
sl@0	2722	src += longest;
sl@0	2723	continue;
sl@0	2724	}
sl@0	2725	result = TCL_CONVERT_SYNTAX;
sl@0	2726	} else {
sl@0	2727	result = TCL_CONVERT_MULTIBYTE;
sl@0	2728	}
sl@0	2729	break;
sl@0	2730	}
sl@0	2731
sl@0	2732	if (encodingPtr == NULL) {
sl@0	2733	TableEncodingData *tableDataPtr;
sl@0	2734
sl@0	2735	encodingPtr = GetTableEncoding(dataPtr, state);
sl@0	2736	tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
sl@0	2737	tablePrefixBytes = tableDataPtr->prefixBytes;
sl@0	2738	tableToUnicode = tableDataPtr->toUnicode;
sl@0	2739	}
sl@0	2740	if (tablePrefixBytes[byte]) {
sl@0	2741	src++;
sl@0	2742	if (src >= srcEnd) {
sl@0	2743	src--;
sl@0	2744	result = TCL_CONVERT_MULTIBYTE;
sl@0	2745	break;
sl@0	2746	}
sl@0	2747	hi = byte;
sl@0	2748	lo = ((unsigned char ) src);
sl@0	2749	} else {
sl@0	2750	hi = 0;
sl@0	2751	lo = byte;
sl@0	2752	}
sl@0	2753	ch = tableToUnicode[hi][lo];
sl@0	2754	dst += Tcl_UniCharToUtf(ch, dst);
sl@0	2755	src++;
sl@0	2756	numChars++;
sl@0	2757	}
sl@0	2758
sl@0	2759	*statePtr = (Tcl_EncodingState) state;
sl@0	2760	*srcReadPtr = src - srcStart;
sl@0	2761	*dstWrotePtr = dst - dstStart;
sl@0	2762	*dstCharsPtr = numChars;
sl@0	2763	return result;
sl@0	2764	}
sl@0	2765
sl@0	2766	/*
sl@0	2767	*-------------------------------------------------------------------------
sl@0	2768	*
sl@0	2769	* EscapeFromUtfProc --
sl@0	2770	*
sl@0	2771	* Convert from UTF-8 into the encoding specified by the
sl@0	2772	* EscapeEncodingData.
sl@0	2773	*
sl@0	2774	* Results:
sl@0	2775	* Returns TCL_OK if conversion was successful.
sl@0	2776	*
sl@0	2777	* Side effects:
sl@0	2778	* None.
sl@0	2779	*
sl@0	2780	*-------------------------------------------------------------------------
sl@0	2781	*/
sl@0	2782
sl@0	2783	static int
sl@0	2784	EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
sl@0	2785	srcReadPtr, dstWrotePtr, dstCharsPtr)
sl@0	2786	ClientData clientData; /* EscapeEncodingData that specifies
sl@0	2787	* encoding. */
sl@0	2788	CONST char src; / Source string in UTF-8. */
sl@0	2789	int srcLen; /* Source string length in bytes. */
sl@0	2790	int flags; /* Conversion control flags. */
sl@0	2791	Tcl_EncodingState statePtr;/ Place for conversion routine to store
sl@0	2792	* state information used during a piecewise
sl@0	2793	* conversion. Contents of statePtr are
sl@0	2794	* initialized and/or reset by conversion
sl@0	2795	* routine under control of flags argument. */
sl@0	2796	char dst; / Output buffer in which converted string
sl@0	2797	* is stored. */
sl@0	2798	int dstLen; /* The maximum length of output buffer in
sl@0	2799	* bytes. */
sl@0	2800	int srcReadPtr; / Filled with the number of bytes from the
sl@0	2801	* source string that were converted. This
sl@0	2802	* may be less than the original source length
sl@0	2803	* if there was a problem converting some
sl@0	2804	* source characters. */
sl@0	2805	int dstWrotePtr; / Filled with the number of bytes that were
sl@0	2806	* stored in the output buffer as a result of
sl@0	2807	* the conversion. */
sl@0	2808	int dstCharsPtr; / Filled with the number of characters that
sl@0	2809	* correspond to the bytes stored in the
sl@0	2810	* output buffer. */
sl@0	2811	{
sl@0	2812	EscapeEncodingData *dataPtr;
sl@0	2813	Encoding *encodingPtr;
sl@0	2814	CONST char srcStart, srcEnd, *srcClose;
sl@0	2815	char dstStart, dstEnd;
sl@0	2816	int state, result, numChars;
sl@0	2817	TableEncodingData *tableDataPtr;
sl@0	2818	char *tablePrefixBytes;
sl@0	2819	unsigned short **tableFromUnicode;
sl@0	2820
sl@0	2821	result = TCL_OK;
sl@0	2822
sl@0	2823	dataPtr = (EscapeEncodingData *) clientData;
sl@0	2824
sl@0	2825	srcStart = src;
sl@0	2826	srcEnd = src + srcLen;
sl@0	2827	srcClose = srcEnd;
sl@0	2828	if ((flags & TCL_ENCODING_END) == 0) {
sl@0	2829	srcClose -= TCL_UTF_MAX;
sl@0	2830	}
sl@0	2831
sl@0	2832	dstStart = dst;
sl@0	2833	dstEnd = dst + dstLen - 1;
sl@0	2834
sl@0	2835	/*
sl@0	2836	* RFC1468 states that the text starts in ASCII, and switches to Japanese
sl@0	2837	* characters, and that the text must end in ASCII. [Patch #474358]
sl@0	2838	*/
sl@0	2839
sl@0	2840	if (flags & TCL_ENCODING_START) {
sl@0	2841	state = 0;
sl@0	2842	if ((dst + dataPtr->initLen) > dstEnd) {
sl@0	2843	*srcReadPtr = 0;
sl@0	2844	*dstWrotePtr = 0;
sl@0	2845	return TCL_CONVERT_NOSPACE;
sl@0	2846	}
sl@0	2847	memcpy((VOID ) dst, (VOID ) dataPtr->init,
sl@0	2848	(size_t) dataPtr->initLen);
sl@0	2849	dst += dataPtr->initLen;
sl@0	2850	} else {
sl@0	2851	state = (int) *statePtr;
sl@0	2852	}
sl@0	2853
sl@0	2854	encodingPtr = GetTableEncoding(dataPtr, state);
sl@0	2855	tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
sl@0	2856	tablePrefixBytes = tableDataPtr->prefixBytes;
sl@0	2857	tableFromUnicode = tableDataPtr->fromUnicode;
sl@0	2858
sl@0	2859	for (numChars = 0; src < srcEnd; numChars++) {
sl@0	2860	unsigned int len;
sl@0	2861	int word;
sl@0	2862	Tcl_UniChar ch;
sl@0	2863
sl@0	2864	if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
sl@0	2865	/*
sl@0	2866	* If there is more string to follow, this will ensure that the
sl@0	2867	* last UTF-8 character in the source buffer hasn't been cut off.
sl@0	2868	*/
sl@0	2869
sl@0	2870	result = TCL_CONVERT_MULTIBYTE;
sl@0	2871	break;
sl@0	2872	}
sl@0	2873	len = TclUtfToUniChar(src, &ch);
sl@0	2874	word = tableFromUnicode[(ch >> 8)][ch & 0xff];
sl@0	2875
sl@0	2876	if ((word == 0) && (ch != 0)) {
sl@0	2877	int oldState;
sl@0	2878	EscapeSubTable *subTablePtr;
sl@0	2879
sl@0	2880	oldState = state;
sl@0	2881	for (state = 0; state < dataPtr->numSubTables; state++) {
sl@0	2882	encodingPtr = GetTableEncoding(dataPtr, state);
sl@0	2883	tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
sl@0	2884	word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff];
sl@0	2885	if (word != 0) {
sl@0	2886	break;
sl@0	2887	}
sl@0	2888	}
sl@0	2889
sl@0	2890	if (word == 0) {
sl@0	2891	state = oldState;
sl@0	2892	if (flags & TCL_ENCODING_STOPONERROR) {
sl@0	2893	result = TCL_CONVERT_UNKNOWN;
sl@0	2894	break;
sl@0	2895	}
sl@0	2896	encodingPtr = GetTableEncoding(dataPtr, state);
sl@0	2897	tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
sl@0	2898	word = tableDataPtr->fallback;
sl@0	2899	}
sl@0	2900
sl@0	2901	tablePrefixBytes = tableDataPtr->prefixBytes;
sl@0	2902	tableFromUnicode = tableDataPtr->fromUnicode;
sl@0	2903
sl@0	2904	/*
sl@0	2905	* The state variable has the value of oldState when word is 0.
sl@0	2906	* In this case, the escape sequense should not be copied to dst
sl@0	2907	* because the current character set is not changed.
sl@0	2908	*/
sl@0	2909	if (state != oldState) {
sl@0	2910	subTablePtr = &dataPtr->subTables[state];
sl@0	2911	if ((dst + subTablePtr->sequenceLen) > dstEnd) {
sl@0	2912	/*
sl@0	2913	* If there is no space to write the escape sequence, the
sl@0	2914	* state variable must be changed to the value of oldState
sl@0	2915	* variable because this escape sequence must be written
sl@0	2916	* in the next conversion.
sl@0	2917	*/
sl@0	2918	state = oldState;
sl@0	2919	result = TCL_CONVERT_NOSPACE;
sl@0	2920	break;
sl@0	2921	}
sl@0	2922	memcpy((VOID ) dst, (VOID ) subTablePtr->sequence,
sl@0	2923	(size_t) subTablePtr->sequenceLen);
sl@0	2924	dst += subTablePtr->sequenceLen;
sl@0	2925	}
sl@0	2926	}
sl@0	2927
sl@0	2928	if (tablePrefixBytes[(word >> 8)] != 0) {
sl@0	2929	if (dst + 1 > dstEnd) {
sl@0	2930	result = TCL_CONVERT_NOSPACE;
sl@0	2931	break;
sl@0	2932	}
sl@0	2933	dst[0] = (char) (word >> 8);
sl@0	2934	dst[1] = (char) word;
sl@0	2935	dst += 2;
sl@0	2936	} else {
sl@0	2937	if (dst > dstEnd) {
sl@0	2938	result = TCL_CONVERT_NOSPACE;
sl@0	2939	break;
sl@0	2940	}
sl@0	2941	dst[0] = (char) word;
sl@0	2942	dst++;
sl@0	2943	}
sl@0	2944	src += len;
sl@0	2945	}
sl@0	2946
sl@0	2947	if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) {
sl@0	2948	unsigned int len = dataPtr->subTables[0].sequenceLen;
sl@0	2949	/*
sl@0	2950	* [Bug 1516109].
sl@0	2951	* Certain encodings like iso2022-jp need to write
sl@0	2952	* an escape sequence after all characters have
sl@0	2953	* been converted. This logic checks that enough
sl@0	2954	* room is available in the buffer for the escape bytes.
sl@0	2955	* The TCL_ENCODING_END flag is cleared after a final
sl@0	2956	* escape sequence has been added to the buffer so
sl@0	2957	* that another call to this method does not attempt
sl@0	2958	* to append escape bytes a second time.
sl@0	2959	*/
sl@0	2960	if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) {
sl@0	2961	result = TCL_CONVERT_NOSPACE;
sl@0	2962	} else {
sl@0	2963	if (state) {
sl@0	2964	memcpy((VOID ) dst, (VOID ) dataPtr->subTables[0].sequence,
sl@0	2965	(size_t) len);
sl@0	2966	dst += len;
sl@0	2967	}
sl@0	2968	memcpy((VOID ) dst, (VOID ) dataPtr->final,
sl@0	2969	(size_t) dataPtr->finalLen);
sl@0	2970	dst += dataPtr->finalLen;
sl@0	2971	state &= ~TCL_ENCODING_END;
sl@0	2972	}
sl@0	2973	}
sl@0	2974
sl@0	2975	*statePtr = (Tcl_EncodingState) state;
sl@0	2976	*srcReadPtr = src - srcStart;
sl@0	2977	*dstWrotePtr = dst - dstStart;
sl@0	2978	*dstCharsPtr = numChars;
sl@0	2979	return result;
sl@0	2980	}
sl@0	2981
sl@0	2982	/*
sl@0	2983	*---------------------------------------------------------------------------
sl@0	2984	*
sl@0	2985	* EscapeFreeProc --
sl@0	2986	*
sl@0	2987	* This procedure is invoked when an EscapeEncodingData encoding is
sl@0	2988	* deleted. It deletes the memory used by the encoding.
sl@0	2989	*
sl@0	2990	* Results:
sl@0	2991	* None.
sl@0	2992	*
sl@0	2993	* Side effects:
sl@0	2994	* Memory freed.
sl@0	2995	*
sl@0	2996	*---------------------------------------------------------------------------
sl@0	2997	*/
sl@0	2998
sl@0	2999	static void
sl@0	3000	EscapeFreeProc(clientData)
sl@0	3001	ClientData clientData; /* EscapeEncodingData that specifies encoding. */
sl@0	3002	{
sl@0	3003	EscapeEncodingData *dataPtr;
sl@0	3004	EscapeSubTable *subTablePtr;
sl@0	3005	int i;
sl@0	3006
sl@0	3007	dataPtr = (EscapeEncodingData *) clientData;
sl@0	3008	if (dataPtr == NULL) {
sl@0	3009	return;
sl@0	3010	}
sl@0	3011	subTablePtr = dataPtr->subTables;
sl@0	3012	for (i = 0; i < dataPtr->numSubTables; i++) {
sl@0	3013	FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr);
sl@0	3014	subTablePtr++;
sl@0	3015	}
sl@0	3016	ckfree((char *) dataPtr);
sl@0	3017	}
sl@0	3018
sl@0	3019	/*
sl@0	3020	*---------------------------------------------------------------------------
sl@0	3021	*
sl@0	3022	* GetTableEncoding --
sl@0	3023	*
sl@0	3024	* Helper function for the EscapeEncodingData conversions. Gets the
sl@0	3025	* encoding (of type TextEncodingData) that represents the specified
sl@0	3026	* state.
sl@0	3027	*
sl@0	3028	* Results:
sl@0	3029	* The return value is the encoding.
sl@0	3030	*
sl@0	3031	* Side effects:
sl@0	3032	* If the encoding that represents the specified state has not
sl@0	3033	* already been used by this EscapeEncoding, it will be loaded
sl@0	3034	* and cached in the dataPtr.
sl@0	3035	*
sl@0	3036	*---------------------------------------------------------------------------
sl@0	3037	*/
sl@0	3038
sl@0	3039	static Encoding *
sl@0	3040	GetTableEncoding(dataPtr, state)
sl@0	3041	EscapeEncodingData dataPtr;/ Contains names of encodings. */
sl@0	3042	int state; /* Index in dataPtr of desired Encoding. */
sl@0	3043	{
sl@0	3044	EscapeSubTable *subTablePtr;
sl@0	3045	Encoding *encodingPtr;
sl@0	3046
sl@0	3047	subTablePtr = &dataPtr->subTables[state];
sl@0	3048	encodingPtr = subTablePtr->encodingPtr;
sl@0	3049	if (encodingPtr == NULL) {
sl@0	3050	/*
sl@0	3051	* Now that escape encodings load their sub-encodings first, and
sl@0	3052	* fail to load if any sub-encodings are missing, this branch should
sl@0	3053	* never happen.
sl@0	3054	*/
sl@0	3055	encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
sl@0	3056	if ((encodingPtr == NULL)
sl@0	3057	\|\| (encodingPtr->toUtfProc != TableToUtfProc)) {
sl@0	3058	panic("EscapeToUtfProc: invalid sub table");
sl@0	3059	}
sl@0	3060	subTablePtr->encodingPtr = encodingPtr;
sl@0	3061	}
sl@0	3062	return encodingPtr;
sl@0	3063	}
sl@0	3064
sl@0	3065	/*
sl@0	3066	*---------------------------------------------------------------------------
sl@0	3067	*
sl@0	3068	* unilen --
sl@0	3069	*
sl@0	3070	* A helper function for the Tcl_ExternalToUtf functions. This
sl@0	3071	* function is similar to strlen for double-byte characters: it
sl@0	3072	* returns the number of bytes in a 0x0000 terminated string.
sl@0	3073	*
sl@0	3074	* Results:
sl@0	3075	* As above.
sl@0	3076	*
sl@0	3077	* Side effects:
sl@0	3078	* None.
sl@0	3079	*
sl@0	3080	*---------------------------------------------------------------------------
sl@0	3081	*/
sl@0	3082
sl@0	3083	static size_t
sl@0	3084	unilen(src)
sl@0	3085	CONST char *src;
sl@0	3086	{
sl@0	3087	unsigned short *p;
sl@0	3088
sl@0	3089	p = (unsigned short *) src;
sl@0	3090	while (*p != 0x0000) {
sl@0	3091	p++;
sl@0	3092	}
sl@0	3093	return (char *) p - src;
sl@0	3094	}
sl@0	3095
sl@0	3096	/*
sl@0	3097	*-------------------------------------------------------------------------
sl@0	3098	*
sl@0	3099	* TclFindEncodings --
sl@0	3100	*
sl@0	3101	* Find and load the encoding file for this operating system.
sl@0	3102	* Before this is called, Tcl makes assumptions about the
sl@0	3103	* native string representation, but the true encoding is not
sl@0	3104	* assured.
sl@0	3105	*
sl@0	3106	* Results:
sl@0	3107	* Return result of TclpInitLibraryPath, which reports whether the
sl@0	3108	* path is clean (0) or dirty (1) UTF.
sl@0	3109	*
sl@0	3110	* Side effects:
sl@0	3111	* Varied, see the respective initialization routines.
sl@0	3112	*
sl@0	3113	*-------------------------------------------------------------------------
sl@0	3114	*/
sl@0	3115
sl@0	3116	static int
sl@0	3117	TclFindEncodings(argv0)
sl@0	3118	CONST char argv0; / Name of executable from argv[0] to main()
sl@0	3119	* in native multi-byte encoding. */
sl@0	3120	{
sl@0	3121	int mustCleanUtf = 0;
sl@0	3122
sl@0	3123	if (encodingsInitialized == 0) {
sl@0	3124	/*
sl@0	3125	* Double check inside the mutex. There may be calls
sl@0	3126	* back into this routine from some of the procedures below.
sl@0	3127	*/
sl@0	3128
sl@0	3129	TclpInitLock();
sl@0	3130	if (encodingsInitialized == 0) {
sl@0	3131	char *native;
sl@0	3132	Tcl_Obj *pathPtr;
sl@0	3133	Tcl_DString libPath, buffer;
sl@0	3134
sl@0	3135	/*
sl@0	3136	* Have to set this bit here to avoid deadlock with the
sl@0	3137	* routines below us that call into TclInitSubsystems.
sl@0	3138	*/
sl@0	3139
sl@0	3140	encodingsInitialized = 1;
sl@0	3141
sl@0	3142	native = TclpFindExecutable(argv0);
sl@0	3143	mustCleanUtf = TclpInitLibraryPath(native);
sl@0	3144
sl@0	3145	/*
sl@0	3146	* The library path was set in the TclpInitLibraryPath routine.
sl@0	3147	* The string set is a dirty UTF string. To preserve the value
sl@0	3148	* convert the UTF string back to native before setting the new
sl@0	3149	* default encoding.
sl@0	3150	*/
sl@0	3151
sl@0	3152	pathPtr = TclGetLibraryPath();
sl@0	3153	if ((pathPtr != NULL) && mustCleanUtf) {
sl@0	3154	Tcl_UtfToExternalDString(NULL, Tcl_GetString(pathPtr), -1,
sl@0	3155	&libPath);
sl@0	3156	}
sl@0	3157
sl@0	3158	TclpSetInitialEncodings();
sl@0	3159
sl@0	3160	/*
sl@0	3161	* Now convert the native string back to UTF.
sl@0	3162	*/
sl@0	3163
sl@0	3164	if ((pathPtr != NULL) && mustCleanUtf) {
sl@0	3165	Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&libPath), -1,
sl@0	3166	&buffer);
sl@0	3167	pathPtr = Tcl_NewStringObj(Tcl_DStringValue(&buffer), -1);
sl@0	3168	TclSetLibraryPath(pathPtr);
sl@0	3169
sl@0	3170	Tcl_DStringFree(&libPath);
sl@0	3171	Tcl_DStringFree(&buffer);
sl@0	3172	}
sl@0	3173	}
sl@0	3174	TclpInitUnlock();
sl@0	3175	}
sl@0	3176
sl@0	3177	return mustCleanUtf;
sl@0	3178	}

author	sl@SLION-WIN7.fritz.box
	Fri, 15 Jun 2012 03:10:57 +0200
changeset 0	bde4ae8d615e
permissions	-rw-r--r--