sl@0: /* sl@0: * tclStringObj.c -- sl@0: * sl@0: * This file contains procedures that implement string operations on Tcl sl@0: * objects. Some string operations work with UTF strings and others sl@0: * require Unicode format. Functions that require knowledge of the width sl@0: * of each character, such as indexing, operate on Unicode data. sl@0: * sl@0: * A Unicode string is an internationalized string. Conceptually, a sl@0: * Unicode string is an array of 16-bit quantities organized as a sequence sl@0: * of properly formed UTF-8 characters. There is a one-to-one map between sl@0: * Unicode and UTF characters. Because Unicode characters have a fixed sl@0: * width, operations such as indexing operate on Unicode data. The String sl@0: * object is optimized for the case where each UTF char in a string is sl@0: * only one byte. In this case, we store the value of numChars, but we sl@0: * don't store the Unicode data (unless Tcl_GetUnicode is explicitly sl@0: * called). sl@0: * sl@0: * The String object type stores one or both formats. The default sl@0: * behavior is to store UTF. Once Unicode is calculated by a function, it sl@0: * is stored in the internal rep for future access (without an additional sl@0: * O(n) cost). sl@0: * sl@0: * To allow many appends to be done to an object without constantly sl@0: * reallocating the space for the string or Unicode representation, we sl@0: * allocate double the space for the string or Unicode and use the sl@0: * internal representation to keep track of how much space is used sl@0: * vs. allocated. sl@0: * sl@0: * Copyright (c) 1995-1997 Sun Microsystems, Inc. sl@0: * Copyright (c) 1999 by Scriptics Corporation. sl@0: * Portions Copyright (c) 2007-2008 Nokia Corporation and/or its subsidiaries. All rights reserved. sl@0: * sl@0: * See the file "license.terms" for information on usage and redistribution sl@0: * of this file, and for a DISCLAIMER OF ALL WARRANTIES. sl@0: * sl@0: * RCS: @(#) $Id: tclStringObj.c,v 1.32.2.2 2006/09/24 21:15:11 msofer Exp $ */ sl@0: sl@0: #include "tclInt.h" sl@0: sl@0: /* sl@0: * Prototypes for procedures defined later in this file: sl@0: */ sl@0: sl@0: static void AppendUnicodeToUnicodeRep _ANSI_ARGS_(( sl@0: Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode, sl@0: int appendNumChars)); sl@0: static void AppendUnicodeToUtfRep _ANSI_ARGS_(( sl@0: Tcl_Obj *objPtr, CONST Tcl_UniChar *unicode, sl@0: int numChars)); sl@0: static void AppendUtfToUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr, sl@0: CONST char *bytes, int numBytes)); sl@0: static void AppendUtfToUtfRep _ANSI_ARGS_((Tcl_Obj *objPtr, sl@0: CONST char *bytes, int numBytes)); sl@0: sl@0: static void FillUnicodeRep _ANSI_ARGS_((Tcl_Obj *objPtr)); sl@0: sl@0: static void FreeStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr)); sl@0: static void DupStringInternalRep _ANSI_ARGS_((Tcl_Obj *objPtr, sl@0: Tcl_Obj *copyPtr)); sl@0: static int SetStringFromAny _ANSI_ARGS_((Tcl_Interp *interp, sl@0: Tcl_Obj *objPtr)); sl@0: static void UpdateStringOfString _ANSI_ARGS_((Tcl_Obj *objPtr)); sl@0: sl@0: /* sl@0: * The structure below defines the string Tcl object type by means of sl@0: * procedures that can be invoked by generic object code. sl@0: */ sl@0: sl@0: Tcl_ObjType tclStringType = { sl@0: "string", /* name */ sl@0: FreeStringInternalRep, /* freeIntRepPro */ sl@0: DupStringInternalRep, /* dupIntRepProc */ sl@0: UpdateStringOfString, /* updateStringProc */ sl@0: SetStringFromAny /* setFromAnyProc */ sl@0: }; sl@0: sl@0: /* sl@0: * The following structure is the internal rep for a String object. sl@0: * It keeps track of how much memory has been used and how much has been sl@0: * allocated for the Unicode and UTF string to enable growing and sl@0: * shrinking of the UTF and Unicode reps of the String object with fewer sl@0: * mallocs. To optimize string length and indexing operations, this sl@0: * structure also stores the number of characters (same of UTF and Unicode!) sl@0: * once that value has been computed. sl@0: */ sl@0: sl@0: typedef struct String { sl@0: int numChars; /* The number of chars in the string. sl@0: * -1 means this value has not been sl@0: * calculated. >= 0 means that there is a sl@0: * valid Unicode rep, or that the number sl@0: * of UTF bytes == the number of chars. */ sl@0: size_t allocated; /* The amount of space actually allocated sl@0: * for the UTF string (minus 1 byte for sl@0: * the termination char). */ sl@0: size_t uallocated; /* The amount of space actually allocated sl@0: * for the Unicode string (minus 2 bytes for sl@0: * the termination char). */ sl@0: int hasUnicode; /* Boolean determining whether the string sl@0: * has a Unicode representation. */ sl@0: Tcl_UniChar unicode[2]; /* The array of Unicode chars. The actual sl@0: * size of this field depends on the sl@0: * 'uallocated' field above. */ sl@0: } String; sl@0: sl@0: #define STRING_UALLOC(numChars) \ sl@0: (numChars * sizeof(Tcl_UniChar)) sl@0: #define STRING_SIZE(ualloc) \ sl@0: ((unsigned) ((ualloc) \ sl@0: ? sizeof(String) - sizeof(Tcl_UniChar) + (ualloc) \ sl@0: : sizeof(String))) sl@0: #define GET_STRING(objPtr) \ sl@0: ((String *) (objPtr)->internalRep.otherValuePtr) sl@0: #define SET_STRING(objPtr, stringPtr) \ sl@0: (objPtr)->internalRep.otherValuePtr = (VOID *) (stringPtr) sl@0: sl@0: /* sl@0: * TCL STRING GROWTH ALGORITHM sl@0: * sl@0: * When growing strings (during an append, for example), the following growth sl@0: * algorithm is used: sl@0: * sl@0: * Attempt to allocate 2 * (originalLength + appendLength) sl@0: * On failure: sl@0: * attempt to allocate originalLength + 2*appendLength + sl@0: * TCL_GROWTH_MIN_ALLOC sl@0: * sl@0: * This algorithm allows very good performance, as it rapidly increases the sl@0: * memory allocated for a given string, which minimizes the number of sl@0: * reallocations that must be performed. However, using only the doubling sl@0: * algorithm can lead to a significant waste of memory. In particular, it sl@0: * may fail even when there is sufficient memory available to complete the sl@0: * append request (but there is not 2 * totalLength memory available). So when sl@0: * the doubling fails (because there is not enough memory available), the sl@0: * algorithm requests a smaller amount of memory, which is still enough to sl@0: * cover the request, but which hopefully will be less than the total available sl@0: * memory. sl@0: * sl@0: * The addition of TCL_GROWTH_MIN_ALLOC allows for efficient handling sl@0: * of very small appends. Without this extra slush factor, a sequence sl@0: * of several small appends would cause several memory allocations. sl@0: * As long as TCL_GROWTH_MIN_ALLOC is a reasonable size, we can sl@0: * avoid that behavior. sl@0: * sl@0: * The growth algorithm can be tuned by adjusting the following parameters: sl@0: * sl@0: * TCL_GROWTH_MIN_ALLOC Additional space, in bytes, to allocate when sl@0: * the double allocation has failed. sl@0: * Default is 1024 (1 kilobyte). sl@0: */ sl@0: #ifndef TCL_GROWTH_MIN_ALLOC sl@0: #define TCL_GROWTH_MIN_ALLOC 1024 sl@0: #endif sl@0: sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_NewStringObj -- sl@0: * sl@0: * This procedure is normally called when not debugging: i.e., when sl@0: * TCL_MEM_DEBUG is not defined. It creates a new string object and sl@0: * initializes it from the byte pointer and length arguments. sl@0: * sl@0: * When TCL_MEM_DEBUG is defined, this procedure just returns the sl@0: * result of calling the debugging version Tcl_DbNewStringObj. sl@0: * sl@0: * Results: sl@0: * A newly created string object is returned that has ref count zero. sl@0: * sl@0: * Side effects: sl@0: * The new object's internal string representation will be set to a sl@0: * copy of the length bytes starting at "bytes". If "length" is sl@0: * negative, use bytes up to the first NULL byte; i.e., assume "bytes" sl@0: * points to a C-style NULL-terminated string. The object's type is set sl@0: * to NULL. An extra NULL is added to the end of the new object's byte sl@0: * array. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: #ifdef TCL_MEM_DEBUG sl@0: #undef Tcl_NewStringObj sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_NewStringObj(bytes, length) sl@0: CONST char *bytes; /* Points to the first of the length bytes sl@0: * used to initialize the new object. */ sl@0: int length; /* The number of bytes to copy from "bytes" sl@0: * when initializing the new object. If sl@0: * negative, use bytes up to the first sl@0: * NULL byte. */ sl@0: { sl@0: return Tcl_DbNewStringObj(bytes, length, "unknown", 0); sl@0: } sl@0: sl@0: #else /* if not TCL_MEM_DEBUG */ sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_NewStringObj(bytes, length) sl@0: CONST char *bytes; /* Points to the first of the length bytes sl@0: * used to initialize the new object. */ sl@0: int length; /* The number of bytes to copy from "bytes" sl@0: * when initializing the new object. If sl@0: * negative, use bytes up to the first sl@0: * NULL byte. */ sl@0: { sl@0: register Tcl_Obj *objPtr; sl@0: sl@0: if (length < 0) { sl@0: length = (bytes? strlen(bytes) : 0); sl@0: } sl@0: TclNewObj(objPtr); sl@0: TclInitStringRep(objPtr, bytes, length); sl@0: return objPtr; sl@0: } sl@0: #endif /* TCL_MEM_DEBUG */ sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_DbNewStringObj -- sl@0: * sl@0: * This procedure is normally called when debugging: i.e., when sl@0: * TCL_MEM_DEBUG is defined. It creates new string objects. It is the sl@0: * same as the Tcl_NewStringObj procedure above except that it calls sl@0: * Tcl_DbCkalloc directly with the file name and line number from its sl@0: * caller. This simplifies debugging since then the [memory active] sl@0: * command will report the correct file name and line number when sl@0: * reporting objects that haven't been freed. sl@0: * sl@0: * When TCL_MEM_DEBUG is not defined, this procedure just returns the sl@0: * result of calling Tcl_NewStringObj. sl@0: * sl@0: * Results: sl@0: * A newly created string object is returned that has ref count zero. sl@0: * sl@0: * Side effects: sl@0: * The new object's internal string representation will be set to a sl@0: * copy of the length bytes starting at "bytes". If "length" is sl@0: * negative, use bytes up to the first NULL byte; i.e., assume "bytes" sl@0: * points to a C-style NULL-terminated string. The object's type is set sl@0: * to NULL. An extra NULL is added to the end of the new object's byte sl@0: * array. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: #ifdef TCL_MEM_DEBUG sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_DbNewStringObj(bytes, length, file, line) sl@0: CONST char *bytes; /* Points to the first of the length bytes sl@0: * used to initialize the new object. */ sl@0: int length; /* The number of bytes to copy from "bytes" sl@0: * when initializing the new object. If sl@0: * negative, use bytes up to the first sl@0: * NULL byte. */ sl@0: CONST char *file; /* The name of the source file calling this sl@0: * procedure; used for debugging. */ sl@0: int line; /* Line number in the source file; used sl@0: * for debugging. */ sl@0: { sl@0: register Tcl_Obj *objPtr; sl@0: sl@0: if (length < 0) { sl@0: length = (bytes? strlen(bytes) : 0); sl@0: } sl@0: TclDbNewObj(objPtr, file, line); sl@0: TclInitStringRep(objPtr, bytes, length); sl@0: return objPtr; sl@0: } sl@0: sl@0: #else /* if not TCL_MEM_DEBUG */ sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_DbNewStringObj(bytes, length, file, line) sl@0: CONST char *bytes; /* Points to the first of the length bytes sl@0: * used to initialize the new object. */ sl@0: register int length; /* The number of bytes to copy from "bytes" sl@0: * when initializing the new object. If sl@0: * negative, use bytes up to the first sl@0: * NULL byte. */ sl@0: CONST char *file; /* The name of the source file calling this sl@0: * procedure; used for debugging. */ sl@0: int line; /* Line number in the source file; used sl@0: * for debugging. */ sl@0: { sl@0: return Tcl_NewStringObj(bytes, length); sl@0: } sl@0: #endif /* TCL_MEM_DEBUG */ sl@0: sl@0: /* sl@0: *--------------------------------------------------------------------------- sl@0: * sl@0: * Tcl_NewUnicodeObj -- sl@0: * sl@0: * This procedure is creates a new String object and initializes sl@0: * it from the given Unicode String. If the Utf String is the same size sl@0: * as the Unicode string, don't duplicate the data. sl@0: * sl@0: * Results: sl@0: * The newly created object is returned. This object will have no sl@0: * initial string representation. The returned object has a ref count sl@0: * of 0. sl@0: * sl@0: * Side effects: sl@0: * Memory allocated for new object and copy of Unicode argument. sl@0: * sl@0: *--------------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_NewUnicodeObj(unicode, numChars) sl@0: CONST Tcl_UniChar *unicode; /* The unicode string used to initialize sl@0: * the new object. */ sl@0: int numChars; /* Number of characters in the unicode sl@0: * string. */ sl@0: { sl@0: Tcl_Obj *objPtr; sl@0: String *stringPtr; sl@0: size_t uallocated; sl@0: sl@0: if (numChars < 0) { sl@0: numChars = 0; sl@0: if (unicode) { sl@0: while (unicode[numChars] != 0) { numChars++; } sl@0: } sl@0: } sl@0: uallocated = STRING_UALLOC(numChars); sl@0: sl@0: /* sl@0: * Create a new obj with an invalid string rep. sl@0: */ sl@0: sl@0: TclNewObj(objPtr); sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: objPtr->typePtr = &tclStringType; sl@0: sl@0: stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); sl@0: stringPtr->numChars = numChars; sl@0: stringPtr->uallocated = uallocated; sl@0: stringPtr->hasUnicode = (numChars > 0); sl@0: stringPtr->allocated = 0; sl@0: memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated); sl@0: stringPtr->unicode[numChars] = 0; sl@0: SET_STRING(objPtr, stringPtr); sl@0: return objPtr; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_GetCharLength -- sl@0: * sl@0: * Get the length of the Unicode string from the Tcl object. sl@0: * sl@0: * Results: sl@0: * Pointer to unicode string representing the unicode object. sl@0: * sl@0: * Side effects: sl@0: * Frees old internal rep. Allocates memory for new "String" sl@0: * internal rep. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C int sl@0: Tcl_GetCharLength(objPtr) sl@0: Tcl_Obj *objPtr; /* The String object to get the num chars of. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: /* sl@0: * If numChars is unknown, then calculate the number of characaters sl@0: * while populating the Unicode string. sl@0: */ sl@0: sl@0: if (stringPtr->numChars == -1) { sl@0: register int i = objPtr->length; sl@0: register unsigned char *str = (unsigned char *) objPtr->bytes; sl@0: sl@0: /* sl@0: * This is a speed sensitive function, so run specially over the sl@0: * string to count continuous ascii characters before resorting sl@0: * to the Tcl_NumUtfChars call. This is a long form of: sl@0: stringPtr->numChars = Tcl_NumUtfChars(objPtr->bytes, objPtr->length); sl@0: */ sl@0: sl@0: while (i && (*str < 0xC0)) { i--; str++; } sl@0: stringPtr->numChars = objPtr->length - i; sl@0: if (i) { sl@0: stringPtr->numChars += Tcl_NumUtfChars(objPtr->bytes sl@0: + (objPtr->length - i), i); sl@0: } sl@0: sl@0: if (stringPtr->numChars == objPtr->length) { sl@0: sl@0: /* sl@0: * Since we've just calculated the number of chars, and all sl@0: * UTF chars are 1-byte long, we don't need to store the sl@0: * unicode string. sl@0: */ sl@0: sl@0: stringPtr->hasUnicode = 0; sl@0: sl@0: } else { sl@0: sl@0: /* sl@0: * Since we've just calucalated the number of chars, and not sl@0: * all UTF chars are 1-byte long, go ahead and populate the sl@0: * unicode string. sl@0: */ sl@0: sl@0: FillUnicodeRep(objPtr); sl@0: sl@0: /* sl@0: * We need to fetch the pointer again because we have just sl@0: * reallocated the structure to make room for the Unicode data. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } sl@0: } sl@0: return stringPtr->numChars; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_GetUniChar -- sl@0: * sl@0: * Get the index'th Unicode character from the String object. The sl@0: * index is assumed to be in the appropriate range. sl@0: * sl@0: * Results: sl@0: * Returns the index'th Unicode character in the Object. sl@0: * sl@0: * Side effects: sl@0: * Fills unichar with the index'th Unicode character. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C Tcl_UniChar sl@0: Tcl_GetUniChar(objPtr, index) sl@0: Tcl_Obj *objPtr; /* The object to get the Unicode charater from. */ sl@0: int index; /* Get the index'th Unicode character. */ sl@0: { sl@0: Tcl_UniChar unichar; sl@0: String *stringPtr; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: if (stringPtr->numChars == -1) { sl@0: sl@0: /* sl@0: * We haven't yet calculated the length, so we don't have the sl@0: * Unicode str. We need to know the number of chars before we sl@0: * can do indexing. sl@0: */ sl@0: sl@0: Tcl_GetCharLength(objPtr); sl@0: sl@0: /* sl@0: * We need to fetch the pointer again because we may have just sl@0: * reallocated the structure. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } sl@0: if (stringPtr->hasUnicode == 0) { sl@0: sl@0: /* sl@0: * All of the characters in the Utf string are 1 byte chars, sl@0: * so we don't store the unicode char. We get the Utf string sl@0: * and convert the index'th byte to a Unicode character. sl@0: */ sl@0: sl@0: unichar = (Tcl_UniChar) objPtr->bytes[index]; sl@0: } else { sl@0: unichar = stringPtr->unicode[index]; sl@0: } sl@0: return unichar; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_GetUnicode -- sl@0: * sl@0: * Get the Unicode form of the String object. If sl@0: * the object is not already a String object, it will be converted sl@0: * to one. If the String object does not have a Unicode rep, then sl@0: * one is create from the UTF string format. sl@0: * sl@0: * Results: sl@0: * Returns a pointer to the object's internal Unicode string. sl@0: * sl@0: * Side effects: sl@0: * Converts the object to have the String internal rep. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C Tcl_UniChar * sl@0: Tcl_GetUnicode(objPtr) sl@0: Tcl_Obj *objPtr; /* The object to find the unicode string for. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { sl@0: sl@0: /* sl@0: * We haven't yet calculated the length, or all of the characters sl@0: * in the Utf string are 1 byte chars (so we didn't store the sl@0: * unicode str). Since this function must return a unicode string, sl@0: * and one has not yet been stored, force the Unicode to be sl@0: * calculated and stored now. sl@0: */ sl@0: sl@0: FillUnicodeRep(objPtr); sl@0: sl@0: /* sl@0: * We need to fetch the pointer again because we have just sl@0: * reallocated the structure to make room for the Unicode data. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } sl@0: return stringPtr->unicode; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_GetUnicodeFromObj -- sl@0: * sl@0: * Get the Unicode form of the String object with length. If sl@0: * the object is not already a String object, it will be converted sl@0: * to one. If the String object does not have a Unicode rep, then sl@0: * one is create from the UTF string format. sl@0: * sl@0: * Results: sl@0: * Returns a pointer to the object's internal Unicode string. sl@0: * sl@0: * Side effects: sl@0: * Converts the object to have the String internal rep. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C Tcl_UniChar * sl@0: Tcl_GetUnicodeFromObj(objPtr, lengthPtr) sl@0: Tcl_Obj *objPtr; /* The object to find the unicode string for. */ sl@0: int *lengthPtr; /* If non-NULL, the location where the sl@0: * string rep's unichar length should be sl@0: * stored. If NULL, no length is stored. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: if ((stringPtr->numChars == -1) || (stringPtr->hasUnicode == 0)) { sl@0: sl@0: /* sl@0: * We haven't yet calculated the length, or all of the characters sl@0: * in the Utf string are 1 byte chars (so we didn't store the sl@0: * unicode str). Since this function must return a unicode string, sl@0: * and one has not yet been stored, force the Unicode to be sl@0: * calculated and stored now. sl@0: */ sl@0: sl@0: FillUnicodeRep(objPtr); sl@0: sl@0: /* sl@0: * We need to fetch the pointer again because we have just sl@0: * reallocated the structure to make room for the Unicode data. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } sl@0: sl@0: if (lengthPtr != NULL) { sl@0: *lengthPtr = stringPtr->numChars; sl@0: } sl@0: return stringPtr->unicode; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_GetRange -- sl@0: * sl@0: * Create a Tcl Object that contains the chars between first and last sl@0: * of the object indicated by "objPtr". If the object is not already sl@0: * a String object, convert it to one. The first and last indices sl@0: * are assumed to be in the appropriate range. sl@0: * sl@0: * Results: sl@0: * Returns a new Tcl Object of the String type. sl@0: * sl@0: * Side effects: sl@0: * Changes the internal rep of "objPtr" to the String type. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C Tcl_Obj * sl@0: Tcl_GetRange(objPtr, first, last) sl@0: Tcl_Obj *objPtr; /* The Tcl object to find the range of. */ sl@0: int first; /* First index of the range. */ sl@0: int last; /* Last index of the range. */ sl@0: { sl@0: Tcl_Obj *newObjPtr; /* The Tcl object to find the range of. */ sl@0: String *stringPtr; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: if (stringPtr->numChars == -1) { sl@0: sl@0: /* sl@0: * We haven't yet calculated the length, so we don't have the sl@0: * Unicode str. We need to know the number of chars before we sl@0: * can do indexing. sl@0: */ sl@0: sl@0: Tcl_GetCharLength(objPtr); sl@0: sl@0: /* sl@0: * We need to fetch the pointer again because we may have just sl@0: * reallocated the structure. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } sl@0: sl@0: if (objPtr->bytes && stringPtr->numChars == objPtr->length) { sl@0: char *str = Tcl_GetString(objPtr); sl@0: sl@0: /* sl@0: * All of the characters in the Utf string are 1 byte chars, sl@0: * so we don't store the unicode char. Create a new string sl@0: * object containing the specified range of chars. sl@0: */ sl@0: sl@0: newObjPtr = Tcl_NewStringObj(&str[first], last-first+1); sl@0: sl@0: /* sl@0: * Since we know the new string only has 1-byte chars, we sl@0: * can set it's numChars field. sl@0: */ sl@0: sl@0: SetStringFromAny(NULL, newObjPtr); sl@0: stringPtr = GET_STRING(newObjPtr); sl@0: stringPtr->numChars = last-first+1; sl@0: } else { sl@0: newObjPtr = Tcl_NewUnicodeObj(stringPtr->unicode + first, sl@0: last-first+1); sl@0: } sl@0: return newObjPtr; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_SetStringObj -- sl@0: * sl@0: * Modify an object to hold a string that is a copy of the bytes sl@0: * indicated by the byte pointer and length arguments. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The object's string representation will be set to a copy of sl@0: * the "length" bytes starting at "bytes". If "length" is negative, use sl@0: * bytes up to the first NULL byte; i.e., assume "bytes" points to a sl@0: * C-style NULL-terminated string. The object's old string and internal sl@0: * representations are freed and the object's type is set NULL. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_SetStringObj(objPtr, bytes, length) sl@0: register Tcl_Obj *objPtr; /* Object whose internal rep to init. */ sl@0: CONST char *bytes; /* Points to the first of the length bytes sl@0: * used to initialize the object. */ sl@0: register int length; /* The number of bytes to copy from "bytes" sl@0: * when initializing the object. If sl@0: * negative, use bytes up to the first sl@0: * NULL byte.*/ sl@0: { sl@0: register Tcl_ObjType *oldTypePtr = objPtr->typePtr; sl@0: sl@0: /* sl@0: * Free any old string rep, then set the string rep to a copy of sl@0: * the length bytes starting at "bytes". sl@0: */ sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_SetStringObj called with shared object"); sl@0: } sl@0: sl@0: /* sl@0: * Set the type to NULL and free any internal rep for the old type. sl@0: */ sl@0: sl@0: if ((oldTypePtr != NULL) && (oldTypePtr->freeIntRepProc != NULL)) { sl@0: oldTypePtr->freeIntRepProc(objPtr); sl@0: } sl@0: objPtr->typePtr = NULL; sl@0: sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: if (length < 0) { sl@0: length = (bytes? strlen(bytes) : 0); sl@0: } sl@0: TclInitStringRep(objPtr, bytes, length); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_SetObjLength -- sl@0: * sl@0: * This procedure changes the length of the string representation sl@0: * of an object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * If the size of objPtr's string representation is greater than sl@0: * length, then it is reduced to length and a new terminating null sl@0: * byte is stored in the strength. If the length of the string sl@0: * representation is greater than length, the storage space is sl@0: * reallocated to the given length; a null byte is stored at the sl@0: * end, but other bytes past the end of the original string sl@0: * representation are undefined. The object's internal sl@0: * representation is changed to "expendable string". sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_SetObjLength(objPtr, length) sl@0: register Tcl_Obj *objPtr; /* Pointer to object. This object must sl@0: * not currently be shared. */ sl@0: register int length; /* Number of bytes desired for string sl@0: * representation of object, not including sl@0: * terminating null byte. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_SetObjLength called with shared object"); sl@0: } sl@0: SetStringFromAny(NULL, objPtr); sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: /* Check that we're not extending a pure unicode string */ sl@0: sl@0: if (length > (int) stringPtr->allocated && sl@0: (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { sl@0: char *new; sl@0: sl@0: /* sl@0: * Not enough space in current string. Reallocate the string sl@0: * space and free the old string. sl@0: */ sl@0: if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) { sl@0: new = (char *) ckrealloc((char *)objPtr->bytes, sl@0: (unsigned)(length+1)); sl@0: } else { sl@0: new = (char *) ckalloc((unsigned) (length+1)); sl@0: if (objPtr->bytes != NULL && objPtr->length != 0) { sl@0: memcpy((VOID *) new, (VOID *) objPtr->bytes, sl@0: (size_t) objPtr->length); sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: } sl@0: } sl@0: objPtr->bytes = new; sl@0: stringPtr->allocated = length; sl@0: /* Invalidate the unicode data. */ sl@0: stringPtr->hasUnicode = 0; sl@0: } sl@0: sl@0: if (objPtr->bytes != NULL) { sl@0: objPtr->length = length; sl@0: if (objPtr->bytes != tclEmptyStringRep) { sl@0: /* Ensure the string is NULL-terminated */ sl@0: objPtr->bytes[length] = 0; sl@0: } sl@0: /* Invalidate the unicode data. */ sl@0: stringPtr->numChars = -1; sl@0: stringPtr->hasUnicode = 0; sl@0: } else { sl@0: /* Changing length of pure unicode string */ sl@0: size_t uallocated = STRING_UALLOC(length); sl@0: if (uallocated > stringPtr->uallocated) { sl@0: stringPtr = (String *) ckrealloc((char*) stringPtr, sl@0: STRING_SIZE(uallocated)); sl@0: SET_STRING(objPtr, stringPtr); sl@0: stringPtr->uallocated = uallocated; sl@0: } sl@0: stringPtr->numChars = length; sl@0: stringPtr->hasUnicode = (length > 0); sl@0: /* Ensure the string is NULL-terminated */ sl@0: stringPtr->unicode[length] = 0; sl@0: stringPtr->allocated = 0; sl@0: objPtr->length = 0; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AttemptSetObjLength -- sl@0: * sl@0: * This procedure changes the length of the string representation sl@0: * of an object. It uses the attempt* (non-panic'ing) memory allocators. sl@0: * sl@0: * Results: sl@0: * 1 if the requested memory was allocated, 0 otherwise. sl@0: * sl@0: * Side effects: sl@0: * If the size of objPtr's string representation is greater than sl@0: * length, then it is reduced to length and a new terminating null sl@0: * byte is stored in the strength. If the length of the string sl@0: * representation is greater than length, the storage space is sl@0: * reallocated to the given length; a null byte is stored at the sl@0: * end, but other bytes past the end of the original string sl@0: * representation are undefined. The object's internal sl@0: * representation is changed to "expendable string". sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C int sl@0: Tcl_AttemptSetObjLength(objPtr, length) sl@0: register Tcl_Obj *objPtr; /* Pointer to object. This object must sl@0: * not currently be shared. */ sl@0: register int length; /* Number of bytes desired for string sl@0: * representation of object, not including sl@0: * terminating null byte. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_AttemptSetObjLength called with shared object"); sl@0: } sl@0: SetStringFromAny(NULL, objPtr); sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: /* Check that we're not extending a pure unicode string */ sl@0: sl@0: if (length > (int) stringPtr->allocated && sl@0: (objPtr->bytes != NULL || stringPtr->hasUnicode == 0)) { sl@0: char *new; sl@0: sl@0: /* sl@0: * Not enough space in current string. Reallocate the string sl@0: * space and free the old string. sl@0: */ sl@0: if (objPtr->bytes != tclEmptyStringRep && objPtr->bytes != NULL) { sl@0: new = (char *) attemptckrealloc((char *)objPtr->bytes, sl@0: (unsigned)(length+1)); sl@0: if (new == NULL) { sl@0: return 0; sl@0: } sl@0: } else { sl@0: new = (char *) attemptckalloc((unsigned) (length+1)); sl@0: if (new == NULL) { sl@0: return 0; sl@0: } sl@0: if (objPtr->bytes != NULL && objPtr->length != 0) { sl@0: memcpy((VOID *) new, (VOID *) objPtr->bytes, sl@0: (size_t) objPtr->length); sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: } sl@0: } sl@0: objPtr->bytes = new; sl@0: stringPtr->allocated = length; sl@0: /* Invalidate the unicode data. */ sl@0: stringPtr->hasUnicode = 0; sl@0: } sl@0: sl@0: if (objPtr->bytes != NULL) { sl@0: objPtr->length = length; sl@0: if (objPtr->bytes != tclEmptyStringRep) { sl@0: /* Ensure the string is NULL-terminated */ sl@0: objPtr->bytes[length] = 0; sl@0: } sl@0: /* Invalidate the unicode data. */ sl@0: stringPtr->numChars = -1; sl@0: stringPtr->hasUnicode = 0; sl@0: } else { sl@0: /* Changing length of pure unicode string */ sl@0: size_t uallocated = STRING_UALLOC(length); sl@0: if (uallocated > stringPtr->uallocated) { sl@0: stringPtr = (String *) attemptckrealloc((char*) stringPtr, sl@0: STRING_SIZE(uallocated)); sl@0: if (stringPtr == NULL) { sl@0: return 0; sl@0: } sl@0: SET_STRING(objPtr, stringPtr); sl@0: stringPtr->uallocated = uallocated; sl@0: } sl@0: stringPtr->numChars = length; sl@0: stringPtr->hasUnicode = (length > 0); sl@0: /* Ensure the string is NULL-terminated */ sl@0: stringPtr->unicode[length] = 0; sl@0: stringPtr->allocated = 0; sl@0: objPtr->length = 0; sl@0: } sl@0: return 1; sl@0: } sl@0: sl@0: /* sl@0: *--------------------------------------------------------------------------- sl@0: * sl@0: * TclSetUnicodeObj -- sl@0: * sl@0: * Modify an object to hold the Unicode string indicated by "unicode". sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * Memory allocated for new "String" internal rep. sl@0: * sl@0: *--------------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_SetUnicodeObj(objPtr, unicode, numChars) sl@0: Tcl_Obj *objPtr; /* The object to set the string of. */ sl@0: CONST Tcl_UniChar *unicode; /* The unicode string used to initialize sl@0: * the object. */ sl@0: int numChars; /* Number of characters in the unicode sl@0: * string. */ sl@0: { sl@0: Tcl_ObjType *typePtr; sl@0: String *stringPtr; sl@0: size_t uallocated; sl@0: sl@0: if (numChars < 0) { sl@0: numChars = 0; sl@0: if (unicode) { sl@0: while (unicode[numChars] != 0) { numChars++; } sl@0: } sl@0: } sl@0: uallocated = STRING_UALLOC(numChars); sl@0: sl@0: /* sl@0: * Free the internal rep if one exists, and invalidate the string rep. sl@0: */ sl@0: sl@0: typePtr = objPtr->typePtr; sl@0: if ((typePtr != NULL) && (typePtr->freeIntRepProc) != NULL) { sl@0: (*typePtr->freeIntRepProc)(objPtr); sl@0: } sl@0: objPtr->typePtr = &tclStringType; sl@0: sl@0: /* sl@0: * Allocate enough space for the String structure + Unicode string. sl@0: */ sl@0: sl@0: stringPtr = (String *) ckalloc(STRING_SIZE(uallocated)); sl@0: stringPtr->numChars = numChars; sl@0: stringPtr->uallocated = uallocated; sl@0: stringPtr->hasUnicode = (numChars > 0); sl@0: stringPtr->allocated = 0; sl@0: memcpy((VOID *) stringPtr->unicode, (VOID *) unicode, uallocated); sl@0: stringPtr->unicode[numChars] = 0; sl@0: SET_STRING(objPtr, stringPtr); sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: return; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AppendToObj -- sl@0: * sl@0: * This procedure appends a sequence of bytes to an object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The bytes at *bytes are appended to the string representation sl@0: * of objPtr. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_AppendToObj(objPtr, bytes, length) sl@0: register Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST char *bytes; /* Points to the bytes to append to the sl@0: * object. */ sl@0: register int length; /* The number of bytes to append from sl@0: * "bytes". If < 0, then append all bytes sl@0: * up to NULL byte. */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_AppendToObj called with shared object"); sl@0: } sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: sl@0: if (length < 0) { sl@0: length = (bytes ? strlen(bytes) : 0); sl@0: } sl@0: if (length == 0) { sl@0: return; sl@0: } sl@0: sl@0: /* sl@0: * If objPtr has a valid Unicode rep, then append the Unicode sl@0: * conversion of "bytes" to the objPtr's Unicode rep, otherwise sl@0: * append "bytes" to objPtr's string rep. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if (stringPtr->hasUnicode != 0) { sl@0: AppendUtfToUnicodeRep(objPtr, bytes, length); sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: } else { sl@0: AppendUtfToUtfRep(objPtr, bytes, length); sl@0: } sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AppendUnicodeToObj -- sl@0: * sl@0: * This procedure appends a Unicode string to an object in the sl@0: * most efficient manner possible. Length must be >= 0. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * Invalidates the string rep and creates a new Unicode string. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_AppendUnicodeToObj(objPtr, unicode, length) sl@0: register Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST Tcl_UniChar *unicode; /* The unicode string to append to the sl@0: * object. */ sl@0: int length; /* Number of chars in "unicode". */ sl@0: { sl@0: String *stringPtr; sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_AppendUnicodeToObj called with shared object"); sl@0: } sl@0: sl@0: if (length == 0) { sl@0: return; sl@0: } sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: /* sl@0: * If objPtr has a valid Unicode rep, then append the "unicode" sl@0: * to the objPtr's Unicode rep, otherwise the UTF conversion of sl@0: * "unicode" to objPtr's string rep. sl@0: */ sl@0: sl@0: if (stringPtr->hasUnicode != 0) { sl@0: AppendUnicodeToUnicodeRep(objPtr, unicode, length); sl@0: } else { sl@0: AppendUnicodeToUtfRep(objPtr, unicode, length); sl@0: } sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AppendObjToObj -- sl@0: * sl@0: * This procedure appends the string rep of one object to another. sl@0: * "objPtr" cannot be a shared object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The string rep of appendObjPtr is appended to the string sl@0: * representation of objPtr. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_AppendObjToObj(objPtr, appendObjPtr) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: Tcl_Obj *appendObjPtr; /* Object to append. */ sl@0: { sl@0: String *stringPtr; sl@0: int length, numChars, allOneByteChars; sl@0: char *bytes; sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: sl@0: /* sl@0: * If objPtr has a valid Unicode rep, then get a Unicode string sl@0: * from appendObjPtr and append it. sl@0: */ sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if (stringPtr->hasUnicode != 0) { sl@0: sl@0: /* sl@0: * If appendObjPtr is not of the "String" type, don't convert it. sl@0: */ sl@0: sl@0: if (appendObjPtr->typePtr == &tclStringType) { sl@0: stringPtr = GET_STRING(appendObjPtr); sl@0: if ((stringPtr->numChars == -1) sl@0: || (stringPtr->hasUnicode == 0)) { sl@0: sl@0: /* sl@0: * If appendObjPtr is a string obj with no valid Unicode sl@0: * rep, then fill its unicode rep. sl@0: */ sl@0: sl@0: FillUnicodeRep(appendObjPtr); sl@0: stringPtr = GET_STRING(appendObjPtr); sl@0: } sl@0: AppendUnicodeToUnicodeRep(objPtr, stringPtr->unicode, sl@0: stringPtr->numChars); sl@0: } else { sl@0: bytes = Tcl_GetStringFromObj(appendObjPtr, &length); sl@0: AppendUtfToUnicodeRep(objPtr, bytes, length); sl@0: } sl@0: return; sl@0: } sl@0: sl@0: /* sl@0: * Append to objPtr's UTF string rep. If we know the number of sl@0: * characters in both objects before appending, then set the combined sl@0: * number of characters in the final (appended-to) object. sl@0: */ sl@0: sl@0: bytes = Tcl_GetStringFromObj(appendObjPtr, &length); sl@0: sl@0: allOneByteChars = 0; sl@0: numChars = stringPtr->numChars; sl@0: if ((numChars >= 0) && (appendObjPtr->typePtr == &tclStringType)) { sl@0: stringPtr = GET_STRING(appendObjPtr); sl@0: if ((stringPtr->numChars >= 0) && (stringPtr->numChars == length)) { sl@0: numChars += stringPtr->numChars; sl@0: allOneByteChars = 1; sl@0: } sl@0: } sl@0: sl@0: AppendUtfToUtfRep(objPtr, bytes, length); sl@0: sl@0: if (allOneByteChars) { sl@0: stringPtr = GET_STRING(objPtr); sl@0: stringPtr->numChars = numChars; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * AppendUnicodeToUnicodeRep -- sl@0: * sl@0: * This procedure appends the contents of "unicode" to the Unicode sl@0: * rep of "objPtr". objPtr must already have a valid Unicode rep. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * objPtr's internal rep is reallocated. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: AppendUnicodeToUnicodeRep(objPtr, unicode, appendNumChars) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST Tcl_UniChar *unicode; /* String to append. */ sl@0: int appendNumChars; /* Number of chars of "unicode" to append. */ sl@0: { sl@0: String *stringPtr, *tmpString; sl@0: size_t numChars; sl@0: sl@0: if (appendNumChars < 0) { sl@0: appendNumChars = 0; sl@0: if (unicode) { sl@0: while (unicode[appendNumChars] != 0) { appendNumChars++; } sl@0: } sl@0: } sl@0: if (appendNumChars == 0) { sl@0: return; sl@0: } sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: stringPtr = GET_STRING(objPtr); sl@0: sl@0: /* sl@0: * If not enough space has been allocated for the unicode rep, sl@0: * reallocate the internal rep object with additional space. First sl@0: * try to double the required allocation; if that fails, try a more sl@0: * modest increase. See the "TCL STRING GROWTH ALGORITHM" comment at sl@0: * the top of this file for an explanation of this growth algorithm. sl@0: */ sl@0: sl@0: numChars = stringPtr->numChars + appendNumChars; sl@0: sl@0: if (STRING_UALLOC(numChars) >= stringPtr->uallocated) { sl@0: stringPtr->uallocated = STRING_UALLOC(2 * numChars); sl@0: tmpString = (String *) attemptckrealloc((char *)stringPtr, sl@0: STRING_SIZE(stringPtr->uallocated)); sl@0: if (tmpString == NULL) { sl@0: stringPtr->uallocated = sl@0: STRING_UALLOC(numChars + appendNumChars) sl@0: + TCL_GROWTH_MIN_ALLOC; sl@0: tmpString = (String *) ckrealloc((char *)stringPtr, sl@0: STRING_SIZE(stringPtr->uallocated)); sl@0: } sl@0: stringPtr = tmpString; sl@0: SET_STRING(objPtr, stringPtr); sl@0: } sl@0: sl@0: /* sl@0: * Copy the new string onto the end of the old string, then add the sl@0: * trailing null. sl@0: */ sl@0: sl@0: memcpy((VOID*) (stringPtr->unicode + stringPtr->numChars), unicode, sl@0: appendNumChars * sizeof(Tcl_UniChar)); sl@0: stringPtr->unicode[numChars] = 0; sl@0: stringPtr->numChars = numChars; sl@0: sl@0: Tcl_InvalidateStringRep(objPtr); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * AppendUnicodeToUtfRep -- sl@0: * sl@0: * This procedure converts the contents of "unicode" to UTF and sl@0: * appends the UTF to the string rep of "objPtr". sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * objPtr's internal rep is reallocated. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: AppendUnicodeToUtfRep(objPtr, unicode, numChars) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST Tcl_UniChar *unicode; /* String to convert to UTF. */ sl@0: int numChars; /* Number of chars of "unicode" to convert. */ sl@0: { sl@0: Tcl_DString dsPtr; sl@0: CONST char *bytes; sl@0: sl@0: if (numChars < 0) { sl@0: numChars = 0; sl@0: if (unicode) { sl@0: while (unicode[numChars] != 0) { numChars++; } sl@0: } sl@0: } sl@0: if (numChars == 0) { sl@0: return; sl@0: } sl@0: sl@0: Tcl_DStringInit(&dsPtr); sl@0: bytes = Tcl_UniCharToUtfDString(unicode, numChars, &dsPtr); sl@0: AppendUtfToUtfRep(objPtr, bytes, Tcl_DStringLength(&dsPtr)); sl@0: Tcl_DStringFree(&dsPtr); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * AppendUtfToUnicodeRep -- sl@0: * sl@0: * This procedure converts the contents of "bytes" to Unicode and sl@0: * appends the Unicode to the Unicode rep of "objPtr". objPtr must sl@0: * already have a valid Unicode rep. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * objPtr's internal rep is reallocated. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: AppendUtfToUnicodeRep(objPtr, bytes, numBytes) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST char *bytes; /* String to convert to Unicode. */ sl@0: int numBytes; /* Number of bytes of "bytes" to convert. */ sl@0: { sl@0: Tcl_DString dsPtr; sl@0: int numChars; sl@0: Tcl_UniChar *unicode; sl@0: sl@0: if (numBytes < 0) { sl@0: numBytes = (bytes ? strlen(bytes) : 0); sl@0: } sl@0: if (numBytes == 0) { sl@0: return; sl@0: } sl@0: sl@0: Tcl_DStringInit(&dsPtr); sl@0: numChars = Tcl_NumUtfChars(bytes, numBytes); sl@0: unicode = (Tcl_UniChar *)Tcl_UtfToUniCharDString(bytes, numBytes, &dsPtr); sl@0: AppendUnicodeToUnicodeRep(objPtr, unicode, numChars); sl@0: Tcl_DStringFree(&dsPtr); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * AppendUtfToUtfRep -- sl@0: * sl@0: * This procedure appends "numBytes" bytes of "bytes" to the UTF string sl@0: * rep of "objPtr". objPtr must already have a valid String rep. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * objPtr's internal rep is reallocated. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: AppendUtfToUtfRep(objPtr, bytes, numBytes) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: CONST char *bytes; /* String to append. */ sl@0: int numBytes; /* Number of bytes of "bytes" to append. */ sl@0: { sl@0: String *stringPtr; sl@0: int newLength, oldLength; sl@0: sl@0: if (numBytes < 0) { sl@0: numBytes = (bytes ? strlen(bytes) : 0); sl@0: } sl@0: if (numBytes == 0) { sl@0: return; sl@0: } sl@0: sl@0: /* sl@0: * Copy the new string onto the end of the old string, then add the sl@0: * trailing null. sl@0: */ sl@0: sl@0: oldLength = objPtr->length; sl@0: newLength = numBytes + oldLength; sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if (newLength > (int) stringPtr->allocated) { sl@0: sl@0: /* sl@0: * There isn't currently enough space in the string representation sl@0: * so allocate additional space. First, try to double the length sl@0: * required. If that fails, try a more modest allocation. See the sl@0: * "TCL STRING GROWTH ALGORITHM" comment at the top of this file for an sl@0: * explanation of this growth algorithm. sl@0: */ sl@0: sl@0: if (Tcl_AttemptSetObjLength(objPtr, 2 * newLength) == 0) { sl@0: Tcl_SetObjLength(objPtr, sl@0: newLength + numBytes + TCL_GROWTH_MIN_ALLOC); sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Invalidate the unicode data. sl@0: */ sl@0: sl@0: stringPtr->numChars = -1; sl@0: stringPtr->hasUnicode = 0; sl@0: sl@0: memcpy((VOID *) (objPtr->bytes + oldLength), (VOID *) bytes, sl@0: (size_t) numBytes); sl@0: objPtr->bytes[newLength] = 0; sl@0: objPtr->length = newLength; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AppendStringsToObjVA -- sl@0: * sl@0: * This procedure appends one or more null-terminated strings sl@0: * to an object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The contents of all the string arguments are appended to the sl@0: * string representation of objPtr. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_AppendStringsToObjVA (objPtr, argList) sl@0: Tcl_Obj *objPtr; /* Points to the object to append to. */ sl@0: va_list argList; /* Variable argument list. */ sl@0: { sl@0: #define STATIC_LIST_SIZE 16 sl@0: String *stringPtr; sl@0: int newLength, oldLength, attemptLength; sl@0: register char *string, *dst; sl@0: char *static_list[STATIC_LIST_SIZE]; sl@0: char **args = static_list; sl@0: int nargs_space = STATIC_LIST_SIZE; sl@0: int nargs, i; sl@0: sl@0: if (Tcl_IsShared(objPtr)) { sl@0: panic("Tcl_AppendStringsToObj called with shared object"); sl@0: } sl@0: sl@0: SetStringFromAny(NULL, objPtr); sl@0: sl@0: /* sl@0: * Figure out how much space is needed for all the strings, and sl@0: * expand the string representation if it isn't big enough. If no sl@0: * bytes would be appended, just return. Note that on some platforms sl@0: * (notably OS/390) the argList is an array so we need to use memcpy. sl@0: */ sl@0: sl@0: nargs = 0; sl@0: newLength = 0; sl@0: oldLength = objPtr->length; sl@0: while (1) { sl@0: string = va_arg(argList, char *); sl@0: if (string == NULL) { sl@0: break; sl@0: } sl@0: if (nargs >= nargs_space) { sl@0: /* sl@0: * Expand the args buffer sl@0: */ sl@0: nargs_space += STATIC_LIST_SIZE; sl@0: if (args == static_list) { sl@0: args = (void *)ckalloc(nargs_space * sizeof(char *)); sl@0: for (i = 0; i < nargs; ++i) { sl@0: args[i] = static_list[i]; sl@0: } sl@0: } else { sl@0: args = (void *)ckrealloc((void *)args, sl@0: nargs_space * sizeof(char *)); sl@0: } sl@0: } sl@0: newLength += strlen(string); sl@0: args[nargs++] = string; sl@0: } sl@0: if (newLength == 0) { sl@0: goto done; sl@0: } sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if (oldLength + newLength > (int) stringPtr->allocated) { sl@0: sl@0: /* sl@0: * There isn't currently enough space in the string sl@0: * representation, so allocate additional space. If the current sl@0: * string representation isn't empty (i.e. it looks like we're sl@0: * doing a series of appends) then try to allocate extra space to sl@0: * accomodate future growth: first try to double the required memory; sl@0: * if that fails, try a more modest allocation. See the "TCL STRING sl@0: * GROWTH ALGORITHM" comment at the top of this file for an explanation sl@0: * of this growth algorithm. Otherwise, if the current string sl@0: * representation is empty, exactly enough memory is allocated. sl@0: */ sl@0: sl@0: if (oldLength == 0) { sl@0: Tcl_SetObjLength(objPtr, newLength); sl@0: } else { sl@0: attemptLength = 2 * (oldLength + newLength); sl@0: if (Tcl_AttemptSetObjLength(objPtr, attemptLength) == 0) { sl@0: attemptLength = oldLength + (2 * newLength) + sl@0: TCL_GROWTH_MIN_ALLOC; sl@0: Tcl_SetObjLength(objPtr, attemptLength); sl@0: } sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Make a second pass through the arguments, appending all the sl@0: * strings to the object. sl@0: */ sl@0: sl@0: dst = objPtr->bytes + oldLength; sl@0: for (i = 0; i < nargs; ++i) { sl@0: string = args[i]; sl@0: if (string == NULL) { sl@0: break; sl@0: } sl@0: while (*string != 0) { sl@0: *dst = *string; sl@0: dst++; sl@0: string++; sl@0: } sl@0: } sl@0: sl@0: /* sl@0: * Add a null byte to terminate the string. However, be careful: sl@0: * it's possible that the object is totally empty (if it was empty sl@0: * originally and there was nothing to append). In this case dst is sl@0: * NULL; just leave everything alone. sl@0: */ sl@0: sl@0: if (dst != NULL) { sl@0: *dst = 0; sl@0: } sl@0: objPtr->length = oldLength + newLength; sl@0: sl@0: done: sl@0: /* sl@0: * If we had to allocate a buffer from the heap, sl@0: * free it now. sl@0: */ sl@0: sl@0: if (args != static_list) { sl@0: ckfree((void *)args); sl@0: } sl@0: #undef STATIC_LIST_SIZE sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * Tcl_AppendStringsToObj -- sl@0: * sl@0: * This procedure appends one or more null-terminated strings sl@0: * to an object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The contents of all the string arguments are appended to the sl@0: * string representation of objPtr. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: EXPORT_C void sl@0: Tcl_AppendStringsToObj TCL_VARARGS_DEF(Tcl_Obj *,arg1) sl@0: { sl@0: register Tcl_Obj *objPtr; sl@0: va_list argList; sl@0: sl@0: objPtr = TCL_VARARGS_START(Tcl_Obj *,arg1,argList); sl@0: Tcl_AppendStringsToObjVA(objPtr, argList); sl@0: va_end(argList); sl@0: } sl@0: sl@0: /* sl@0: *--------------------------------------------------------------------------- sl@0: * sl@0: * FillUnicodeRep -- sl@0: * sl@0: * Populate the Unicode internal rep with the Unicode form of its string sl@0: * rep. The object must alread have a "String" internal rep. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * Reallocates the String internal rep. sl@0: * sl@0: *--------------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: FillUnicodeRep(objPtr) sl@0: Tcl_Obj *objPtr; /* The object in which to fill the unicode rep. */ sl@0: { sl@0: String *stringPtr; sl@0: size_t uallocated; sl@0: char *src, *srcEnd; sl@0: Tcl_UniChar *dst; sl@0: src = objPtr->bytes; sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if (stringPtr->numChars == -1) { sl@0: stringPtr->numChars = Tcl_NumUtfChars(src, objPtr->length); sl@0: } sl@0: stringPtr->hasUnicode = (stringPtr->numChars > 0); sl@0: sl@0: uallocated = STRING_UALLOC(stringPtr->numChars); sl@0: if (uallocated > stringPtr->uallocated) { sl@0: sl@0: /* sl@0: * If not enough space has been allocated for the unicode rep, sl@0: * reallocate the internal rep object. sl@0: */ sl@0: sl@0: /* sl@0: * There isn't currently enough space in the Unicode sl@0: * representation so allocate additional space. If the current sl@0: * Unicode representation isn't empty (i.e. it looks like we've sl@0: * done some appends) then overallocate the space so sl@0: * that we won't have to do as much reallocation in the future. sl@0: */ sl@0: sl@0: if (stringPtr->uallocated > 0) { sl@0: uallocated *= 2; sl@0: } sl@0: stringPtr = (String *) ckrealloc((char*) stringPtr, sl@0: STRING_SIZE(uallocated)); sl@0: stringPtr->uallocated = uallocated; sl@0: } sl@0: sl@0: /* sl@0: * Convert src to Unicode and store the coverted data in "unicode". sl@0: */ sl@0: sl@0: srcEnd = src + objPtr->length; sl@0: for (dst = stringPtr->unicode; src < srcEnd; dst++) { sl@0: src += TclUtfToUniChar(src, dst); sl@0: } sl@0: *dst = 0; sl@0: sl@0: SET_STRING(objPtr, stringPtr); sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * DupStringInternalRep -- sl@0: * sl@0: * Initialize the internal representation of a new Tcl_Obj to a sl@0: * copy of the internal representation of an existing string object. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * copyPtr's internal rep is set to a copy of srcPtr's internal sl@0: * representation. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: DupStringInternalRep(srcPtr, copyPtr) sl@0: register Tcl_Obj *srcPtr; /* Object with internal rep to copy. Must sl@0: * have an internal rep of type "String". */ sl@0: register Tcl_Obj *copyPtr; /* Object with internal rep to set. Must sl@0: * not currently have an internal rep.*/ sl@0: { sl@0: String *srcStringPtr = GET_STRING(srcPtr); sl@0: String *copyStringPtr = NULL; sl@0: sl@0: /* sl@0: * If the src obj is a string of 1-byte Utf chars, then copy the sl@0: * string rep of the source object and create an "empty" Unicode sl@0: * internal rep for the new object. Otherwise, copy Unicode sl@0: * internal rep, and invalidate the string rep of the new object. sl@0: */ sl@0: sl@0: if (srcStringPtr->hasUnicode == 0) { sl@0: copyStringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0))); sl@0: copyStringPtr->uallocated = STRING_UALLOC(0); sl@0: } else { sl@0: copyStringPtr = (String *) ckalloc( sl@0: STRING_SIZE(srcStringPtr->uallocated)); sl@0: copyStringPtr->uallocated = srcStringPtr->uallocated; sl@0: sl@0: memcpy((VOID *) copyStringPtr->unicode, sl@0: (VOID *) srcStringPtr->unicode, sl@0: (size_t) srcStringPtr->numChars * sizeof(Tcl_UniChar)); sl@0: copyStringPtr->unicode[srcStringPtr->numChars] = 0; sl@0: } sl@0: copyStringPtr->numChars = srcStringPtr->numChars; sl@0: copyStringPtr->hasUnicode = srcStringPtr->hasUnicode; sl@0: copyStringPtr->allocated = srcStringPtr->allocated; sl@0: sl@0: /* sl@0: * Tricky point: the string value was copied by generic object sl@0: * management code, so it doesn't contain any extra bytes that sl@0: * might exist in the source object. sl@0: */ sl@0: sl@0: copyStringPtr->allocated = copyPtr->length; sl@0: sl@0: SET_STRING(copyPtr, copyStringPtr); sl@0: copyPtr->typePtr = &tclStringType; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * SetStringFromAny -- sl@0: * sl@0: * Create an internal representation of type "String" for an object. sl@0: * sl@0: * Results: sl@0: * This operation always succeeds and returns TCL_OK. sl@0: * sl@0: * Side effects: sl@0: * Any old internal reputation for objPtr is freed and the sl@0: * internal representation is set to "String". sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static int sl@0: SetStringFromAny(interp, objPtr) sl@0: Tcl_Interp *interp; /* Used for error reporting if not NULL. */ sl@0: register Tcl_Obj *objPtr; /* The object to convert. */ sl@0: { sl@0: /* sl@0: * The Unicode object is optimized for the case where each UTF char sl@0: * in a string is only one byte. In this case, we store the value of sl@0: * numChars, but we don't copy the bytes to the unicodeObj->unicode. sl@0: */ sl@0: sl@0: if (objPtr->typePtr != &tclStringType) { sl@0: String *stringPtr; sl@0: sl@0: if (objPtr->typePtr != NULL) { sl@0: if (objPtr->bytes == NULL) { sl@0: objPtr->typePtr->updateStringProc(objPtr); sl@0: } sl@0: if ((objPtr->typePtr->freeIntRepProc) != NULL) { sl@0: (*objPtr->typePtr->freeIntRepProc)(objPtr); sl@0: } sl@0: } sl@0: objPtr->typePtr = &tclStringType; sl@0: sl@0: /* sl@0: * Allocate enough space for the basic String structure. sl@0: */ sl@0: sl@0: stringPtr = (String *) ckalloc(STRING_SIZE(STRING_UALLOC(0))); sl@0: stringPtr->numChars = -1; sl@0: stringPtr->uallocated = STRING_UALLOC(0); sl@0: stringPtr->hasUnicode = 0; sl@0: sl@0: if (objPtr->bytes != NULL) { sl@0: stringPtr->allocated = objPtr->length; sl@0: objPtr->bytes[objPtr->length] = 0; sl@0: } else { sl@0: objPtr->length = 0; sl@0: } sl@0: SET_STRING(objPtr, stringPtr); sl@0: } sl@0: return TCL_OK; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * UpdateStringOfString -- sl@0: * sl@0: * Update the string representation for an object whose internal sl@0: * representation is "String". sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * The object's string may be set by converting its Unicode sl@0: * represention to UTF format. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: UpdateStringOfString(objPtr) sl@0: Tcl_Obj *objPtr; /* Object with string rep to update. */ sl@0: { sl@0: int i, size; sl@0: Tcl_UniChar *unicode; sl@0: char dummy[TCL_UTF_MAX]; sl@0: char *dst; sl@0: String *stringPtr; sl@0: sl@0: stringPtr = GET_STRING(objPtr); sl@0: if ((objPtr->bytes == NULL) || (stringPtr->allocated == 0)) { sl@0: sl@0: if (stringPtr->numChars <= 0) { sl@0: sl@0: /* sl@0: * If there is no Unicode rep, or the string has 0 chars, sl@0: * then set the string rep to an empty string. sl@0: */ sl@0: sl@0: objPtr->bytes = tclEmptyStringRep; sl@0: objPtr->length = 0; sl@0: return; sl@0: } sl@0: sl@0: unicode = stringPtr->unicode; sl@0: sl@0: /* sl@0: * Translate the Unicode string to UTF. "size" will hold the sl@0: * amount of space the UTF string needs. sl@0: */ sl@0: sl@0: size = 0; sl@0: for (i = 0; i < stringPtr->numChars; i++) { sl@0: size += Tcl_UniCharToUtf((int) unicode[i], dummy); sl@0: } sl@0: sl@0: dst = (char *) ckalloc((unsigned) (size + 1)); sl@0: objPtr->bytes = dst; sl@0: objPtr->length = size; sl@0: stringPtr->allocated = size; sl@0: sl@0: for (i = 0; i < stringPtr->numChars; i++) { sl@0: dst += Tcl_UniCharToUtf(unicode[i], dst); sl@0: } sl@0: *dst = '\0'; sl@0: } sl@0: return; sl@0: } sl@0: sl@0: /* sl@0: *---------------------------------------------------------------------- sl@0: * sl@0: * FreeStringInternalRep -- sl@0: * sl@0: * Deallocate the storage associated with a String data object's sl@0: * internal representation. sl@0: * sl@0: * Results: sl@0: * None. sl@0: * sl@0: * Side effects: sl@0: * Frees memory. sl@0: * sl@0: *---------------------------------------------------------------------- sl@0: */ sl@0: sl@0: static void sl@0: FreeStringInternalRep(objPtr) sl@0: Tcl_Obj *objPtr; /* Object with internal rep to free. */ sl@0: { sl@0: ckfree((char *) GET_STRING(objPtr)); sl@0: }