sl@0: /* sl@0: ********************************************************************** sl@0: * Copyright (C) 1999-2002, International Business Machines sl@0: * Corporation and others. All Rights Reserved. sl@0: ********************************************************************** sl@0: * sl@0: * File USC_IMPL.H sl@0: * sl@0: * Modification History: sl@0: * sl@0: * Date Name Description sl@0: * 07/08/2002 Eric Mader Creation. sl@0: ****************************************************************************** sl@0: */ sl@0: sl@0: #ifndef USC_IMPL_H sl@0: #define USC_IMPL_H sl@0: #include "unicode/utypes.h" sl@0: #include "unicode/uscript.h" sl@0: sl@0: /** sl@0: * UScriptRun is used to find runs of characters in sl@0: * the same script. It implements a simple iterator over an array sl@0: * of characters. The iterator will resolve script-neutral characters sl@0: * like punctuation into the script of the surrounding characters. sl@0: * sl@0: * The iterator will try to match paired punctuation. If it sees an sl@0: * opening punctuation character, it will remember the script that sl@0: * was assigned to that character, and assign the same script to the sl@0: * matching closing punctuation. sl@0: * sl@0: * Scripts are chosen based on the UScriptCode enumeration. sl@0: * No attempt is made to combine related scripts into a single run. In sl@0: * particular, Hiragana, Katakana, and Han characters will appear in separate sl@0: * runs. sl@0: sl@0: * Here is an example of how to iterate over script runs: sl@0: *
sl@0:  * \code
sl@0:  * void printScriptRuns(const UChar *text, int32_t length)
sl@0:  * {
sl@0:  *     UErrorCode error = U_ZERO_ERROR;
sl@0:  *     UScriptRun *scriptRun = uscript_openRun(text, length, &error);
sl@0:  *     int32_t start = 0, limit = 0;
sl@0:  *     UScriptCode code = USCRIPT_INVALID_CODE;
sl@0:  *
sl@0:  *     while (uscript_nextRun(scriptRun, &start, &limit, &code)) {
sl@0:  *         printf("Script '%s' from %d to %d.\n", uscript_getName(code), start, limit);
sl@0:  *     }
sl@0:  *
sl@0:  *     uscript_closeRun(scriptRun);
sl@0:  *  }
sl@0:  * \endcode
sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: struct UScriptRun; sl@0: sl@0: typedef struct UScriptRun UScriptRun; sl@0: sl@0: /** sl@0: * Create a UScriptRun object for iterating over the given text. This object must sl@0: * be freed using uscript_closeRun(). Note that this object does not copy the source text, sl@0: * only the pointer to it. You must make sure that the pointer remains valid until you call sl@0: * uscript_closeRun() or uscript_setRunText(). sl@0: * sl@0: * @param src is the address of the array of characters over which to iterate. sl@0: * If src == NULL and length == 0, sl@0: * an empty UScriptRun object will be returned. sl@0: * sl@0: * @param length is the number of characters over which to iterate. sl@0: * sl@0: * @param pErrorCode is a pointer to a valid UErrorCode value. If this value sl@0: * indicates a failure on entry, the function will immediately return. sl@0: * On exit the value will indicate the success of the operation. sl@0: * sl@0: * @return the address of the UScriptRun object which will iterate over the text, sl@0: * or NULL if the operation failed. sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: U_CAPI UScriptRun * U_EXPORT2 sl@0: uscript_openRun(const UChar *src, int32_t length, UErrorCode *pErrorCode); sl@0: sl@0: /** sl@0: * Frees the given UScriptRun object and any storage associated with it. sl@0: * On return, scriptRun no longer points to a valid UScriptRun object. sl@0: * sl@0: * @param scriptRun is the UScriptRun object which will be freed. sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: U_CAPI void U_EXPORT2 sl@0: uscript_closeRun(UScriptRun *scriptRun); sl@0: sl@0: /** sl@0: * Reset the UScriptRun object so that it will start iterating from sl@0: * the beginning. sl@0: * sl@0: * @param scriptRun is the address of the UScriptRun object to be reset. 
sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: U_CAPI void U_EXPORT2 sl@0: uscript_resetRun(UScriptRun *scriptRun); sl@0: sl@0: /** sl@0: * Change the text over which the given UScriptRun object iterates. sl@0: * sl@0: * @param scriptRun is the UScriptRun object which will be changed. sl@0: * sl@0: * @param src is the address of the new array of characters over which to iterate. sl@0: * If src == NULL and length == 0, sl@0: * the UScriptRun object will become empty. sl@0: * sl@0: * @param length is the new number of characters over which to iterate. sl@0: * sl@0: * @param pErrorCode is a pointer to a valid UErrorCode value. If this value sl@0: * indicates a failure on entry, the function will immediately return. sl@0: * On exit the value will indicate the success of the operation. sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: U_CAPI void U_EXPORT2 sl@0: uscript_setRunText(UScriptRun *scriptRun, const UChar *src, int32_t length, UErrorCode *pErrorCode); sl@0: sl@0: /** sl@0: * Advance the UScriptRun object to the next script run, return the start and limit sl@0: * offsets, and the script of the run. sl@0: * sl@0: * @param scriptRun is the address of the UScriptRun object. sl@0: * sl@0: * @param pRunStart is a pointer to the variable to receive the starting offset of the next run. sl@0: * This pointer can be NULL if the value is not needed. sl@0: * sl@0: * @param pRunLimit is a pointer to the variable to receive the limit offset of the next run. sl@0: * This pointer can be NULL if the value is not needed. sl@0: * sl@0: * @param pRunScript is a pointer to the variable to receive the UScriptCode for the sl@0: * script of the current run. This pointer can be NULL if the value is not needed. sl@0: * sl@0: * @return true if there was another script run. sl@0: * sl@0: * @draft ICU 2.2 sl@0: */ sl@0: U_CAPI UBool U_EXPORT2 sl@0: uscript_nextRun(UScriptRun *scriptRun, int32_t *pRunStart, int32_t *pRunLimit, UScriptCode *pRunScript); sl@0: sl@0: #endif