sl@0: /* sl@0: ******************************************************************************* sl@0: * sl@0: * Copyright (C) 2003, International Business Machines sl@0: * Corporation and others. All Rights Reserved. sl@0: * sl@0: ******************************************************************************* sl@0: * file name: unorm_it.h sl@0: * encoding: US-ASCII sl@0: * tab size: 8 (not used) sl@0: * indentation:4 sl@0: * sl@0: * created on: 2003jan21 sl@0: * created by: Markus W. Scherer sl@0: */ sl@0: sl@0: #ifndef __UNORM_IT_H__ sl@0: #define __UNORM_IT_H__ sl@0: sl@0: #include "unicode/utypes.h" sl@0: sl@0: #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_NORMALIZATION sl@0: sl@0: #include "unicode/uiter.h" sl@0: #include "unicode/unorm.h" sl@0: sl@0: /** sl@0: * Normalizing UCharIterator wrapper. sl@0: * This internal API basically duplicates the functionality of the C++ Normalizer sl@0: * but sl@0: * - it actually implements a character iterator (UCharIterator) sl@0: * with few restrictions (see unorm_setIter()) sl@0: * - it supports UCharIterator getState()/setState() sl@0: * - it uses lower-level APIs and buffers more text and states, sl@0: * hopefully resulting in higher performance sl@0: * sl@0: * Usage example: sl@0: * \code sl@0: * function(UCharIterator *srcIter) { sl@0: * UNormIterator *uni; sl@0: * UCharIterator *iter; sl@0: * UErrorCode errorCode; sl@0: * sl@0: * errorCode=U_ZERO_ERROR; sl@0: * uni=unorm_openIter(NULL, 0, &errorCode); sl@0: * if(U_FAILURE(errorCode)) { sl@0: * // report error sl@0: * return; sl@0: * } sl@0: * sl@0: * iter=unorm_setIter(uni, srcIter, UNORM_FCD, &errorCode); sl@0: * if(U_FAILURE(errorCode)) { sl@0: * // report error sl@0: * } else { sl@0: * // use iter to iterate over the canonically ordered sl@0: * // version of srcIter's text sl@0: * uint32_t state; sl@0: * sl@0: * ... sl@0: * sl@0: * state=uiter_getState(iter); sl@0: * if(state!=UITER_NO_STATE) { sl@0: * // use valid state, store it, use iter some more sl@0: * ... 
sl@0: * sl@0: * // later restore iter to the saved state: sl@0: * uiter_setState(iter, state, &errorCode); sl@0: * sl@0: * ... sl@0: * } sl@0: * sl@0: * ... sl@0: * } sl@0: * unorm_closeIter(uni); sl@0: * } sl@0: * \endcode sl@0: * sl@0: * See also the ICU test suites. sl@0: * sl@0: * @internal sl@0: */ sl@0: struct UNormIterator; sl@0: typedef struct UNormIterator UNormIterator; sl@0: sl@0: /** sl@0: * Size of a stack buffer to hold a UNormIterator, see the stackMem parameter sl@0: * of unorm_openIter(). sl@0: * sl@0: * @internal sl@0: */ sl@0: #define UNORM_ITER_SIZE 1024 sl@0: sl@0: /** sl@0: * Open a normalizing iterator. Must be closed later. sl@0: * Use unorm_setIter(). sl@0: * sl@0: * @param stackMem Pointer to preallocated (stack-allocated) buffer to hold sl@0: * the UNormIterator if possible; can be NULL. sl@0: * @param stackMemSize Number of bytes at stackMem; can be 0, sl@0: * or should be >= UNORM_ITER_SIZE for a non-NULL stackMem. sl@0: * @param pErrorCode ICU error code sl@0: * @return an allocated and pre-initialized UNormIterator sl@0: * @internal sl@0: */ sl@0: U_CAPI UNormIterator * U_EXPORT2 sl@0: unorm_openIter(void *stackMem, int32_t stackMemSize, UErrorCode *pErrorCode); sl@0: sl@0: /** sl@0: * Close a normalizing iterator. sl@0: * sl@0: * @param uni UNormIterator from unorm_openIter() sl@0: * @internal sl@0: */ sl@0: U_CAPI void U_EXPORT2 sl@0: unorm_closeIter(UNormIterator *uni); sl@0: sl@0: /** sl@0: * Set a UCharIterator and a normalization mode for the normalizing iterator sl@0: * to wrap. The normalizing iterator will read from the character iterator, sl@0: * normalize the text, and in turn deliver it with its own wrapper UCharIterator sl@0: * interface which it returns. sl@0: * sl@0: * The source iterator remains at its current position through the unorm_setIter() sl@0: * call but will be used and moved as soon as sl@0: * the returned normalizing iterator is. 
sl@0: * sl@0: * The returned interface pointer is valid for as long as the normalizing iterator sl@0: * is open and until another unorm_setIter() call is made on it. sl@0: * sl@0: * The normalizing iterator's UCharIterator interface has the following properties: sl@0: * - getIndex() and move() will almost always return UITER_UNKNOWN_INDEX sl@0: * - getState() will return UITER_NO_STATE for unknown states for positions sl@0: * that are not at normalization boundaries sl@0: * sl@0: * @param uni UNormIterator from unorm_openIter() sl@0: * @param iter The source text UCharIterator to be wrapped. It is aliased into the normalizing iterator. sl@0: * Must support getState() and setState(). sl@0: * @param mode The normalization mode. sl@0: * @param pErrorCode ICU error code sl@0: * @return an alias to the normalizing iterator's UCharIterator interface sl@0: * @internal sl@0: */ sl@0: U_CAPI UCharIterator * U_EXPORT2 sl@0: unorm_setIter(UNormIterator *uni, UCharIterator *iter, UNormalizationMode mode, UErrorCode *pErrorCode); sl@0: sl@0: #endif /* uconfig.h switches */ sl@0: sl@0: #endif