os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/normlzr.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2  ********************************************************************
     3  * COPYRIGHT:
     4  * Copyright (c) 1996-2005, International Business Machines Corporation and
     5  * others. All Rights Reserved.
     6  ********************************************************************
     7  */
     8 
     9 #ifndef NORMLZR_H
    10 #define NORMLZR_H
    11 
    12 #include "unicode/utypes.h"
    13 
    14 /**
    15  * \file 
    16  * \brief C++ API: Unicode Normalization
    17  */
    18  
    19 #if !UCONFIG_NO_NORMALIZATION
    20 
    21 #include "unicode/uobject.h"
    22 #include "unicode/unistr.h"
    23 #include "unicode/chariter.h"
    24 #include "unicode/unorm.h"
    25 
    26 
    27 struct UCharIterator; // forward declaration; Normalizer below only holds a UCharIterator* (member 'text')
    28 typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
    29 
    30 U_NAMESPACE_BEGIN
    31 /**
    32  *
    33  * The Normalizer class consists of two parts:
    34  * - static functions that normalize strings or test if strings are normalized
    35  * - a Normalizer object is an iterator that takes any kind of text and
    36  *   provides iteration over its normalized form
    37  *
    38  * The Normalizer class is not suitable for subclassing.
    39  *
    40  * The static functions are basically wrappers around the C implementation,
    41  * using UnicodeString instead of UChar*.
    42  * For basic information about normalization forms and details about the C API
    43  * please see the documentation in unorm.h.
    44  *
    45  * The iterator API with the Normalizer constructors and the non-static functions
    46  * uses a CharacterIterator as input. It is possible to pass a string which
    47  * is then internally wrapped in a CharacterIterator.
    48  * The input text is not normalized all at once, but incrementally where needed
    49  * (providing efficient random access).
    50  * This makes it possible to pass in a large text but spend only a small amount of time
    51  * normalizing a small part of that text.
    52  * However, if the entire text is normalized, then the iterator will be
    53  * slower than normalizing the entire text at once and iterating over the result.
    54  * A possible use of the Normalizer iterator is also to report an index into the
    55  * original text that is close to where the normalized characters come from.
    56  *
    57  * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
    58  * The earlier implementation reported the getIndex() inconsistently,
    59  * and previous() could not be used after setIndex(), next(), first(), and current().
    60  *
    61  * Normalizer allows normalization to start from anywhere in the input text by
    62  * calling setIndexOnly(), first(), or last().
    63  * Without calling any of these, the iterator will start at the beginning of the text.
    64  *
    65  * At any time, next() returns the next normalized code point (UChar32),
    66  * with post-increment semantics (like CharacterIterator::next32PostInc()).
    67  * previous() returns the previous normalized code point (UChar32),
    68  * with pre-decrement semantics (like CharacterIterator::previous32()).
    69  *
    70  * current() returns the current code point
    71  * (respectively the one at the newly set index) without moving
    72  * the getIndex(). Note that if the text at the current position
    73  * needs to be normalized, then these functions will do that.
    74  * (This is why current() is not const.)
    75  * It is more efficient to call setIndexOnly() instead, which does not
    76  * normalize.
    77  *
    78  * getIndex() always refers to the position in the input text where the normalized
    79  * code points are returned from. It does not always change with each returned
    80  * code point.
    81  * The code point that is returned from any of the functions
    82  * corresponds to text at or after getIndex(), according to the
    83  * function's iteration semantics (post-increment or pre-decrement).
    84  *
    85  * next() returns a code point from at or after the getIndex()
    86  * from before the next() call. After the next() call, the getIndex()
    87  * might have moved to where the next code point will be returned from
    88  * (from a next() or current() call).
    89  * This is semantically equivalent to array access with array[index++]
    90  * (post-increment semantics).
    91  *
    92  * previous() returns a code point from at or after the getIndex()
    93  * from after the previous() call.
    94  * This is semantically equivalent to array access with array[--index]
    95  * (pre-decrement semantics).
    96  *
    97  * Internally, the Normalizer iterator normalizes a small piece of text
    98  * starting at the getIndex() and ending at a following "safe" index.
    99  * The normalized result is stored in an internal string buffer, and
   100  * the code points are iterated from there.
   101  * With multiple iteration calls, this is repeated until the next piece
   102  * of text needs to be normalized, and the getIndex() needs to be moved.
   103  *
   104  * The following "safe" index, the internal buffer, and the secondary
   105  * iteration index into that buffer are not exposed on the API.
   106  * This also means that it is currently not practical to return to
   107  * a particular, arbitrary position in the text because one would need to
   108  * know, and be able to set, in addition to the getIndex(), at least also the
   109  * current index into the internal buffer.
   110  * It is currently only possible to observe when getIndex() changes
   111  * (with careful consideration of the iteration semantics),
   112  * at which time the internal index will be 0.
   113  * For example, if getIndex() is different after next() than before it,
   114  * then the internal index is 0 and one can return to this getIndex()
   115  * later with setIndexOnly().
   116  *
   117  * @author Laura Werner, Mark Davis, Markus Scherer
   118  * @stable ICU 2.0
   119  */
   120 class U_COMMON_API Normalizer : public UObject {
   121 public:
   122   /**
   123    * If DONE is returned from an iteration function that returns a code point,
   124    * then there are no more normalization results available.
   125    * @stable ICU 2.0
   126    */
   127   enum {
   128       DONE=0xffff
   129   };
   130 
   131   // Constructors
   132 
   133   /**
   134    * Creates a new <code>Normalizer</code> object for iterating over the
   135    * normalized form of a given string.
   136    * <p>
   137    * @param str   The string to be normalized.  The normalization
   138    *              will start at the beginning of the string.
   139    *
   140    * @param mode  The normalization mode.
   141    * @stable ICU 2.0
   142    */
   143   Normalizer(const UnicodeString& str, UNormalizationMode mode);
   144 
   145   /**
   146    * Creates a new <code>Normalizer</code> object for iterating over the
   147    * normalized form of a given string.
   148    * <p>
   149    * @param str   The string to be normalized.  The normalization
   150    *              will start at the beginning of the string.
   151    *
   152    * @param length Length of the string, or -1 if NUL-terminated.
   153    * @param mode  The normalization mode.
   154    * @stable ICU 2.0
   155    */
   156   Normalizer(const UChar* str, int32_t length, UNormalizationMode mode);
   157 
   158   /**
   159    * Creates a new <code>Normalizer</code> object for iterating over the
   160    * normalized form of the given text.
   161    * <p>
   162    * @param iter  The input text to be normalized.  The normalization
   163    *              will start at the beginning of the text.
   164    *
   165    * @param mode  The normalization mode.
   166    * @stable ICU 2.0
   167    */
   168   Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
   169 
   170   /**
   171    * Copy constructor.
   172    * @param copy The object to be copied.
   173    * @stable ICU 2.0
   174    */
   175   Normalizer(const Normalizer& copy);
   176 
   177   /**
   178    * Destructor
   179    * @stable ICU 2.0
   180    */
   181   virtual ~Normalizer();
   182 
   183 
   184   //-------------------------------------------------------------------------
   185   // Static utility methods
   186   //-------------------------------------------------------------------------
   187 
   188   /**
   189    * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
   190    * This is a wrapper for unorm_normalize(), using UnicodeString's.
   191    *
   192    * The <code>options</code> parameter specifies which optional
   193    * <code>Normalizer</code> features are to be enabled for this operation.
   194    *
   195    * @param source    the input string to be normalized.
   196    * @param mode      the normalization mode
   197    * @param options   the optional features to be enabled (0 for no options)
   198    * @param result    The normalized string (on output).
   199    * @param status    The error code.
   200    * @stable ICU 2.0
   201    */
   202   static void U_EXPORT2 normalize(const UnicodeString& source,
   203                         UNormalizationMode mode, int32_t options,
   204                         UnicodeString& result,
   205                         UErrorCode &status);
   206 
   207   /**
   208    * Compose a <code>UnicodeString</code>.
   209    * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
   210    * This is a wrapper for unorm_normalize(), using UnicodeString's.
   211    *
   212    * The <code>options</code> parameter specifies which optional
   213    * <code>Normalizer</code> features are to be enabled for this operation.
   214    *
   215    * @param source    the string to be composed.
   216    * @param compat    Perform compatibility decomposition before composition.
   217    *                  If this argument is <code>FALSE</code>, only canonical
   218    *                  decomposition will be performed.
   219    * @param options   the optional features to be enabled (0 for no options)
   220    * @param result    The composed string (on output).
   221    * @param status    The error code.
   222    * @stable ICU 2.0
   223    */
   224   static void U_EXPORT2 compose(const UnicodeString& source,
   225                       UBool compat, int32_t options,
   226                       UnicodeString& result,
   227                       UErrorCode &status);
   228 
   229   /**
   230    * Static method to decompose a <code>UnicodeString</code>.
   231    * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
   232    * This is a wrapper for unorm_normalize(), using UnicodeString's.
   233    *
   234    * The <code>options</code> parameter specifies which optional
   235    * <code>Normalizer</code> features are to be enabled for this operation.
   236    *
   237    * @param source    the string to be decomposed.
   238    * @param compat    Perform compatibility decomposition.
   239    *                  If this argument is <code>FALSE</code>, only canonical
   240    *                  decomposition will be performed.
   241    * @param options   the optional features to be enabled (0 for no options)
   242    * @param result    The decomposed string (on output).
   243    * @param status    The error code.
   244    * @stable ICU 2.0
   245    */
   246   static void U_EXPORT2 decompose(const UnicodeString& source,
   247                         UBool compat, int32_t options,
   248                         UnicodeString& result,
   249                         UErrorCode &status);
   250 
   251   /**
   252    * Performs a quick check on a string to determine whether the string is
   253    * in a particular normalization form.
   254    * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
   255    *
   256    * Three types of result can be returned: UNORM_YES, UNORM_NO or
   257    * UNORM_MAYBE. Result UNORM_YES indicates that the argument
   258    * string is in the desired normalized format; UNORM_NO indicates that the
   259    * argument string is not in the desired normalized format. A
   260    * UNORM_MAYBE result indicates that a more thorough check is required,
   261    * the user may have to put the string in its normalized form and compare the
   262    * results.
   263    * @param source       string for determining if it is in a normalized format
   264    * @param mode         normalization format
   265    * @param status A reference to a UErrorCode to receive any errors
   266    * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
   267    *
   268    * @see isNormalized
   269    * @stable ICU 2.0
   270    */
   271   static inline UNormalizationCheckResult
   272   quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
   273 
   274   /**
   275    * Performs a quick check on a string; same as the other version of quickCheck
   276    * but takes an extra options parameter like most normalization functions.
   277    *
   278    * @param source       string for determining if it is in a normalized format
   279    * @param mode         normalization format
   280    * @param options      the optional features to be enabled (0 for no options)
   281    * @param status A reference to a UErrorCode to receive any errors
   282    * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
   283    *
   284    * @see isNormalized
   285    * @stable ICU 2.6
   286    */
   287   static inline UNormalizationCheckResult
   288   quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
   289 
   290   /**
   291    * Test if a string is in a given normalization form.
   292    * This is semantically equivalent to source.equals(normalize(source, mode)).
   293    *
   294    * Unlike unorm_quickCheck(), this function returns a definitive result,
   295    * never a "maybe".
   296    * For NFD, NFKD, and FCD, both functions work exactly the same.
   297    * For NFC and NFKC where quickCheck may return "maybe", this function will
   298    * perform further tests to arrive at a TRUE/FALSE result.
   299    *
   300    * @param src        String that is to be tested if it is in a normalization format.
   301    * @param mode       Which normalization form to test for.
   302    * @param errorCode  ICU error code in/out parameter.
   303    *                   Must fulfill U_SUCCESS before the function call.
   304    * @return Boolean value indicating whether the source string is in the
   305    *         "mode" normalization form.
   306    *
   307    * @see quickCheck
   308    * @stable ICU 2.2
   309    */
   310   static inline UBool
   311   isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
   312 
   313   /**
   314    * Test if a string is in a given normalization form; same as the other version of isNormalized
   315    * but takes an extra options parameter like most normalization functions.
   316    *
   317    * @param src        String that is to be tested if it is in a normalization format.
   318    * @param mode       Which normalization form to test for.
   319    * @param options      the optional features to be enabled (0 for no options)
   320    * @param errorCode  ICU error code in/out parameter.
   321    *                   Must fulfill U_SUCCESS before the function call.
   322    * @return Boolean value indicating whether the source string is in the
   323    *         "mode" normalization form.
   324    *
   325    * @see quickCheck
   326    * @stable ICU 2.6
   327    */
   328   static inline UBool
   329   isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
   330 
   331   /**
   332    * Concatenate normalized strings, making sure that the result is normalized as well.
   333    *
   334    * If both the left and the right strings are in
   335    * the normalization form according to "mode/options",
   336    * then the result will be
   337    *
   338    * \code
   339    *     dest=normalize(left+right, mode, options)
   340    * \endcode
   341    *
   342    * For details see unorm_concatenate in unorm.h.
   343    *
   344    * @param left Left source string.
   345    * @param right Right source string.
   346    * @param result The output string.
   347    * @param mode The normalization mode.
   348    * @param options A bit set of normalization options.
   349    * @param errorCode ICU error code in/out parameter.
   350    *                   Must fulfill U_SUCCESS before the function call.
   351    * @return result
   352    *
   353    * @see unorm_concatenate
   354    * @see normalize
   355    * @see unorm_next
   356    * @see unorm_previous
   357    *
   358    * @stable ICU 2.1
   359    */
   360   static UnicodeString &
   361   U_EXPORT2 concatenate(UnicodeString &left, UnicodeString &right,
   362               UnicodeString &result,
   363               UNormalizationMode mode, int32_t options,
   364               UErrorCode &errorCode);
   365 
   366   /**
   367    * Compare two strings for canonical equivalence.
   368    * Further options include case-insensitive comparison and
   369    * code point order (as opposed to code unit order).
   370    *
   371    * Canonical equivalence between two strings is defined as their normalized
   372    * forms (NFD or NFC) being identical.
   373    * This function compares strings incrementally instead of normalizing
   374    * (and optionally case-folding) both strings entirely,
   375    * improving performance significantly.
   376    *
   377    * Bulk normalization is only necessary if the strings do not fulfill the FCD
   378    * conditions. Only in this case, and only if the strings are relatively long,
   379    * is memory allocated temporarily.
   380    * For FCD strings and short non-FCD strings there is no memory allocation.
   381    *
   382    * Semantically, this is equivalent to
   383    *   strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
   384    * where code point order and foldCase are all optional.
   385    *
   386    * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
   387    * the case folding must be performed first, then the normalization.
   388    *
   389    * @param s1 First source string.
   390    * @param s2 Second source string.
   391    *
   392    * @param options A bit set of options:
   393    *   - U_FOLD_CASE_DEFAULT or 0 is used for default options:
   394    *     Case-sensitive comparison in code unit order, and the input strings
   395    *     are quick-checked for FCD.
   396    *
   397    *   - UNORM_INPUT_IS_FCD
   398    *     Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
   399    *     If not set, the function will quickCheck for FCD
   400    *     and normalize if necessary.
   401    *
   402    *   - U_COMPARE_CODE_POINT_ORDER
   403    *     Set to choose code point order instead of code unit order
   404    *     (see u_strCompare for details).
   405    *
   406    *   - U_COMPARE_IGNORE_CASE
   407    *     Set to compare strings case-insensitively using case folding,
   408    *     instead of case-sensitively.
   409    *     If set, then the following case folding options are used.
   410    *
   411    *   - Options as used with case-insensitive comparisons, currently:
   412    *
   413    *   - U_FOLD_CASE_EXCLUDE_SPECIAL_I
   414    *    (see u_strCaseCompare for details)
   415    *
   416    *   - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
   417    *
   418    * @param errorCode ICU error code in/out parameter.
   419    *                  Must fulfill U_SUCCESS before the function call.
   420    * @return <0 or 0 or >0 as usual for string comparisons
   421    *
   422    * @see unorm_compare
   423    * @see normalize
   424    * @see UNORM_FCD
   425    * @see u_strCompare
   426    * @see u_strCaseCompare
   427    *
   428    * @stable ICU 2.2
   429    */
   430   static inline int32_t
   431   compare(const UnicodeString &s1, const UnicodeString &s2,
   432           uint32_t options,
   433           UErrorCode &errorCode);
   434 
   435   //-------------------------------------------------------------------------
   436   // Iteration API
   437   //-------------------------------------------------------------------------
   438 
   439   /**
   440    * Return the current character in the normalized text.
   441    * current() may need to normalize some text at getIndex().
   442    * The getIndex() is not changed.
   443    *
   444    * @return the current normalized code point
   445    * @stable ICU 2.0
   446    */
   447   UChar32              current(void);
   448 
   449   /**
   450    * Return the first character in the normalized text.
   451    * This is equivalent to setIndexOnly(startIndex()) followed by next().
   452    * (Post-increment semantics.)
   453    *
   454    * @return the first normalized code point
   455    * @stable ICU 2.0
   456    */
   457   UChar32              first(void);
   458 
   459   /**
   460    * Return the last character in the normalized text.
   461    * This is equivalent to setIndexOnly(endIndex()) followed by previous().
   462    * (Pre-decrement semantics.)
   463    *
   464    * @return the last normalized code point
   465    * @stable ICU 2.0
   466    */
   467   UChar32              last(void);
   468 
   469   /**
   470    * Return the next character in the normalized text.
   471    * (Post-increment semantics.)
   472    * If the end of the text has already been reached, DONE is returned.
   473    * The DONE value could be confused with a U+FFFF non-character code point
   474    * in the text. If this is possible, you can test getIndex()<endIndex()
   475    * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
   476    * after calling next(). (Calling last() will change the iterator state!)
   477    *
   478    * The C API unorm_next() is more efficient and does not have this ambiguity.
   479    *
   480    * @return the next normalized code point
   481    * @stable ICU 2.0
   482    */
   483   UChar32              next(void);
   484 
   485   /**
   486    * Return the previous character in the normalized text and decrement.
   487    * (Pre-decrement semantics.)
   488    * If the beginning of the text has already been reached, DONE is returned.
   489    * The DONE value could be confused with a U+FFFF non-character code point
   490    * in the text. If this is possible, you can test
   491    * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
   492    * the iterator state!)
   493    *
   494    * The C API unorm_previous() is more efficient and does not have this ambiguity.
   495    *
   496    * @return the previous normalized code point
   497    * @stable ICU 2.0
   498    */
   499   UChar32              previous(void);
   500 
   501   /**
   502    * Set the iteration position in the input text that is being normalized,
   503    * without any immediate normalization.
   504    * After setIndexOnly(), getIndex() will return the same index that is
   505    * specified here.
   506    *
   507    * @param index the desired index in the input text.
   508    * @stable ICU 2.0
   509    */
   510   void                 setIndexOnly(int32_t index);
   511 
   512   /**
   513    * Reset the index to the beginning of the text.
   514    * This is equivalent to setIndexOnly(startIndex()).
   515    * @stable ICU 2.0
   516    */
   517   void                reset(void);
   518 
   519   /**
   520    * Retrieve the current iteration position in the input text that is
   521    * being normalized.
   522    *
   523    * A following call to next() will return a normalized code point from
   524    * the input text at or after this index.
   525    *
   526    * After a call to previous(), getIndex() will point at or before the
   527    * position in the input text where the normalized code point
   528    * was returned from with previous().
   529    *
   530    * @return the current index in the input text
   531    * @stable ICU 2.0
   532    */
   533   int32_t            getIndex(void) const;
   534 
   535   /**
   536    * Retrieve the index of the start of the input text. This is the begin index
   537    * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
   538    * over which this <code>Normalizer</code> is iterating.
   539    *
   540    * @return the smallest index in the input text where the Normalizer operates
   541    * @stable ICU 2.0
   542    */
   543   int32_t            startIndex(void) const;
   544 
   545   /**
   546    * Retrieve the index of the end of the input text. This is the end index
   547    * of the <code>CharacterIterator</code> or the length of the string
   548    * over which this <code>Normalizer</code> is iterating.
   549    * This end index is exclusive, i.e., the Normalizer operates only on characters
   550    * before this index.
   551    *
   552    * @return the first index in the input text where the Normalizer does not operate
   553    * @stable ICU 2.0
   554    */
   555   int32_t            endIndex(void) const;
   556 
   557   /**
   558    * Returns TRUE when both iterators refer to the same character in the same
   559    * input text.
   560    *
   561    * @param that a Normalizer object to compare this one to
   562    * @return comparison result
   563    * @stable ICU 2.0
   564    */
   565   UBool        operator==(const Normalizer& that) const;
   566 
   567   /**
   568    * Returns FALSE when both iterators refer to the same character in the same
   569    * input text (i.e. the negation of operator==).
   570    *
   571    * @param that a Normalizer object to compare this one to
   572    * @return comparison result
   573    * @stable ICU 2.0
   574    */
   575   inline UBool        operator!=(const Normalizer& that) const;
   576 
   577   /**
   578    * Returns a pointer to a new Normalizer that is a clone of this one.
   579    * The caller is responsible for deleting the new clone.
   580    * @return a pointer to a new Normalizer
   581    * @stable ICU 2.0
   582    */
   583   Normalizer*        clone(void) const;
   584 
   585   /**
   586    * Generates a hash code for this iterator.
   587    *
   588    * @return the hash code
   589    * @stable ICU 2.0
   590    */
   591   int32_t                hashCode(void) const;
   592 
   593   //-------------------------------------------------------------------------
   594   // Property access methods
   595   //-------------------------------------------------------------------------
   596 
   597   /**
   598    * Set the normalization mode for this object.
   599    * <p>
   600    * <b>Note:</b> If the normalization mode is changed while iterating
   601    * over a string, calls to {@link #next() } and {@link #previous() } may
   602    * return previously buffered characters in the old normalization mode
   603    * until the iteration is able to re-sync at the next base character.
   604    * It is safest to call {@link #setIndexOnly }, {@link #reset() },
   605    * {@link #setText }, {@link #first() },
   606    * {@link #last() }, etc. after calling <code>setMode</code>.
   607    * <p>
   608    * @param newMode the new mode for this <code>Normalizer</code>.
   609    * @see #getUMode
   610    * @stable ICU 2.0
   611    */
   612   void setMode(UNormalizationMode newMode);
   613 
   614   /**
   615    * Return the normalization mode for this object.
   616    *
   617    * This is an unusual name because there used to be a getMode() that
   618    * returned a different type.
   619    *
   620    * @return the mode for this <code>Normalizer</code>
   621    * @see #setMode
   622    * @stable ICU 2.0
   623    */
   624   UNormalizationMode getUMode(void) const;
   625 
   626   /**
   627    * Set options that affect this <code>Normalizer</code>'s operation.
   628    * Options do not change the basic composition or decomposition operation
   629    * that is being performed, but they control whether
   630    * certain optional portions of the operation are done.
   631    * Currently the only available option is obsolete.
   632    *
   633    * It is possible to specify multiple options that are all turned on or off.
   634    *
   635    * @param   option  the option(s) whose value is/are to be set.
   636    * @param   value   the new setting for the option.  Use <code>TRUE</code> to
   637    *                  turn the option(s) on and <code>FALSE</code> to turn it/them off.
   638    *
   639    * @see #getOption
   640    * @stable ICU 2.0
   641    */
   642   void setOption(int32_t option,
   643          UBool value);
   644 
   645   /**
   646    * Determine whether an option is turned on or off.
   647    * If multiple options are specified, then the result is TRUE if any
   648    * of them are set.
   649    * <p>
   650    * @param option the option(s) that are to be checked
   651    * @return TRUE if any of the option(s) are set
   652    * @see #setOption
   653    * @stable ICU 2.0
   654    */
   655   UBool getOption(int32_t option) const;
   656 
   657   /**
   658    * Set the input text over which this <code>Normalizer</code> will iterate.
   659    * The iteration position is set to the beginning.
   660    *
   661    * @param newText a string that replaces the current input text
   662    * @param status a UErrorCode
   663    * @stable ICU 2.0
   664    */
   665   void setText(const UnicodeString& newText,
   666            UErrorCode &status);
   667 
   668   /**
   669    * Set the input text over which this <code>Normalizer</code> will iterate.
   670    * The iteration position is set to the beginning.
   671    *
   672    * @param newText a CharacterIterator object that replaces the current input text
   673    * @param status a UErrorCode
   674    * @stable ICU 2.0
   675    */
   676   void setText(const CharacterIterator& newText,
   677            UErrorCode &status);
   678 
   679   /**
   680    * Set the input text over which this <code>Normalizer</code> will iterate.
   681    * The iteration position is set to the beginning.
   682    *
   683    * @param newText a string that replaces the current input text
   684    * @param length the length of the string, or -1 if NUL-terminated
   685    * @param status a UErrorCode
   686    * @stable ICU 2.0
   687    */
   688   void setText(const UChar* newText,
   689                     int32_t length,
   690             UErrorCode &status);
   691   /**
   692    * Copies the input text into the UnicodeString argument.
   693    *
   694    * @param result Receives a copy of the text under iteration.
   695    * @stable ICU 2.0
   696    */
   697   void            getText(UnicodeString&  result);
   698 
   699   /**
   700    * ICU "poor man's RTTI", returns a UClassID for this class.
   701    * @return a UClassID for this class.
   702    * @stable ICU 2.2
   703    */
   704   static UClassID U_EXPORT2 getStaticClassID();
   705 
   706   /**
   707    * ICU "poor man's RTTI", returns a UClassID for the actual class.
   708    * @return a UClassID for the actual class.
   709    * @stable ICU 2.2
   710    */
   711   virtual UClassID getDynamicClassID() const;
   712 
   713 private:
   714   //-------------------------------------------------------------------------
   715   // Private functions
   716   //-------------------------------------------------------------------------
   717 
   718   Normalizer(); // default constructor: declared private and not implemented
   719   Normalizer &operator=(const Normalizer &that); // assignment operator: declared private and not implemented
   720 
   721   // Private utility methods for iteration
   722   // For documentation, see the source code
   723   UBool nextNormalize();
   724   UBool previousNormalize();
   725 
   726   void    init(CharacterIterator *iter);
   727   void    clearBuffer(void);
   728 
   729   //-------------------------------------------------------------------------
   730   // Private data
   731   //-------------------------------------------------------------------------
   732 
   733   UNormalizationMode  fUMode;
   734   int32_t             fOptions;
   735 
   736   // The input text and our position in it
   737   UCharIterator       *text;
   738 
   739   // The normalization buffer is the result of normalization
   740   // of the source in [currentIndex..nextIndex[ (end index exclusive).
   741   int32_t         currentIndex, nextIndex;
   742 
   743   // A buffer for holding intermediate results
   744   UnicodeString       buffer;
   745   int32_t         bufferPos; // NOTE(review): presumably the iteration index into 'buffer' - confirm in the implementation
   746 
   747 };
   748 
   749 //-------------------------------------------------------------------------
   750 // Inline implementations
   751 //-------------------------------------------------------------------------
   752 
   753 inline UBool
   754 Normalizer::operator!= (const Normalizer& other) const
   755 { return ! operator==(other); }
   756 
   757 inline UNormalizationCheckResult
   758 Normalizer::quickCheck(const UnicodeString& source,
   759                        UNormalizationMode mode,
   760                        UErrorCode &status) {
   761     if(U_FAILURE(status)) {
   762         return UNORM_MAYBE;
   763     }
   764 
   765     return unorm_quickCheck(source.getBuffer(), source.length(),
   766                             mode, &status);
   767 }
   768 
   769 inline UNormalizationCheckResult
   770 Normalizer::quickCheck(const UnicodeString& source,
   771                        UNormalizationMode mode, int32_t options,
   772                        UErrorCode &status) {
   773     if(U_FAILURE(status)) {
   774         return UNORM_MAYBE;
   775     }
   776 
   777     return unorm_quickCheckWithOptions(source.getBuffer(), source.length(),
   778                                        mode, options, &status);
   779 }
   780 
   781 inline UBool
   782 Normalizer::isNormalized(const UnicodeString& source,
   783                          UNormalizationMode mode,
   784                          UErrorCode &status) {
   785     if(U_FAILURE(status)) {
   786         return FALSE;
   787     }
   788 
   789     return unorm_isNormalized(source.getBuffer(), source.length(),
   790                               mode, &status);
   791 }
   792 
   793 inline UBool
   794 Normalizer::isNormalized(const UnicodeString& source,
   795                          UNormalizationMode mode, int32_t options,
   796                          UErrorCode &status) {
   797     if(U_FAILURE(status)) {
   798         return FALSE;
   799     }
   800 
   801     return unorm_isNormalizedWithOptions(source.getBuffer(), source.length(),
   802                                          mode, options, &status);
   803 }
   804 
   805 inline int32_t
   806 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
   807                     uint32_t options,
   808                     UErrorCode &errorCode) {
   809   // all argument checking is done in unorm_compare
   810   return unorm_compare(s1.getBuffer(), s1.length(),
   811                        s2.getBuffer(), s2.length(),
   812                        options,
   813                        &errorCode);
   814 }
   815 
   816 U_NAMESPACE_END
   817 
   818 #endif /* #if !UCONFIG_NO_NORMALIZATION */
   819 
   820 #endif // NORMLZR_H