os/textandloc/fontservices/textshaperplugin/IcuSource/common/unicode/unimatch.h
author sl
Tue, 10 Jun 2014 14:32:02 +0200
changeset 1 260cb5ec6c19
permissions -rw-r--r--
Update contrib.
     1 /*
     2 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
     3 **********************************************************************
     4 *   Date        Name        Description
     5 *   07/18/01    aliu        Creation.
     6 **********************************************************************
     7 */
     8 #ifndef UNIMATCH_H
     9 #define UNIMATCH_H
    10 
    11 #include "unicode/utypes.h"
    12 
    13 /**
    14  * \file 
    15  * \brief C++ API: Unicode Matcher
    16  */
    17 
    18 
    19 U_NAMESPACE_BEGIN
    20 
    21 class Replaceable;
    22 class UnicodeString;
    23 class UnicodeSet;
    24 
     25 /**
     26  * Constants returned by <code>UnicodeMatcher::matches()</code>
     27  * indicating the degree of match (implicit values 0, 1, 2 in order).
     28  * @stable ICU 2.4
     29  */
     30 enum UMatchDegree {
     31     /**
     32      * Constant returned by <code>matches()</code> indicating a
     33      * mismatch between the text and this matcher.  The text contains
     34      * a character which does not match, or the text does not contain
     35      * all desired characters for a non-incremental match.
     36      * @stable ICU 2.4
     37      */
     38     U_MISMATCH,
     39     
     40     /**
     41      * Constant returned by <code>matches()</code> indicating a
     42      * partial match between the text and this matcher.  Returned only
     43      * for incremental match operations.  All characters of the text
     44      * match, but more characters are required for a complete match.
     45      * Alternatively, for variable-length matchers, all characters of
     46      * the text match, and if more characters were supplied at limit,
     47      * they might also match.
     48      * @stable ICU 2.4
     49      */
     50     U_PARTIAL_MATCH,
     51     
     52     /**
     53      * Constant returned by <code>matches()</code> indicating a
     54      * complete match between the text and this matcher.  For an
     55      * incremental variable-length match, this value is returned if
     56      * the given text matches, and it is known that additional
     57      * characters would not alter the extent of the match.
     58      * @stable ICU 2.4
     59      */
     60     U_MATCH
     61 };
    62 
     63 /**
     64  * <code>UnicodeMatcher</code> defines a protocol (an abstract interface)
     65  * for objects that can match a range of characters in a Replaceable string.
     66  * @stable ICU 2.4
     67  */
     68 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
     69 
     70 public:
     71     /**
     72      * Virtual destructor.
     73      * @stable ICU 2.4
     74      */
     75     virtual ~UnicodeMatcher();
     76 
     77     /**
     78      * Return a UMatchDegree value indicating the degree of match for
     79      * the given text at the given offset.  Zero, one, or more
     80      * characters may be matched.
     81      *
     82      * Matching in the forward direction is indicated by limit >
     83      * offset.  Characters from offset forwards to limit-1 will be
     84      * considered for matching.
     85      * 
     86      * Matching in the reverse direction is indicated by limit <
     87      * offset.  Characters from offset backwards to limit+1 will be
     88      * considered for matching.
     89      *
     90      * If limit == offset then the only match possible is a zero
     91      * character match (which subclasses may implement if desired).
     92      *
     93      * As a side effect, advance the offset parameter to the limit of
     94      * the matched substring.  In the forward direction, this will be
     95      * the index of the last matched character plus one.  In the
     96      * reverse direction, this will be the index of the last matched
     97      * character minus one.
     98      *
     99      * <p>Note:  This method is not const because some classes may
    100      * modify their state as the result of a match.
    101      *
    102      * @param text the text to be matched
    103      * @param offset on input, the index into text at which to begin
    104      * matching.  On output, the limit of the matched text.  The
    105      * number of matched characters is the output value of offset
    106      * minus the input value.  When positioned at a surrogate pair,
    107      * offset should always point to the HIGH SURROGATE (leading code
    108      * unit) of the pair, both on entry and upon return.
    109      * @param limit the limit index of text to be matched.  Greater
    110      * than offset for a forward direction match, less than offset for
    111      * a backward direction match.  The last character to be
    112      * considered for matching will be text.charAt(limit-1) in the
    113      * forward direction or text.charAt(limit+1) in the backward
    114      * direction.
    115      * @param incremental if TRUE, then assume further characters may
    116      * be inserted at limit and check for partial matching.  Otherwise
    117      * assume the text as given is complete.
    118      * @return a match degree value indicating a full match, a partial
    119      * match, or a mismatch.  If incremental is FALSE then
    120      * U_PARTIAL_MATCH should never be returned.
    121      * @stable ICU 2.4
    122      */
    123     virtual UMatchDegree matches(const Replaceable& text,
    124                                  int32_t& offset,
    125                                  int32_t limit,
    126                                  UBool incremental) = 0;
    127 
    128     /**
    129      * Returns a string representation of this matcher.  If the result of
    130      * calling this function is passed to the appropriate parser, it
    131      * will produce another matcher that is equal to this one.
    132      * @param result the string to receive the pattern.  Previous
    133      * contents will be deleted.
    134      * @param escapeUnprintable if TRUE then convert unprintable
    135      * characters to their hex escape representations, \\uxxxx or
    136      * \\Uxxxxxxxx.  Unprintable characters are those other than
    137      * U+000A, U+0020..U+007E.
    138      * @stable ICU 2.4
    139      */
    140     virtual UnicodeString& toPattern(UnicodeString& result,
    141                                      UBool escapeUnprintable = FALSE) const = 0;
    142 
    143     /**
    144      * Returns TRUE if this matcher will match a character c, where
    145      * (c & 0xFF) == v, at offset, in the forward direction (with limit >
    146      * offset).  This is used by <tt>RuleBasedTransliterator</tt> for
    147      * indexing.
    148      * @stable ICU 2.4
    149      */
    150     virtual UBool matchesIndexValue(uint8_t v) const = 0;
    151 
    152     /**
    153      * Union the set of all characters that may be matched by this object
    154      * into the given set.
    155      * @param toUnionTo the set into which to union the source characters
    156      * @stable ICU 2.4
    157      */
    158     virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
    159 };
   160 
   161 U_NAMESPACE_END
   162 
   163 #endif