sl@0
|
1 |
/*
|
sl@0
|
2 |
* Copyright (C) 1999-2005, International Business Machines Corporation and others.
|
sl@0
|
3 |
* All Rights Reserved.
|
sl@0
|
4 |
**********************************************************************
|
sl@0
|
5 |
* Date Name Description
|
sl@0
|
6 |
* 11/17/99 aliu Creation.
|
sl@0
|
7 |
**********************************************************************
|
sl@0
|
8 |
*/
|
sl@0
|
9 |
#ifndef UNIFILT_H
|
sl@0
|
10 |
#define UNIFILT_H
|
sl@0
|
11 |
|
sl@0
|
12 |
#include "unicode/unifunct.h"
|
sl@0
|
13 |
#include "unicode/unimatch.h"
|
sl@0
|
14 |
|
sl@0
|
15 |
/**
|
sl@0
|
16 |
* \file
|
sl@0
|
17 |
* \brief C++ API: Unicode Filter
|
sl@0
|
18 |
*/
|
sl@0
|
19 |
|
sl@0
|
20 |
U_NAMESPACE_BEGIN
|
sl@0
|
21 |
|
sl@0
|
22 |
/**
 * U_ETHER is used to represent character values for positions outside
 * a range.  For example, transliterator uses this to represent
 * characters outside the range contextStart..contextLimit-1.  This
 * allows explicit matching by rules and UnicodeSets of text outside a
 * defined range.
 *
 * The value is 0xFFFF converted to UChar.
 * @draft ICU 3.0
 */
#define U_ETHER ((UChar)0xFFFF)
|
sl@0
|
31 |
|
sl@0
|
32 |
/**
|
sl@0
|
33 |
*
|
sl@0
|
34 |
* <code>UnicodeFilter</code> defines a protocol for selecting a
|
sl@0
|
35 |
* subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
|
sl@0
|
36 |
* Currently, filters are used in conjunction with classes like {@link
|
sl@0
|
37 |
* Transliterator} to only process selected characters through a
|
sl@0
|
38 |
* transformation.
|
sl@0
|
39 |
*
|
sl@0
|
40 |
* <p>Note: UnicodeFilter currently stubs out two pure virtual methods
|
sl@0
|
41 |
* of its base class, UnicodeMatcher. These methods are toPattern()
|
sl@0
|
42 |
* and matchesIndexValue(). This is done so that filter classes that
|
sl@0
|
43 |
* are not actually used as matchers -- specifically, those in the
|
sl@0
|
44 |
* UnicodeFilterLogic component, and those in tests -- can continue to
|
sl@0
|
45 |
* work without defining these methods. As long as a filter is not
|
sl@0
|
46 |
* used in an RBT during real transliteration, these methods will not
|
sl@0
|
47 |
* be called. However, this breaks the UnicodeMatcher base class
|
sl@0
|
48 |
* protocol, and it is not a correct solution.
|
sl@0
|
49 |
*
|
sl@0
|
50 |
* <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
|
sl@0
|
51 |
* hierarchy and either redesign it, or simply remove the stubs in
|
sl@0
|
52 |
* UnicodeFilter and force subclasses to implement the full
|
sl@0
|
53 |
* UnicodeMatcher protocol.
|
sl@0
|
54 |
*
|
sl@0
|
55 |
* @see UnicodeFilterLogic
|
sl@0
|
56 |
* @stable ICU 2.0
|
sl@0
|
57 |
*/
|
sl@0
|
58 |
class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
|
sl@0
|
59 |
|
sl@0
|
60 |
public:
|
sl@0
|
61 |
/**
|
sl@0
|
62 |
* Destructor
|
sl@0
|
63 |
* @stable ICU 2.0
|
sl@0
|
64 |
*/
|
sl@0
|
65 |
virtual ~UnicodeFilter();
|
sl@0
|
66 |
|
sl@0
|
67 |
/**
|
sl@0
|
68 |
* Returns <tt>true</tt> for characters that are in the selected
|
sl@0
|
69 |
* subset. In other words, if a character is <b>to be
|
sl@0
|
70 |
* filtered</b>, then <tt>contains()</tt> returns
|
sl@0
|
71 |
* <b><tt>false</tt></b>.
|
sl@0
|
72 |
* @stable ICU 2.0
|
sl@0
|
73 |
*/
|
sl@0
|
74 |
virtual UBool contains(UChar32 c) const = 0;
|
sl@0
|
75 |
|
sl@0
|
76 |
/**
|
sl@0
|
77 |
* UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
sl@0
|
78 |
* and return the pointer.
|
sl@0
|
79 |
* @stable ICU 2.4
|
sl@0
|
80 |
*/
|
sl@0
|
81 |
virtual UnicodeMatcher* toMatcher() const;
|
sl@0
|
82 |
|
sl@0
|
83 |
/**
|
sl@0
|
84 |
* Implement UnicodeMatcher API.
|
sl@0
|
85 |
* @stable ICU 2.4
|
sl@0
|
86 |
*/
|
sl@0
|
87 |
virtual UMatchDegree matches(const Replaceable& text,
|
sl@0
|
88 |
int32_t& offset,
|
sl@0
|
89 |
int32_t limit,
|
sl@0
|
90 |
UBool incremental);
|
sl@0
|
91 |
|
sl@0
|
92 |
/**
|
sl@0
|
93 |
* UnicodeFunctor API. Nothing to do.
|
sl@0
|
94 |
* @stable ICU 2.4
|
sl@0
|
95 |
*/
|
sl@0
|
96 |
virtual void setData(const TransliterationRuleData*);
|
sl@0
|
97 |
|
sl@0
|
98 |
/**
|
sl@0
|
99 |
* ICU "poor man's RTTI", returns a UClassID for the actual class.
|
sl@0
|
100 |
*
|
sl@0
|
101 |
* @stable ICU 2.2
|
sl@0
|
102 |
*/
|
sl@0
|
103 |
virtual UClassID getDynamicClassID() const = 0;
|
sl@0
|
104 |
|
sl@0
|
105 |
/**
|
sl@0
|
106 |
* ICU "poor man's RTTI", returns a UClassID for this class.
|
sl@0
|
107 |
*
|
sl@0
|
108 |
* @stable ICU 2.2
|
sl@0
|
109 |
*/
|
sl@0
|
110 |
static UClassID U_EXPORT2 getStaticClassID();
|
sl@0
|
111 |
|
sl@0
|
112 |
protected:
|
sl@0
|
113 |
|
sl@0
|
114 |
/*
|
sl@0
|
115 |
* Since this class has pure virtual functions,
|
sl@0
|
116 |
* a constructor can't be used.
|
sl@0
|
117 |
* @stable ICU 2.0
|
sl@0
|
118 |
*/
|
sl@0
|
119 |
/* UnicodeFilter();*/
|
sl@0
|
120 |
};
|
sl@0
|
121 |
|
sl@0
|
122 |
/*inline UnicodeFilter::UnicodeFilter() {}*/
|
sl@0
|
123 |
|
sl@0
|
124 |
U_NAMESPACE_END
|
sl@0
|
125 |
|
sl@0
|
126 |
#endif
|