sl@0: /*
sl@0: * Copyright (C) 1999-2005, International Business Machines Corporation and others.
sl@0: * All Rights Reserved.
sl@0: **********************************************************************
sl@0: * Date Name Description
sl@0: * 11/17/99 aliu Creation.
sl@0: **********************************************************************
sl@0: */
sl@0: #ifndef UNIFILT_H
sl@0: #define UNIFILT_H
sl@0:
sl@0: #include "unicode/unifunct.h"
sl@0: #include "unicode/unimatch.h"
sl@0:
sl@0: /**
sl@0: * \file
sl@0: * \brief C++ API: Unicode Filter
sl@0: */
sl@0:
sl@0: U_NAMESPACE_BEGIN
sl@0:
sl@0: /**
sl@0: * U_ETHER is used to represent character values for positions outside
sl@0: * a range. For example, transliterator uses this to represent
sl@0: * characters outside the range contextStart..contextLimit-1. This
sl@0: * allows explicit matching by rules and UnicodeSets of text outside a
sl@0: * defined range.
sl@0: * @draft ICU 3.0
sl@0: */
sl@0: #define U_ETHER ((UChar)0xFFFF)
sl@0:
sl@0: /**
sl@0: *
sl@0: * UnicodeFilter
defines a protocol for selecting a
sl@0: * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
sl@0: * Currently, filters are used in conjunction with classes like {@link
sl@0: * Transliterator} to only process selected characters through a
sl@0: * transformation.
sl@0: *
sl@0: *
Note: UnicodeFilter currently stubs out two pure virtual methods sl@0: * of its base class, UnicodeMatcher. These methods are toPattern() sl@0: * and matchesIndexValue(). This is done so that filter classes that sl@0: * are not actually used as matchers -- specifically, those in the sl@0: * UnicodeFilterLogic component, and those in tests -- can continue to sl@0: * work without defining these methods. As long as a filter is not sl@0: * used in an RBT during real transliteration, these methods will not sl@0: * be called. However, this breaks the UnicodeMatcher base class sl@0: * protocol, and it is not a correct solution. sl@0: * sl@0: *
In the future we may revisit the UnicodeMatcher / UnicodeFilter sl@0: * hierarchy and either redesign it, or simply remove the stubs in sl@0: * UnicodeFilter and force subclasses to implement the full sl@0: * UnicodeMatcher protocol. sl@0: * sl@0: * @see UnicodeFilterLogic sl@0: * @stable ICU 2.0 sl@0: */ sl@0: class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { sl@0: sl@0: public: sl@0: /** sl@0: * Destructor sl@0: * @stable ICU 2.0 sl@0: */ sl@0: virtual ~UnicodeFilter(); sl@0: sl@0: /** sl@0: * Returns true for characters that are in the selected sl@0: * subset. In other words, if a character is to be sl@0: * filtered, then contains() returns sl@0: * false. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: virtual UBool contains(UChar32 c) const = 0; sl@0: sl@0: /** sl@0: * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer sl@0: * and return the pointer. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual UnicodeMatcher* toMatcher() const; sl@0: sl@0: /** sl@0: * Implement UnicodeMatcher API. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual UMatchDegree matches(const Replaceable& text, sl@0: int32_t& offset, sl@0: int32_t limit, sl@0: UBool incremental); sl@0: sl@0: /** sl@0: * UnicodeFunctor API. Nothing to do. sl@0: * @stable ICU 2.4 sl@0: */ sl@0: virtual void setData(const TransliterationRuleData*); sl@0: sl@0: /** sl@0: * ICU "poor man's RTTI", returns a UClassID for the actual class. sl@0: * sl@0: * @stable ICU 2.2 sl@0: */ sl@0: virtual UClassID getDynamicClassID() const = 0; sl@0: sl@0: /** sl@0: * ICU "poor man's RTTI", returns a UClassID for this class. sl@0: * sl@0: * @stable ICU 2.2 sl@0: */ sl@0: static UClassID U_EXPORT2 getStaticClassID(); sl@0: sl@0: protected: sl@0: sl@0: /* sl@0: * Since this class has pure virtual functions, sl@0: * a constructor can't be used. sl@0: * @stable ICU 2.0 sl@0: */ sl@0: /* UnicodeFilter();*/ sl@0: }; sl@0: sl@0: /*inline UnicodeFilter::UnicodeFilter() {}*/ sl@0: sl@0: U_NAMESPACE_END sl@0: sl@0: #endif