// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ********************************************************************** * Copyright (C) 1999-2010, International Business Machines Corporation and others. * All Rights Reserved. ********************************************************************** * Date Name Description * 11/17/99 aliu Creation. ********************************************************************** */ #ifndef UNIFILT_H #define UNIFILT_H #include "unicode/unifunct.h" #include "unicode/unimatch.h" /** * \file * \brief C++ API: Unicode Filter */ U_NAMESPACE_BEGIN /** * U_ETHER is used to represent character values for positions outside * a range. For example, transliterator uses this to represent * characters outside the range contextStart..contextLimit-1. This * allows explicit matching by rules and UnicodeSets of text outside a * defined range. * @stable ICU 3.0 */ #define U_ETHER ((char16_t)0xFFFF) /** * * UnicodeFilter defines a protocol for selecting a * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. * Currently, filters are used in conjunction with classes like {@link * Transliterator} to only process selected characters through a * transformation. * *

Note: UnicodeFilter currently stubs out two pure virtual methods * of its base class, UnicodeMatcher. These methods are toPattern() * and matchesIndexValue(). This is done so that filter classes that * are not actually used as matchers -- specifically, those in the * UnicodeFilterLogic component, and those in tests -- can continue to * work without defining these methods. As long as a filter is not * used in an RBT during real transliteration, these methods will not * be called. However, this breaks the UnicodeMatcher base class * protocol, and it is not a correct solution. * *

In the future we may revisit the UnicodeMatcher / UnicodeFilter * hierarchy and either redesign it, or simply remove the stubs in * UnicodeFilter and force subclasses to implement the full * UnicodeMatcher protocol. * * @see UnicodeFilterLogic * @stable ICU 2.0 */ class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { public: /** * Destructor * @stable ICU 2.0 */ virtual ~UnicodeFilter(); /** * Returns true for characters that are in the selected * subset. In other words, if a character is to be * filtered, then contains() returns * false. * @stable ICU 2.0 */ virtual UBool contains(UChar32 c) const = 0; /** * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer * and return the pointer. * @stable ICU 2.4 */ virtual UnicodeMatcher* toMatcher() const; /** * Implement UnicodeMatcher API. * @stable ICU 2.4 */ virtual UMatchDegree matches(const Replaceable& text, int32_t& offset, int32_t limit, UBool incremental); /** * UnicodeFunctor API. Nothing to do. * @stable ICU 2.4 */ virtual void setData(const TransliterationRuleData*); /** * ICU "poor man's RTTI", returns a UClassID for this class. * * @stable ICU 2.2 */ static UClassID U_EXPORT2 getStaticClassID(); protected: /* * Since this class has pure virtual functions, * a constructor can't be used. * @stable ICU 2.0 */ /* UnicodeFilter();*/ }; /*inline UnicodeFilter::UnicodeFilter() {}*/ U_NAMESPACE_END #endif