diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/i18n/numparse_affixes.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/i18n/numparse_affixes.cpp | 470 |
1 files changed, 0 insertions, 470 deletions
diff --git a/deps/node/deps/icu-small/source/i18n/numparse_affixes.cpp b/deps/node/deps/icu-small/source/i18n/numparse_affixes.cpp deleted file mode 100644 index c30d2416..00000000 --- a/deps/node/deps/icu-small/source/i18n/numparse_affixes.cpp +++ /dev/null @@ -1,470 +0,0 @@ -// © 2018 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -// Allow implicit conversion from char16_t* to UnicodeString for this file: -// Helpful in toString methods and elsewhere. -#define UNISTR_FROM_STRING_EXPLICIT - -#include "numparse_types.h" -#include "numparse_affixes.h" -#include "numparse_utils.h" -#include "number_utils.h" - -using namespace icu; -using namespace icu::numparse; -using namespace icu::numparse::impl; -using namespace icu::number; -using namespace icu::number::impl; - - -namespace { - -/** - * Helper method to return whether the given AffixPatternMatcher equals the given pattern string. - * Either both arguments must be null or the pattern string inside the AffixPatternMatcher must equal - * the given pattern string. - */ -static bool matched(const AffixPatternMatcher* affix, const UnicodeString& patternString) { - return (affix == nullptr && patternString.isBogus()) || - (affix != nullptr && affix->getPattern() == patternString); -} - -/** - * Helper method to return the length of the given AffixPatternMatcher. Returns 0 for null. - */ -static int32_t length(const AffixPatternMatcher* matcher) { - return matcher == nullptr ? 0 : matcher->getPattern().length(); -} - -/** - * Helper method to return whether (1) both lhs and rhs are null/invalid, or (2) if they are both - * valid, whether they are equal according to operator==. Similar to Java Objects.equals() - */ -static bool equals(const AffixPatternMatcher* lhs, const AffixPatternMatcher* rhs) { - if (lhs == nullptr && rhs == nullptr) { - return true; - } - if (lhs == nullptr || rhs == nullptr) { - return false; - } - return *lhs == *rhs; -} - -} - - -AffixPatternMatcherBuilder::AffixPatternMatcherBuilder(const UnicodeString& pattern, - AffixTokenMatcherWarehouse& warehouse, - IgnorablesMatcher* ignorables) - : fMatchersLen(0), - fLastTypeOrCp(0), - fPattern(pattern), - fWarehouse(warehouse), - fIgnorables(ignorables) {} - -void AffixPatternMatcherBuilder::consumeToken(AffixPatternType type, UChar32 cp, UErrorCode& status) { - // This is called by AffixUtils.iterateWithConsumer() for each token. - - // Add an ignorables matcher between tokens except between two literals, and don't put two - // ignorables matchers in a row. - if (fIgnorables != nullptr && fMatchersLen > 0 && - (fLastTypeOrCp < 0 || !fIgnorables->getSet()->contains(fLastTypeOrCp))) { - addMatcher(*fIgnorables); - } - - if (type != TYPE_CODEPOINT) { - // Case 1: the token is a symbol. - switch (type) { - case TYPE_MINUS_SIGN: - addMatcher(fWarehouse.minusSign()); - break; - case TYPE_PLUS_SIGN: - addMatcher(fWarehouse.plusSign()); - break; - case TYPE_PERCENT: - addMatcher(fWarehouse.percent()); - break; - case TYPE_PERMILLE: - addMatcher(fWarehouse.permille()); - break; - case TYPE_CURRENCY_SINGLE: - case TYPE_CURRENCY_DOUBLE: - case TYPE_CURRENCY_TRIPLE: - case TYPE_CURRENCY_QUAD: - case TYPE_CURRENCY_QUINT: - // All currency symbols use the same matcher - addMatcher(fWarehouse.currency(status)); - break; - default: - U_ASSERT(FALSE); - } - - } else if (fIgnorables != nullptr && fIgnorables->getSet()->contains(cp)) { - // Case 2: the token is an ignorable literal. - // No action necessary: the ignorables matcher has already been added. - - } else { - // Case 3: the token is a non-ignorable literal. - addMatcher(fWarehouse.nextCodePointMatcher(cp)); - } - fLastTypeOrCp = type != TYPE_CODEPOINT ? type : cp; -} - -void AffixPatternMatcherBuilder::addMatcher(NumberParseMatcher& matcher) { - if (fMatchersLen >= fMatchers.getCapacity()) { - fMatchers.resize(fMatchersLen * 2, fMatchersLen); - } - fMatchers[fMatchersLen++] = &matcher; -} - -AffixPatternMatcher AffixPatternMatcherBuilder::build() { - return AffixPatternMatcher(fMatchers, fMatchersLen, fPattern); -} - - -CodePointMatcherWarehouse::CodePointMatcherWarehouse() - : codePointCount(0), codePointNumBatches(0) {} - -CodePointMatcherWarehouse::~CodePointMatcherWarehouse() { - // Delete the variable number of batches of code point matchers - for (int32_t i = 0; i < codePointNumBatches; i++) { - delete[] codePointsOverflow[i]; - } -} - -CodePointMatcherWarehouse::CodePointMatcherWarehouse(CodePointMatcherWarehouse&& src) U_NOEXCEPT - : codePoints(std::move(src.codePoints)), - codePointsOverflow(std::move(src.codePointsOverflow)), - codePointCount(src.codePointCount), - codePointNumBatches(src.codePointNumBatches) {} - -CodePointMatcherWarehouse& -CodePointMatcherWarehouse::operator=(CodePointMatcherWarehouse&& src) U_NOEXCEPT { - codePoints = std::move(src.codePoints); - codePointsOverflow = std::move(src.codePointsOverflow); - codePointCount = src.codePointCount; - codePointNumBatches = src.codePointNumBatches; - return *this; -} - -NumberParseMatcher& CodePointMatcherWarehouse::nextCodePointMatcher(UChar32 cp) { - if (codePointCount < CODE_POINT_STACK_CAPACITY) { - return codePoints[codePointCount++] = {cp}; - } - int32_t totalCapacity = CODE_POINT_STACK_CAPACITY + codePointNumBatches * CODE_POINT_BATCH_SIZE; - if (codePointCount >= totalCapacity) { - // Need a new batch - auto* nextBatch = new CodePointMatcher[CODE_POINT_BATCH_SIZE]; - if (codePointNumBatches >= codePointsOverflow.getCapacity()) { - // Need more room for storing pointers to batches - codePointsOverflow.resize(codePointNumBatches * 2, codePointNumBatches); - } - codePointsOverflow[codePointNumBatches++] = nextBatch; - } - return codePointsOverflow[codePointNumBatches - 1][(codePointCount++ - CODE_POINT_STACK_CAPACITY) % - CODE_POINT_BATCH_SIZE] = {cp}; -} - - -AffixTokenMatcherWarehouse::AffixTokenMatcherWarehouse(const AffixTokenMatcherSetupData* setupData) - : fSetupData(setupData) {} - -NumberParseMatcher& AffixTokenMatcherWarehouse::minusSign() { - return fMinusSign = {fSetupData->dfs, true}; -} - -NumberParseMatcher& AffixTokenMatcherWarehouse::plusSign() { - return fPlusSign = {fSetupData->dfs, true}; -} - -NumberParseMatcher& AffixTokenMatcherWarehouse::percent() { - return fPercent = {fSetupData->dfs}; -} - -NumberParseMatcher& AffixTokenMatcherWarehouse::permille() { - return fPermille = {fSetupData->dfs}; -} - -NumberParseMatcher& AffixTokenMatcherWarehouse::currency(UErrorCode& status) { - return fCurrency = {fSetupData->currencySymbols, fSetupData->dfs, fSetupData->parseFlags, status}; -} - -IgnorablesMatcher& AffixTokenMatcherWarehouse::ignorables() { - return fSetupData->ignorables; -} - -NumberParseMatcher& AffixTokenMatcherWarehouse::nextCodePointMatcher(UChar32 cp) { - return fCodePoints.nextCodePointMatcher(cp); -} - - -CodePointMatcher::CodePointMatcher(UChar32 cp) - : fCp(cp) {} - -bool CodePointMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode&) const { - if (segment.startsWith(fCp)) { - segment.adjustOffsetByCodePoint(); - result.setCharsConsumed(segment); - } - return false; -} - -bool CodePointMatcher::smokeTest(const StringSegment& segment) const { - return segment.startsWith(fCp); -} - -UnicodeString CodePointMatcher::toString() const { - return u"<CodePoint>"; -} - - -AffixPatternMatcher AffixPatternMatcher::fromAffixPattern(const UnicodeString& affixPattern, - AffixTokenMatcherWarehouse& tokenWarehouse, - parse_flags_t parseFlags, bool* success, - UErrorCode& status) { - if (affixPattern.isEmpty()) { - *success = false; - return {}; - } - *success = true; - - IgnorablesMatcher* ignorables; - if (0 != (parseFlags & PARSE_FLAG_EXACT_AFFIX)) { - ignorables = nullptr; - } else { - ignorables = &tokenWarehouse.ignorables(); - } - - AffixPatternMatcherBuilder builder(affixPattern, tokenWarehouse, ignorables); - AffixUtils::iterateWithConsumer(affixPattern, builder, status); - return builder.build(); -} - -AffixPatternMatcher::AffixPatternMatcher(MatcherArray& matchers, int32_t matchersLen, - const UnicodeString& pattern) - : ArraySeriesMatcher(matchers, matchersLen), fPattern(pattern) {} - -UnicodeString AffixPatternMatcher::getPattern() const { - return fPattern.toAliasedUnicodeString(); -} - -bool AffixPatternMatcher::operator==(const AffixPatternMatcher& other) const { - return fPattern == other.fPattern; -} - - -AffixMatcherWarehouse::AffixMatcherWarehouse(AffixTokenMatcherWarehouse* tokenWarehouse) - : fTokenWarehouse(tokenWarehouse) { -} - -bool AffixMatcherWarehouse::isInteresting(const AffixPatternProvider& patternInfo, - const IgnorablesMatcher& ignorables, parse_flags_t parseFlags, - UErrorCode& status) { - UnicodeString posPrefixString = patternInfo.getString(AffixPatternProvider::AFFIX_POS_PREFIX); - UnicodeString posSuffixString = patternInfo.getString(AffixPatternProvider::AFFIX_POS_SUFFIX); - UnicodeString negPrefixString; - UnicodeString negSuffixString; - if (patternInfo.hasNegativeSubpattern()) { - negPrefixString = patternInfo.getString(AffixPatternProvider::AFFIX_NEG_PREFIX); - negSuffixString = patternInfo.getString(AffixPatternProvider::AFFIX_NEG_SUFFIX); - } - - if (0 == (parseFlags & PARSE_FLAG_USE_FULL_AFFIXES) && - AffixUtils::containsOnlySymbolsAndIgnorables(posPrefixString, *ignorables.getSet(), status) && - AffixUtils::containsOnlySymbolsAndIgnorables(posSuffixString, *ignorables.getSet(), status) && - AffixUtils::containsOnlySymbolsAndIgnorables(negPrefixString, *ignorables.getSet(), status) && - AffixUtils::containsOnlySymbolsAndIgnorables(negSuffixString, *ignorables.getSet(), status) - // HACK: Plus and minus sign are a special case: we accept them trailing only if they are - // trailing in the pattern string. - && !AffixUtils::containsType(posSuffixString, TYPE_PLUS_SIGN, status) && - !AffixUtils::containsType(posSuffixString, TYPE_MINUS_SIGN, status) && - !AffixUtils::containsType(negSuffixString, TYPE_PLUS_SIGN, status) && - !AffixUtils::containsType(negSuffixString, TYPE_MINUS_SIGN, status)) { - // The affixes contain only symbols and ignorables. - // No need to generate affix matchers. - return false; - } - return true; -} - -void AffixMatcherWarehouse::createAffixMatchers(const AffixPatternProvider& patternInfo, - MutableMatcherCollection& output, - const IgnorablesMatcher& ignorables, - parse_flags_t parseFlags, UErrorCode& status) { - if (!isInteresting(patternInfo, ignorables, parseFlags, status)) { - return; - } - - // The affixes have interesting characters, or we are in strict mode. - // Use initial capacity of 6, the highest possible number of AffixMatchers. - UnicodeString sb; - bool includeUnpaired = 0 != (parseFlags & PARSE_FLAG_INCLUDE_UNPAIRED_AFFIXES); - UNumberSignDisplay signDisplay = (0 != (parseFlags & PARSE_FLAG_PLUS_SIGN_ALLOWED)) ? UNUM_SIGN_ALWAYS - : UNUM_SIGN_AUTO; - - int32_t numAffixMatchers = 0; - int32_t numAffixPatternMatchers = 0; - - AffixPatternMatcher* posPrefix = nullptr; - AffixPatternMatcher* posSuffix = nullptr; - - // Pre-process the affix strings to resolve LDML rules like sign display. - for (int8_t signum = 1; signum >= -1; signum--) { - // Generate Prefix - bool hasPrefix = false; - PatternStringUtils::patternInfoToStringBuilder( - patternInfo, true, signum, signDisplay, StandardPlural::OTHER, false, sb); - fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( - sb, *fTokenWarehouse, parseFlags, &hasPrefix, status); - AffixPatternMatcher* prefix = hasPrefix ? &fAffixPatternMatchers[numAffixPatternMatchers++] - : nullptr; - - // Generate Suffix - bool hasSuffix = false; - PatternStringUtils::patternInfoToStringBuilder( - patternInfo, false, signum, signDisplay, StandardPlural::OTHER, false, sb); - fAffixPatternMatchers[numAffixPatternMatchers] = AffixPatternMatcher::fromAffixPattern( - sb, *fTokenWarehouse, parseFlags, &hasSuffix, status); - AffixPatternMatcher* suffix = hasSuffix ? &fAffixPatternMatchers[numAffixPatternMatchers++] - : nullptr; - - if (signum == 1) { - posPrefix = prefix; - posSuffix = suffix; - } else if (equals(prefix, posPrefix) && equals(suffix, posSuffix)) { - // Skip adding these matchers (we already have equivalents) - continue; - } - - // Flags for setting in the ParsedNumber; the token matchers may add more. - int flags = (signum == -1) ? FLAG_NEGATIVE : 0; - - // Note: it is indeed possible for posPrefix and posSuffix to both be null. - // We still need to add that matcher for strict mode to work. - fAffixMatchers[numAffixMatchers++] = {prefix, suffix, flags}; - if (includeUnpaired && prefix != nullptr && suffix != nullptr) { - // The following if statements are designed to prevent adding two identical matchers. - if (signum == 1 || !equals(prefix, posPrefix)) { - fAffixMatchers[numAffixMatchers++] = {prefix, nullptr, flags}; - } - if (signum == 1 || !equals(suffix, posSuffix)) { - fAffixMatchers[numAffixMatchers++] = {nullptr, suffix, flags}; - } - } - } - - // Put the AffixMatchers in order, and then add them to the output. - // Since there are at most 9 elements, do a simple-to-implement bubble sort. - bool madeChanges; - do { - madeChanges = false; - for (int32_t i = 1; i < numAffixMatchers; i++) { - if (fAffixMatchers[i - 1].compareTo(fAffixMatchers[i]) > 0) { - madeChanges = true; - AffixMatcher temp = std::move(fAffixMatchers[i - 1]); - fAffixMatchers[i - 1] = std::move(fAffixMatchers[i]); - fAffixMatchers[i] = std::move(temp); - } - } - } while (madeChanges); - - for (int32_t i = 0; i < numAffixMatchers; i++) { - // Enable the following line to debug affixes - //std::cout << "Adding affix matcher: " << CStr(fAffixMatchers[i].toString())() << std::endl; - output.addMatcher(fAffixMatchers[i]); - } -} - - -AffixMatcher::AffixMatcher(AffixPatternMatcher* prefix, AffixPatternMatcher* suffix, result_flags_t flags) - : fPrefix(prefix), fSuffix(suffix), fFlags(flags) {} - -bool AffixMatcher::match(StringSegment& segment, ParsedNumber& result, UErrorCode& status) const { - if (!result.seenNumber()) { - // Prefix - // Do not match if: - // 1. We have already seen a prefix (result.prefix != null) - // 2. The prefix in this AffixMatcher is empty (prefix == null) - if (!result.prefix.isBogus() || fPrefix == nullptr) { - return false; - } - - // Attempt to match the prefix. - int initialOffset = segment.getOffset(); - bool maybeMore = fPrefix->match(segment, result, status); - if (initialOffset != segment.getOffset()) { - result.prefix = fPrefix->getPattern(); - } - return maybeMore; - - } else { - // Suffix - // Do not match if: - // 1. We have already seen a suffix (result.suffix != null) - // 2. The suffix in this AffixMatcher is empty (suffix == null) - // 3. The matched prefix does not equal this AffixMatcher's prefix - if (!result.suffix.isBogus() || fSuffix == nullptr || !matched(fPrefix, result.prefix)) { - return false; - } - - // Attempt to match the suffix. - int initialOffset = segment.getOffset(); - bool maybeMore = fSuffix->match(segment, result, status); - if (initialOffset != segment.getOffset()) { - result.suffix = fSuffix->getPattern(); - } - return maybeMore; - } -} - -bool AffixMatcher::smokeTest(const StringSegment& segment) const { - return (fPrefix != nullptr && fPrefix->smokeTest(segment)) || - (fSuffix != nullptr && fSuffix->smokeTest(segment)); -} - -void AffixMatcher::postProcess(ParsedNumber& result) const { - // Check to see if our affix is the one that was matched. If so, set the flags in the result. - if (matched(fPrefix, result.prefix) && matched(fSuffix, result.suffix)) { - // Fill in the result prefix and suffix with non-null values (empty string). - // Used by strict mode to determine whether an entire affix pair was matched. - if (result.prefix.isBogus()) { - result.prefix = UnicodeString(); - } - if (result.suffix.isBogus()) { - result.suffix = UnicodeString(); - } - result.flags |= fFlags; - if (fPrefix != nullptr) { - fPrefix->postProcess(result); - } - if (fSuffix != nullptr) { - fSuffix->postProcess(result); - } - } -} - -int8_t AffixMatcher::compareTo(const AffixMatcher& rhs) const { - const AffixMatcher& lhs = *this; - if (length(lhs.fPrefix) != length(rhs.fPrefix)) { - return length(lhs.fPrefix) > length(rhs.fPrefix) ? -1 : 1; - } else if (length(lhs.fSuffix) != length(rhs.fSuffix)) { - return length(lhs.fSuffix) > length(rhs.fSuffix) ? -1 : 1; - } else { - return 0; - } -} - -UnicodeString AffixMatcher::toString() const { - bool isNegative = 0 != (fFlags & FLAG_NEGATIVE); - return UnicodeString(u"<Affix") + (isNegative ? u":negative " : u" ") + - (fPrefix ? fPrefix->getPattern() : u"null") + u"#" + - (fSuffix ? fSuffix->getPattern() : u"null") + u">"; - -} - - -#endif /* #if !UCONFIG_NO_FORMATTING */ |