diff options
Diffstat (limited to 'deps/icu-small/source/i18n/number_patternstring.cpp')
-rw-r--r-- | deps/icu-small/source/i18n/number_patternstring.cpp | 323 |
1 files changed, 275 insertions, 48 deletions
diff --git a/deps/icu-small/source/i18n/number_patternstring.cpp b/deps/icu-small/source/i18n/number_patternstring.cpp index 20178824b0..63195eed98 100644 --- a/deps/icu-small/source/i18n/number_patternstring.cpp +++ b/deps/icu-small/source/i18n/number_patternstring.cpp @@ -3,36 +3,51 @@ #include "unicode/utypes.h" -#if !UCONFIG_NO_FORMATTING && !UPRV_INCOMPLETE_CPP11_SUPPORT +#if !UCONFIG_NO_FORMATTING + +// Allow implicit conversion from char16_t* to UnicodeString for this file: +// Helpful in toString methods and elsewhere. +#define UNISTR_FROM_STRING_EXPLICIT +#define UNISTR_FROM_CHAR_EXPLICIT #include "uassert.h" #include "number_patternstring.h" #include "unicode/utf16.h" #include "number_utils.h" +#include "number_roundingutils.h" using namespace icu; using namespace icu::number; using namespace icu::number::impl; -void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, UErrorCode &status) { + +void PatternParser::parseToPatternInfo(const UnicodeString& patternString, ParsedPatternInfo& patternInfo, + UErrorCode& status) { patternInfo.consumePattern(patternString, status); } DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, IgnoreRounding ignoreRounding, - UErrorCode &status) { + UErrorCode& status) { DecimalFormatProperties properties; parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); return properties; } -void PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, - IgnoreRounding ignoreRounding, UErrorCode &status) { +DecimalFormatProperties PatternParser::parseToProperties(const UnicodeString& pattern, + UErrorCode& status) { + return parseToProperties(pattern, IGNORE_ROUNDING_NEVER, status); +} + +void +PatternParser::parseToExistingProperties(const UnicodeString& pattern, DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { parseToExistingPropertiesImpl(pattern, properties, ignoreRounding, status); } + char16_t ParsedPatternInfo::charAt(int32_t flags, int32_t index) const { - const Endpoints &endpoints = getEndpoints(flags); + const Endpoints& endpoints = getEndpoints(flags); if (index < 0 || index >= endpoints.end - endpoints.start) { U_ASSERT(false); } @@ -43,12 +58,12 @@ int32_t ParsedPatternInfo::length(int32_t flags) const { return getLengthFromEndpoints(getEndpoints(flags)); } -int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints &endpoints) { +int32_t ParsedPatternInfo::getLengthFromEndpoints(const Endpoints& endpoints) { return endpoints.end - endpoints.start; } UnicodeString ParsedPatternInfo::getString(int32_t flags) const { - const Endpoints &endpoints = getEndpoints(flags); + const Endpoints& endpoints = getEndpoints(flags); if (endpoints.start == endpoints.end) { return UnicodeString(); } @@ -56,7 +71,7 @@ UnicodeString ParsedPatternInfo::getString(int32_t flags) const { return UnicodeString(pattern, endpoints.start, endpoints.end - endpoints.start); } -const Endpoints &ParsedPatternInfo::getEndpoints(int32_t flags) const { +const Endpoints& ParsedPatternInfo::getEndpoints(int32_t flags) const { bool prefix = (flags & AFFIX_PREFIX) != 0; bool isNegative = (flags & AFFIX_NEGATIVE_SUBPATTERN) != 0; bool padding = (flags & AFFIX_PADDING) != 0; @@ -91,8 +106,8 @@ bool ParsedPatternInfo::hasCurrencySign() const { return positive.hasCurrencySign || (fHasNegativeSubpattern && negative.hasCurrencySign); } -bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode &status) const { - return AffixUtils::containsType(UnicodeStringCharSequence(pattern), type, status); +bool ParsedPatternInfo::containsSymbolType(AffixPatternType type, UErrorCode& status) const { + return AffixUtils::containsType(pattern, type, status); } bool ParsedPatternInfo::hasBody() const { @@ -117,10 +132,14 @@ UChar32 ParsedPatternInfo::ParserState::next() { return codePoint; } -void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode &status) { +void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErrorCode& status) { if (U_FAILURE(status)) { return; } this->pattern = patternString; + // This class is not intended for writing twice! + // Use move assignment to overwrite instead. + U_ASSERT(state.offset == 0); + // pattern := subpattern (';' subpattern)? currentSubpattern = &positive; consumeSubpattern(status); @@ -141,7 +160,7 @@ void ParsedPatternInfo::consumePattern(const UnicodeString& patternString, UErro } } -void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) { +void ParsedPatternInfo::consumeSubpattern(UErrorCode& status) { // subpattern := literals? number exponent? literals? consumePadding(PadPosition::UNUM_PAD_BEFORE_PREFIX, status); if (U_FAILURE(status)) { return; } @@ -161,23 +180,24 @@ void ParsedPatternInfo::consumeSubpattern(UErrorCode &status) { if (U_FAILURE(status)) { return; } } -void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode &status) { +void ParsedPatternInfo::consumePadding(PadPosition paddingLocation, UErrorCode& status) { if (state.peek() != u'*') { return; } - if (!currentSubpattern->paddingLocation.isNull()) { + if (currentSubpattern->hasPadding) { state.toParseException(u"Cannot have multiple pad specifiers"); status = U_MULTIPLE_PAD_SPECIFIERS; return; } currentSubpattern->paddingLocation = paddingLocation; + currentSubpattern->hasPadding = true; state.next(); // consume the '*' currentSubpattern->paddingEndpoints.start = state.offset; consumeLiteral(status); currentSubpattern->paddingEndpoints.end = state.offset; } -void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) { +void ParsedPatternInfo::consumeAffix(Endpoints& endpoints, UErrorCode& status) { // literals := { literal } endpoints.start = state.offset; while (true) { @@ -233,7 +253,7 @@ void ParsedPatternInfo::consumeAffix(Endpoints &endpoints, UErrorCode &status) { endpoints.end = state.offset; } -void ParsedPatternInfo::consumeLiteral(UErrorCode &status) { +void ParsedPatternInfo::consumeLiteral(UErrorCode& status) { if (state.peek() == -1) { state.toParseException(u"Expected unquoted literal but found EOL"); status = U_PATTERN_SYNTAX_ERROR; @@ -256,7 +276,7 @@ void ParsedPatternInfo::consumeLiteral(UErrorCode &status) { } } -void ParsedPatternInfo::consumeFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeFormat(UErrorCode& status) { consumeIntegerFormat(status); if (U_FAILURE(status)) { return; } if (state.peek() == u'.') { @@ -268,9 +288,9 @@ void ParsedPatternInfo::consumeFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeIntegerFormat(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; while (true) { switch (state.peek()) { @@ -359,9 +379,9 @@ void ParsedPatternInfo::consumeIntegerFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) { +void ParsedPatternInfo::consumeFractionFormat(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; int32_t zeroCounter = 0; while (true) { @@ -407,9 +427,9 @@ void ParsedPatternInfo::consumeFractionFormat(UErrorCode &status) { } } -void ParsedPatternInfo::consumeExponent(UErrorCode &status) { +void ParsedPatternInfo::consumeExponent(UErrorCode& status) { // Convenience reference: - ParsedSubpatternInfo &result = *currentSubpattern; + ParsedSubpatternInfo& result = *currentSubpattern; if (state.peek() != u'E') { return; @@ -437,9 +457,9 @@ void ParsedPatternInfo::consumeExponent(UErrorCode &status) { /// END RECURSIVE DESCENT PARSER IMPLEMENTATION /// /////////////////////////////////////////////////// -void -PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, DecimalFormatProperties &properties, - IgnoreRounding ignoreRounding, UErrorCode &status) { +void PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, + DecimalFormatProperties& properties, + IgnoreRounding ignoreRounding, UErrorCode& status) { if (pattern.length() == 0) { // Backwards compatibility requires that we reset to the default values. // TODO: Only overwrite the properties that "saveToProperties" normally touches? @@ -453,13 +473,13 @@ PatternParser::parseToExistingPropertiesImpl(const UnicodeString& pattern, Decim patternInfoToProperties(properties, patternInfo, ignoreRounding, status); } -void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, - ParsedPatternInfo& patternInfo, - IgnoreRounding _ignoreRounding, UErrorCode &status) { +void +PatternParser::patternInfoToProperties(DecimalFormatProperties& properties, ParsedPatternInfo& patternInfo, + IgnoreRounding _ignoreRounding, UErrorCode& status) { // Translate from PatternParseResult to Properties. // Note that most data from "negative" is ignored per the specification of DecimalFormat. - const ParsedSubpatternInfo &positive = patternInfo.positive; + const ParsedSubpatternInfo& positive = patternInfo.positive; bool ignoreRounding; if (_ignoreRounding == IGNORE_ROUNDING_NEVER) { @@ -477,8 +497,10 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, auto grouping3 = static_cast<int16_t> ((positive.groupingSizes >> 32) & 0xffff); if (grouping2 != -1) { properties.groupingSize = grouping1; + properties.groupingUsed = true; } else { properties.groupingSize = -1; + properties.groupingUsed = false; } if (grouping3 != -1) { properties.secondaryGroupingSize = grouping2; @@ -508,8 +530,7 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, properties.maximumFractionDigits = -1; properties.roundingIncrement = 0.0; properties.minimumSignificantDigits = positive.integerAtSigns; - properties.maximumSignificantDigits = - positive.integerAtSigns + positive.integerTrailingHashSigns; + properties.maximumSignificantDigits = positive.integerAtSigns + positive.integerTrailingHashSigns; } else if (!positive.rounding.isZero()) { if (!ignoreRounding) { properties.minimumFractionDigits = minFrac; @@ -568,11 +589,11 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, UnicodeString posSuffix = patternInfo.getString(0); // Padding settings - if (!positive.paddingLocation.isNull()) { + if (positive.hasPadding) { // The width of the positive prefix and suffix templates are included in the padding - int paddingWidth = - positive.widthExceptAffixes + AffixUtils::estimateLength(UnicodeStringCharSequence(posPrefix), status) + - AffixUtils::estimateLength(UnicodeStringCharSequence(posSuffix), status); + int paddingWidth = positive.widthExceptAffixes + + AffixUtils::estimateLength(posPrefix, status) + + AffixUtils::estimateLength(posSuffix, status); properties.formatWidth = paddingWidth; UnicodeString rawPaddingString = patternInfo.getString(AffixPatternProvider::AFFIX_PADDING); if (rawPaddingString.length() == 1) { @@ -622,8 +643,8 @@ void PatternParser::patternInfoToProperties(DecimalFormatProperties &properties, /// End PatternStringParser.java; begin PatternStringUtils.java /// /////////////////////////////////////////////////////////////////// -UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties &properties, - UErrorCode &status) { +UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatProperties& properties, + UErrorCode& status) { UnicodeString sb; // Convenience references @@ -656,7 +677,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP if (!ppp.isBogus()) { sb.append(ppp); } - sb.append(AffixUtils::escape(UnicodeStringCharSequence(pp))); + sb.append(AffixUtils::escape(pp)); int afterPrefixPos = sb.length(); // Figure out the grouping sizes. @@ -695,11 +716,11 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP } } else if (roundingInterval != 0.0) { // Rounding Interval. - digitsStringScale = minFrac; + digitsStringScale = -roundingutils::doubleFractionLength(roundingInterval); // TODO: Check for DoS here? DecimalQuantity incrementQuantity; incrementQuantity.setToDouble(roundingInterval); - incrementQuantity.adjustMagnitude(minFrac); + incrementQuantity.adjustMagnitude(-digitsStringScale); incrementQuantity.roundToMagnitude(0, kDefaultMode, status); UnicodeString str = incrementQuantity.toPlainString(); if (str.charAt(0) == u'-') { @@ -753,7 +774,7 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP if (!psp.isBogus()) { sb.append(psp); } - sb.append(AffixUtils::escape(UnicodeStringCharSequence(ps))); + sb.append(AffixUtils::escape(ps)); // Resolve Padding if (paddingWidth != -1 && !paddingLocation.isNull()) { @@ -795,22 +816,25 @@ UnicodeString PatternStringUtils::propertiesToPatternString(const DecimalFormatP if (!npp.isBogus()) { sb.append(npp); } - sb.append(AffixUtils::escape(UnicodeStringCharSequence(np))); + sb.append(AffixUtils::escape(np)); // Copy the positive digit format into the negative. // This is optional; the pattern is the same as if '#' were appended here instead. - sb.append(sb, afterPrefixPos, beforeSuffixPos); + // NOTE: It is not safe to append the UnicodeString to itself, so we need to copy. + // See http://bugs.icu-project.org/trac/ticket/13707 + UnicodeString copy(sb); + sb.append(copy, afterPrefixPos, beforeSuffixPos - afterPrefixPos); if (!nsp.isBogus()) { sb.append(nsp); } - sb.append(AffixUtils::escape(UnicodeStringCharSequence(ns))); + sb.append(AffixUtils::escape(ns)); } return sb; } int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& output, int startIndex, - UErrorCode &status) { - (void)status; + UErrorCode& status) { + (void) status; if (input.length() == 0) { input.setTo(kFallbackPaddingString, -1); } @@ -840,4 +864,207 @@ int PatternStringUtils::escapePaddingString(UnicodeString input, UnicodeString& return output.length() - startLength; } +UnicodeString +PatternStringUtils::convertLocalized(const UnicodeString& input, const DecimalFormatSymbols& symbols, + bool toLocalized, UErrorCode& status) { + // Construct a table of strings to be converted between localized and standard. + static constexpr int32_t LEN = 21; + UnicodeString table[LEN][2]; + int standIdx = toLocalized ? 0 : 1; + int localIdx = toLocalized ? 1 : 0; + table[0][standIdx] = u"%"; + table[0][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); + table[1][standIdx] = u"‰"; + table[1][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); + table[2][standIdx] = u"."; + table[2][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDecimalSeparatorSymbol); + table[3][standIdx] = u","; + table[3][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kGroupingSeparatorSymbol); + table[4][standIdx] = u"-"; + table[4][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); + table[5][standIdx] = u"+"; + table[5][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); + table[6][standIdx] = u";"; + table[6][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPatternSeparatorSymbol); + table[7][standIdx] = u"@"; + table[7][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kSignificantDigitSymbol); + table[8][standIdx] = u"E"; + table[8][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kExponentialSymbol); + table[9][standIdx] = u"*"; + table[9][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kPadEscapeSymbol); + table[10][standIdx] = u"#"; + table[10][localIdx] = symbols.getConstSymbol(DecimalFormatSymbols::kDigitSymbol); + for (int i = 0; i < 10; i++) { + table[11 + i][standIdx] = u'0' + i; + table[11 + i][localIdx] = symbols.getConstDigitSymbol(i); + } + + // Special case: quotes are NOT allowed to be in any localIdx strings. + // Substitute them with '’' instead. + for (int32_t i = 0; i < LEN; i++) { + table[i][localIdx].findAndReplace(u'\'', u'’'); + } + + // Iterate through the string and convert. + // State table: + // 0 => base state + // 1 => first char inside a quoted sequence in input and output string + // 2 => inside a quoted sequence in input and output string + // 3 => first char after a close quote in input string; + // close quote still needs to be written to output string + // 4 => base state in input string; inside quoted sequence in output string + // 5 => first char inside a quoted sequence in input string; + // inside quoted sequence in output string + UnicodeString result; + int state = 0; + for (int offset = 0; offset < input.length(); offset++) { + UChar ch = input.charAt(offset); + + // Handle a quote character (state shift) + if (ch == u'\'') { + if (state == 0) { + result.append(u'\''); + state = 1; + continue; + } else if (state == 1) { + result.append(u'\''); + state = 0; + continue; + } else if (state == 2) { + state = 3; + continue; + } else if (state == 3) { + result.append(u'\''); + result.append(u'\''); + state = 1; + continue; + } else if (state == 4) { + state = 5; + continue; + } else { + U_ASSERT(state == 5); + result.append(u'\''); + result.append(u'\''); + state = 4; + continue; + } + } + + if (state == 0 || state == 3 || state == 4) { + for (auto& pair : table) { + // Perform a greedy match on this symbol string + UnicodeString temp = input.tempSubString(offset, pair[0].length()); + if (temp == pair[0]) { + // Skip ahead past this region for the next iteration + offset += pair[0].length() - 1; + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + result.append(pair[1]); + goto continue_outer; + } + } + // No replacement found. Check if a special quote is necessary + for (auto& pair : table) { + UnicodeString temp = input.tempSubString(offset, pair[1].length()); + if (temp == pair[1]) { + if (state == 0) { + result.append(u'\''); + state = 4; + } + result.append(ch); + goto continue_outer; + } + } + // Still nothing. Copy the char verbatim. (Add a close quote if necessary) + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + result.append(ch); + } else { + U_ASSERT(state == 1 || state == 2 || state == 5); + result.append(ch); + state = 2; + } + continue_outer:; + } + // Resolve final quotes + if (state == 3 || state == 4) { + result.append(u'\''); + state = 0; + } + if (state != 0) { + // Malformed localized pattern: unterminated quote + status = U_PATTERN_SYNTAX_ERROR; + } + return result; +} + +void PatternStringUtils::patternInfoToStringBuilder(const AffixPatternProvider& patternInfo, bool isPrefix, + int8_t signum, UNumberSignDisplay signDisplay, + StandardPlural::Form plural, + bool perMilleReplacesPercent, UnicodeString& output) { + + // Should the output render '+' where '-' would normally appear in the pattern? + bool plusReplacesMinusSign = signum != -1 && ( + signDisplay == UNUM_SIGN_ALWAYS || signDisplay == UNUM_SIGN_ACCOUNTING_ALWAYS || ( + signum == 1 && ( + signDisplay == UNUM_SIGN_EXCEPT_ZERO || + signDisplay == UNUM_SIGN_ACCOUNTING_EXCEPT_ZERO))) && + patternInfo.positiveHasPlusSign() == false; + + // Should we use the affix from the negative subpattern? (If not, we will use the positive + // subpattern.) + bool useNegativeAffixPattern = patternInfo.hasNegativeSubpattern() && ( + signum == -1 || (patternInfo.negativeHasMinusSign() && plusReplacesMinusSign)); + + // Resolve the flags for the affix pattern. + int flags = 0; + if (useNegativeAffixPattern) { + flags |= AffixPatternProvider::AFFIX_NEGATIVE_SUBPATTERN; + } + if (isPrefix) { + flags |= AffixPatternProvider::AFFIX_PREFIX; + } + if (plural != StandardPlural::Form::COUNT) { + U_ASSERT(plural == (AffixPatternProvider::AFFIX_PLURAL_MASK & plural)); + flags |= plural; + } + + // Should we prepend a sign to the pattern? + bool prependSign; + if (!isPrefix || useNegativeAffixPattern) { + prependSign = false; + } else if (signum == -1) { + prependSign = signDisplay != UNUM_SIGN_NEVER; + } else { + prependSign = plusReplacesMinusSign; + } + + // Compute the length of the affix pattern. + int length = patternInfo.length(flags) + (prependSign ? 1 : 0); + + // Finally, set the result into the StringBuilder. + output.remove(); + for (int index = 0; index < length; index++) { + char16_t candidate; + if (prependSign && index == 0) { + candidate = u'-'; + } else if (prependSign) { + candidate = patternInfo.charAt(flags, index - 1); + } else { + candidate = patternInfo.charAt(flags, index); + } + if (plusReplacesMinusSign && candidate == u'-') { + candidate = u'+'; + } + if (perMilleReplacesPercent && candidate == u'%') { + candidate = u'‰'; + } + output.append(candidate); + } +} + #endif /* #if !UCONFIG_NO_FORMATTING */ |