diff options
Diffstat (limited to 'deps/icu-small/source/i18n/affixpatternparser.cpp')
-rw-r--r-- | deps/icu-small/source/i18n/affixpatternparser.cpp | 698 |
1 files changed, 0 insertions, 698 deletions
diff --git a/deps/icu-small/source/i18n/affixpatternparser.cpp b/deps/icu-small/source/i18n/affixpatternparser.cpp deleted file mode 100644 index d9e122953a..0000000000 --- a/deps/icu-small/source/i18n/affixpatternparser.cpp +++ /dev/null @@ -1,698 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - * Copyright (C) 2015, International Business Machines - * Corporation and others. All Rights Reserved. - * - * file name: affixpatternparser.cpp - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_FORMATTING - -#include "unicode/dcfmtsym.h" -#include "unicode/plurrule.h" -#include "unicode/strenum.h" -#include "unicode/ucurr.h" -#include "unicode/ustring.h" -#include "affixpatternparser.h" -#include "charstr.h" -#include "precision.h" -#include "uassert.h" -#include "unistrappender.h" - -static const UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4}; - -static const UChar gPercent = 0x25; -static const UChar gPerMill = 0x2030; -static const UChar gNegative = 0x2D; -static const UChar gPositive = 0x2B; - -#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | (l & 0xFF))) - -#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F)) - -#define UNPACK_LONG(c) (((c) >> 8) & 0x80) - -#define UNPACK_LENGTH(c) ((c) & 0xFF) - -U_NAMESPACE_BEGIN - -static int32_t -nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { - if (buffer[idx] != 0x27 || idx + 1 == len) { - *token = buffer[idx]; - return 1; - } - *token = buffer[idx + 1]; - if (buffer[idx + 1] == 0xA4) { - int32_t i = 2; - for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i) - ; - return i; - } - return 2; -} - -static int32_t -nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) { - *token = buffer[idx]; - int32_t max; - switch (buffer[idx]) { - case 0x27: - max = 2; - break; - case 0xA4: - max = 3; - break; - default: - max = 1; - break; - } - int32_t i = 1; - for (; 
idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i) - ; - return i; -} - -CurrencyAffixInfo::CurrencyAffixInfo() - : fSymbol(gDefaultSymbols, 1), - fISO(gDefaultSymbols, 2), - fLong(DigitAffix(gDefaultSymbols, 3)), - fIsDefault(TRUE) { -} - -void -CurrencyAffixInfo::set( - const char *locale, - const PluralRules *rules, - const UChar *currency, - UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - fIsDefault = FALSE; - if (currency == NULL) { - fSymbol.setTo(gDefaultSymbols, 1); - fISO.setTo(gDefaultSymbols, 2); - fLong.remove(); - fLong.append(gDefaultSymbols, 3); - fIsDefault = TRUE; - return; - } - int32_t len; - UBool unusedIsChoice; - const UChar *symbol = ucurr_getName( - currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice, - &len, &status); - if (U_FAILURE(status)) { - return; - } - fSymbol.setTo(symbol, len); - fISO.setTo(currency, u_strlen(currency)); - fLong.remove(); - StringEnumeration* keywords = rules->getKeywords(status); - if (U_FAILURE(status)) { - return; - } - const UnicodeString* pluralCount; - while ((pluralCount = keywords->snext(status)) != NULL) { - CharString pCount; - pCount.appendInvariantChars(*pluralCount, status); - const UChar *pluralName = ucurr_getPluralName( - currency, locale, &unusedIsChoice, pCount.data(), - &len, &status); - fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status); - } - delete keywords; -} - -void -CurrencyAffixInfo::adjustPrecision( - const UChar *currency, const UCurrencyUsage usage, - FixedPrecision &precision, UErrorCode &status) { - if (U_FAILURE(status)) { - return; - } - - int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage( - currency, usage, &status); - precision.fMin.setFracDigitCount(digitCount); - precision.fMax.setFracDigitCount(digitCount); - double increment = ucurr_getRoundingIncrementForUsage( - currency, usage, &status); - if (increment == 0.0) { - precision.fRoundingIncrement.clear(); - } else { - precision.fRoundingIncrement.set(increment); 
- // guard against round-off error - precision.fRoundingIncrement.round(6); - } -} - -void -AffixPattern::addLiteral( - const UChar *literal, int32_t start, int32_t len) { - char32Count += u_countChar32(literal + start, len); - literals.append(literal, start, len); - int32_t tlen = tokens.length(); - // Takes 4 UChars to encode maximum literal length. - UChar *tokenChars = tokens.getBuffer(tlen + 4); - - // find start of literal size. May be tlen if there is no literal. - // While finding start of literal size, compute literal length - int32_t literalLength = 0; - int32_t tLiteralStart = tlen; - while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) { - tLiteralStart--; - literalLength <<= 8; - literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]); - } - // Add number of chars we just added to literal - literalLength += len; - - // Now encode the new length starting at tLiteralStart - tlen = tLiteralStart; - tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF); - literalLength >>= 8; - while (literalLength) { - tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF); - literalLength >>= 8; - } - tokens.releaseBuffer(tlen); -} - -void -AffixPattern::add(ETokenType t) { - add(t, 1); -} - -void -AffixPattern::addCurrency(uint8_t count) { - add(kCurrency, count); -} - -void -AffixPattern::add(ETokenType t, uint8_t count) { - U_ASSERT(t != kLiteral); - char32Count += count; - switch (t) { - case kCurrency: - hasCurrencyToken = TRUE; - break; - case kPercent: - hasPercentToken = TRUE; - break; - case kPerMill: - hasPermillToken = TRUE; - break; - default: - // Do nothing - break; - } - tokens.append(PACK_TOKEN_AND_LENGTH(t, count)); -} - -AffixPattern & -AffixPattern::append(const AffixPattern &other) { - AffixPatternIterator iter; - other.iterator(iter); - UnicodeString literal; - while (iter.nextToken()) { - switch (iter.getTokenType()) { - case kLiteral: - iter.getLiteral(literal); - 
addLiteral(literal.getBuffer(), 0, literal.length()); - break; - case kCurrency: - addCurrency(static_cast<uint8_t>(iter.getTokenLength())); - break; - default: - add(iter.getTokenType()); - break; - } - } - return *this; -} - -void -AffixPattern::remove() { - tokens.remove(); - literals.remove(); - hasCurrencyToken = FALSE; - hasPercentToken = FALSE; - hasPermillToken = FALSE; - char32Count = 0; -} - -// escapes literals for strings where special characters are NOT escaped -// except for apostrophe. -static void escapeApostropheInLiteral( - const UnicodeString &literal, UnicodeStringAppender &appender) { - int32_t len = literal.length(); - const UChar *buffer = literal.getBuffer(); - for (int32_t i = 0; i < len; ++i) { - UChar ch = buffer[i]; - switch (ch) { - case 0x27: - appender.append((UChar) 0x27); - appender.append((UChar) 0x27); - break; - default: - appender.append(ch); - break; - } - } -} - - -// escapes literals for user strings where special characters in literals -// are escaped with apostrophe. 
-static void escapeLiteral( - const UnicodeString &literal, UnicodeStringAppender &appender) { - int32_t len = literal.length(); - const UChar *buffer = literal.getBuffer(); - for (int32_t i = 0; i < len; ++i) { - UChar ch = buffer[i]; - switch (ch) { - case 0x27: - appender.append((UChar) 0x27); - appender.append((UChar) 0x27); - break; - case 0x25: - appender.append((UChar) 0x27); - appender.append((UChar) 0x25); - appender.append((UChar) 0x27); - break; - case 0x2030: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2030); - appender.append((UChar) 0x27); - break; - case 0xA4: - appender.append((UChar) 0x27); - appender.append((UChar) 0xA4); - appender.append((UChar) 0x27); - break; - case 0x2D: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2D); - appender.append((UChar) 0x27); - break; - case 0x2B: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2B); - appender.append((UChar) 0x27); - break; - default: - appender.append(ch); - break; - } - } -} - -UnicodeString & -AffixPattern::toString(UnicodeString &appendTo) const { - AffixPatternIterator iter; - iterator(iter); - UnicodeStringAppender appender(appendTo); - UnicodeString literal; - while (iter.nextToken()) { - switch (iter.getTokenType()) { - case kLiteral: - escapeApostropheInLiteral(iter.getLiteral(literal), appender); - break; - case kPercent: - appender.append((UChar) 0x27); - appender.append((UChar) 0x25); - break; - case kPerMill: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2030); - break; - case kCurrency: - { - appender.append((UChar) 0x27); - int32_t cl = iter.getTokenLength(); - for (int32_t i = 0; i < cl; ++i) { - appender.append((UChar) 0xA4); - } - } - break; - case kNegative: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2D); - break; - case kPositive: - appender.append((UChar) 0x27); - appender.append((UChar) 0x2B); - break; - default: - U_ASSERT(FALSE); - break; - } - } - return appendTo; -} - -UnicodeString & 
-AffixPattern::toUserString(UnicodeString &appendTo) const { - AffixPatternIterator iter; - iterator(iter); - UnicodeStringAppender appender(appendTo); - UnicodeString literal; - while (iter.nextToken()) { - switch (iter.getTokenType()) { - case kLiteral: - escapeLiteral(iter.getLiteral(literal), appender); - break; - case kPercent: - appender.append((UChar) 0x25); - break; - case kPerMill: - appender.append((UChar) 0x2030); - break; - case kCurrency: - { - int32_t cl = iter.getTokenLength(); - for (int32_t i = 0; i < cl; ++i) { - appender.append((UChar) 0xA4); - } - } - break; - case kNegative: - appender.append((UChar) 0x2D); - break; - case kPositive: - appender.append((UChar) 0x2B); - break; - default: - U_ASSERT(FALSE); - break; - } - } - return appendTo; -} - -class AffixPatternAppender : public UMemory { -public: - AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { } - - inline void append(UChar x) { - if (fIdx == UPRV_LENGTHOF(fBuffer)) { - fDest->addLiteral(fBuffer, 0, fIdx); - fIdx = 0; - } - fBuffer[fIdx++] = x; - } - - inline void append(UChar32 x) { - if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) { - fDest->addLiteral(fBuffer, 0, fIdx); - fIdx = 0; - } - U16_APPEND_UNSAFE(fBuffer, fIdx, x); - } - - inline void flush() { - if (fIdx) { - fDest->addLiteral(fBuffer, 0, fIdx); - } - fIdx = 0; - } - - /** - * flush the buffer when we go out of scope. - */ - ~AffixPatternAppender() { - flush(); - } -private: - AffixPattern *fDest; - int32_t fIdx; - UChar fBuffer[32]; - AffixPatternAppender(const AffixPatternAppender &other); - AffixPatternAppender &operator=(const AffixPatternAppender &other); -}; - - -AffixPattern & -AffixPattern::parseUserAffixString( - const UnicodeString &affixStr, - AffixPattern &appendTo, - UErrorCode &status) { - if (U_FAILURE(status)) { - return appendTo; - } - int32_t len = affixStr.length(); - const UChar *buffer = affixStr.getBuffer(); - // 0 = not quoted; 1 = quoted. 
- int32_t state = 0; - AffixPatternAppender appender(appendTo); - for (int32_t i = 0; i < len; ) { - UChar token; - int32_t tokenSize = nextUserToken(buffer, i, len, &token); - i += tokenSize; - if (token == 0x27 && tokenSize == 1) { // quote - state = 1 - state; - continue; - } - if (state == 0) { - switch (token) { - case 0x25: - appender.flush(); - appendTo.add(kPercent, 1); - break; - case 0x27: // double quote - appender.append((UChar) 0x27); - break; - case 0x2030: - appender.flush(); - appendTo.add(kPerMill, 1); - break; - case 0x2D: - appender.flush(); - appendTo.add(kNegative, 1); - break; - case 0x2B: - appender.flush(); - appendTo.add(kPositive, 1); - break; - case 0xA4: - appender.flush(); - appendTo.add(kCurrency, static_cast<uint8_t>(tokenSize)); - break; - default: - appender.append(token); - break; - } - } else { - switch (token) { - case 0x27: // double quote - appender.append((UChar) 0x27); - break; - case 0xA4: // included b/c tokenSize can be > 1 - for (int32_t j = 0; j < tokenSize; ++j) { - appender.append((UChar) 0xA4); - } - break; - default: - appender.append(token); - break; - } - } - } - return appendTo; -} - -AffixPattern & -AffixPattern::parseAffixString( - const UnicodeString &affixStr, - AffixPattern &appendTo, - UErrorCode &status) { - if (U_FAILURE(status)) { - return appendTo; - } - int32_t len = affixStr.length(); - const UChar *buffer = affixStr.getBuffer(); - for (int32_t i = 0; i < len; ) { - UChar token; - int32_t tokenSize = nextToken(buffer, i, len, &token); - if (tokenSize == 1) { - int32_t literalStart = i; - ++i; - while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) { - ++i; - } - appendTo.addLiteral(buffer, literalStart, i - literalStart); - - // If we reached end of string, we are done - if (i == len) { - return appendTo; - } - } - i += tokenSize; - switch (token) { - case 0x25: - appendTo.add(kPercent, 1); - break; - case 0x2030: - appendTo.add(kPerMill, 1); - break; - case 0x2D: - 
appendTo.add(kNegative, 1); - break; - case 0x2B: - appendTo.add(kPositive, 1); - break; - case 0xA4: - { - if (tokenSize - 1 > 3) { - status = U_PARSE_ERROR; - return appendTo; - } - appendTo.add(kCurrency, tokenSize - 1); - } - break; - default: - appendTo.addLiteral(&token, 0, 1); - break; - } - } - return appendTo; -} - -AffixPatternIterator & -AffixPattern::iterator(AffixPatternIterator &result) const { - result.nextLiteralIndex = 0; - result.lastLiteralLength = 0; - result.nextTokenIndex = 0; - result.tokens = &tokens; - result.literals = &literals; - return result; -} - -UBool -AffixPatternIterator::nextToken() { - int32_t tlen = tokens->length(); - if (nextTokenIndex == tlen) { - return FALSE; - } - ++nextTokenIndex; - const UChar *tokenBuffer = tokens->getBuffer(); - if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) == - AffixPattern::kLiteral) { - while (nextTokenIndex < tlen && - UNPACK_LONG(tokenBuffer[nextTokenIndex])) { - ++nextTokenIndex; - } - lastLiteralLength = 0; - int32_t i = nextTokenIndex - 1; - for (; UNPACK_LONG(tokenBuffer[i]); --i) { - lastLiteralLength <<= 8; - lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); - } - lastLiteralLength <<= 8; - lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]); - nextLiteralIndex += lastLiteralLength; - } - return TRUE; -} - -AffixPattern::ETokenType -AffixPatternIterator::getTokenType() const { - return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1)); -} - -UnicodeString & -AffixPatternIterator::getLiteral(UnicodeString &result) const { - const UChar *buffer = literals->getBuffer(); - result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength); - return result; -} - -int32_t -AffixPatternIterator::getTokenLength() const { - const UChar *tokenBuffer = tokens->getBuffer(); - AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]); - return type == AffixPattern::kLiteral ? 
lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]); -} - -AffixPatternParser::AffixPatternParser() - : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) { -} - -AffixPatternParser::AffixPatternParser( - const DecimalFormatSymbols &symbols) { - setDecimalFormatSymbols(symbols); -} - -void -AffixPatternParser::setDecimalFormatSymbols( - const DecimalFormatSymbols &symbols) { - fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol); - fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol); - fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol); - fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol); -} - -PluralAffix & -AffixPatternParser::parse( - const AffixPattern &affixPattern, - const CurrencyAffixInfo &currencyAffixInfo, - PluralAffix &appendTo, - UErrorCode &status) const { - if (U_FAILURE(status)) { - return appendTo; - } - AffixPatternIterator iter; - affixPattern.iterator(iter); - UnicodeString literal; - while (iter.nextToken()) { - switch (iter.getTokenType()) { - case AffixPattern::kPercent: - appendTo.append(fPercent, UNUM_PERCENT_FIELD); - break; - case AffixPattern::kPerMill: - appendTo.append(fPermill, UNUM_PERMILL_FIELD); - break; - case AffixPattern::kNegative: - appendTo.append(fNegative, UNUM_SIGN_FIELD); - break; - case AffixPattern::kPositive: - appendTo.append(fPositive, UNUM_SIGN_FIELD); - break; - case AffixPattern::kCurrency: - switch (iter.getTokenLength()) { - case 1: - appendTo.append( - currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD); - break; - case 2: - appendTo.append( - currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD); - break; - case 3: - appendTo.append( - currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status); - break; - default: - U_ASSERT(FALSE); - break; - } - break; - case AffixPattern::kLiteral: - appendTo.append(iter.getLiteral(literal)); - break; - default: - U_ASSERT(FALSE); - break; - } - } - 
return appendTo; -} - - -U_NAMESPACE_END -#endif /* #if !UCONFIG_NO_FORMATTING */ |