/* ******************************************************************************* * Copyright (C) 2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * affixpatternparser.h * * created on: 2015jan06 * created by: Travis Keep */ #ifndef __AFFIX_PATTERN_PARSER_H__ #define __AFFIX_PATTERN_PARSER_H__ #include "unicode/utypes.h" #if !UCONFIG_NO_FORMATTING #include "unicode/unistr.h" #include "unicode/uobject.h" #include "pluralaffix.h" U_NAMESPACE_BEGIN class PluralRules; class FixedPrecision; class DecimalFormatSymbols; /** * A representation of the various forms of a particular currency according * to some locale and usage context. * * Includes the symbol, ISO code form, and long form(s) of the currency name * for each plural variation. */ class U_I18N_API CurrencyAffixInfo : public UMemory { public: /** * Symbol is \u00a4; ISO form is \u00a4\u00a4; * long form is \u00a4\u00a4\u00a4. */ CurrencyAffixInfo(); const UnicodeString &getSymbol() const { return fSymbol; } const UnicodeString &getISO() const { return fISO; } const PluralAffix &getLong() const { return fLong; } void setSymbol(const UnicodeString &symbol) { fSymbol = symbol; fIsDefault = FALSE; } void setISO(const UnicodeString &iso) { fISO = iso; fIsDefault = FALSE; } UBool equals(const CurrencyAffixInfo &other) const { return (fSymbol == other.fSymbol) && (fISO == other.fISO) && (fLong.equals(other.fLong)) && (fIsDefault == other.fIsDefault); } /** * Intializes this instance. * * @param locale the locale for the currency forms. * @param rules The plural rules for the locale. * @param currency the null terminated, 3 character ISO code of the * currency. If NULL, resets this instance as if it were just created. * In this case, the first 2 parameters may be NULL as well. * @param status any error returned here. */ void set( const char *locale, const PluralRules *rules, const UChar *currency, UErrorCode &status); /** * Returns true if this instance is the default. That is has no real * currency. For instance never initialized with set() * or reset with set(NULL, NULL, NULL, status). */ UBool isDefault() const { return fIsDefault; } /** * Adjusts the precision used for a particular currency. * @param currency the null terminated, 3 character ISO code of the * currency. * @param usage the usage of the currency * @param precision min/max fraction digits and rounding increment * adjusted. * @params status any error reported here. */ static void adjustPrecision( const UChar *currency, const UCurrencyUsage usage, FixedPrecision &precision, UErrorCode &status); private: /** * The symbol form of the currency. */ UnicodeString fSymbol; /** * The ISO form of the currency, usually three letter abbreviation. */ UnicodeString fISO; /** * The long forms of the currency keyed by plural variation. */ PluralAffix fLong; UBool fIsDefault; }; class AffixPatternIterator; /** * A locale agnostic representation of an affix pattern. */ class U_I18N_API AffixPattern : public UMemory { public: /** * The token types that can appear in an affix pattern. */ enum ETokenType { kLiteral, kPercent, kPerMill, kCurrency, kNegative, kPositive }; /** * An empty affix pattern. */ AffixPattern() : tokens(), literals(), hasCurrencyToken(FALSE), hasPercentToken(FALSE), hasPermillToken(FALSE), char32Count(0) { } /** * Adds a string literal to this affix pattern. */ void addLiteral(const UChar *, int32_t start, int32_t len); /** * Adds a token to this affix pattern. t must not be kLiteral as * the addLiteral() method adds literals. * @param t the token type to add */ void add(ETokenType t); /** * Adds a currency token with specific count to this affix pattern. * @param count the token count. Used to distinguish between * one, two, or three currency symbols. Note that adding a currency * token with count=2 (Use ISO code) is different than adding two * currency tokens each with count=1 (two currency symbols). */ void addCurrency(uint8_t count); /** * Makes this instance be an empty affix pattern. */ void remove(); /** * Provides an iterator over the tokens in this instance. * @param result this is initialized to point just before the * first token of this instance. Caller must call nextToken() * on the iterator once it is set up to have it actually point * to the first token. This first call to nextToken() will return * FALSE if the AffixPattern being iterated over is empty. * @return result */ AffixPatternIterator &iterator(AffixPatternIterator &result) const; /** * Returns TRUE if this instance has currency tokens in it. */ UBool usesCurrency() const { return hasCurrencyToken; } UBool usesPercent() const { return hasPercentToken; } UBool usesPermill() const { return hasPermillToken; } /** * Returns the number of code points a string of this instance * would have if none of the special tokens were escaped. * Used to compute the padding size. */ int32_t countChar32() const { return char32Count; } /** * Appends other to this instance mutating this instance in place. * @param other The pattern appended to the end of this one. * @return a reference to this instance for chaining. */ AffixPattern &append(const AffixPattern &other); /** * Converts this AffixPattern back into a user string. * It is the inverse of parseUserAffixString. */ UnicodeString &toUserString(UnicodeString &appendTo) const; /** * Converts this AffixPattern back into a string. * It is the inverse of parseAffixString. */ UnicodeString &toString(UnicodeString &appendTo) const; /** * Parses an affix pattern string appending it to an AffixPattern. * Parses affix pattern strings produced from using * DecimalFormatPatternParser to parse a format pattern. Affix patterns * include the positive prefix and suffix and the negative prefix * and suffix. This method expects affix patterns strings to be in the * same format that DecimalFormatPatternParser produces. Namely special * characters in the affix that correspond to a field type must be * prefixed with an apostrophe ('). These special character sequences * inluce minus (-), percent (%), permile (U+2030), plus (+), * short currency (U+00a4), medium currency (u+00a4 * 2), * long currency (u+a4 * 3), and apostrophe (') * (apostrophe does not correspond to a field type but has to be escaped * because it itself is the escape character). * Since the expansion of these special character * sequences is locale dependent, these sequences are not expanded in * an AffixPattern instance. * If these special characters are not prefixed with an apostrophe in * the affix pattern string, then they are treated verbatim just as * any other character. If an apostrophe prefixes a non special * character in the affix pattern, the apostrophe is simply ignored. * * @param affixStr the string from DecimalFormatPatternParser * @param appendTo parsed result appended here. * @param status any error parsing returned here. */ static AffixPattern &parseAffixString( const UnicodeString &affixStr, AffixPattern &appendTo, UErrorCode &status); /** * Parses an affix pattern string appending it to an AffixPattern. * Parses affix pattern strings as the user would supply them. * In this function, quoting makes special characters like normal * characters whereas in parseAffixString, quoting makes special * characters special. * * @param affixStr the string from the user * @param appendTo parsed result appended here. * @param status any error parsing returned here. */ static AffixPattern &parseUserAffixString( const UnicodeString &affixStr, AffixPattern &appendTo, UErrorCode &status); UBool equals(const AffixPattern &other) const { return (tokens == other.tokens) && (literals == other.literals) && (hasCurrencyToken == other.hasCurrencyToken) && (hasPercentToken == other.hasPercentToken) && (hasPermillToken == other.hasPermillToken) && (char32Count == other.char32Count); } private: /* * Tokens stored here. Each UChar generally stands for one token. A * Each token is of form 'etttttttllllllll' llllllll is the length of * the token and ranges from 0-255. ttttttt is the token type and ranges * from 0-127. If e is set it means this is an extendo token (to be * described later). To accomodate token lengths above 255, each normal * token (e=0) can be followed by 0 or more extendo tokens (e=1) with * the same type. Right now only kLiteral Tokens have extendo tokens. * Each extendo token provides the next 8 higher bits for the length. * If a kLiteral token is followed by 2 extendo tokens then, then the * llllllll of the next extendo token contains bits 8-15 of the length * and the last extendo token contains bits 16-23 of the length. */ UnicodeString tokens; /* * The characters of the kLiteral tokens are concatenated together here. * The first characters go with the first kLiteral token, the next * characters go with the next kLiteral token etc. */ UnicodeString literals; UBool hasCurrencyToken; UBool hasPercentToken; UBool hasPermillToken; int32_t char32Count; void add(ETokenType t, uint8_t count); }; /** * An iterator over the tokens in an AffixPattern instance. */ class U_I18N_API AffixPatternIterator : public UMemory { public: /** * Using an iterator without first calling iterator on an AffixPattern * instance to initialize the iterator results in * undefined behavior. */ AffixPatternIterator() : nextLiteralIndex(0), lastLiteralLength(0), nextTokenIndex(0), tokens(NULL), literals(NULL) { } /** * Advances this iterator to the next token. Returns FALSE when there * are no more tokens. Calling the other methods after nextToken() * returns FALSE results in undefined behavior. */ UBool nextToken(); /** * Returns the type of token. */ AffixPattern::ETokenType getTokenType() const; /** * For literal tokens, returns the literal string. Calling this for * other token types results in undefined behavior. * @param result replaced with a read-only alias to the literal string. * @return result */ UnicodeString &getLiteral(UnicodeString &result) const; /** * Returns the token length. Usually 1, but for currency tokens may * be 2 for ISO code and 3 for long form. */ int32_t getTokenLength() const; private: int32_t nextLiteralIndex; int32_t lastLiteralLength; int32_t nextTokenIndex; const UnicodeString *tokens; const UnicodeString *literals; friend class AffixPattern; AffixPatternIterator(const AffixPatternIterator &); AffixPatternIterator &operator=(const AffixPatternIterator &); }; /** * A locale aware class that converts locale independent AffixPattern * instances into locale dependent PluralAffix instances. */ class U_I18N_API AffixPatternParser : public UMemory { public: AffixPatternParser(); AffixPatternParser(const DecimalFormatSymbols &symbols); void setDecimalFormatSymbols(const DecimalFormatSymbols &symbols); /** * Parses affixPattern appending the result to appendTo. * @param affixPattern The affix pattern. * @param currencyAffixInfo contains the currency forms. * @param appendTo The result of parsing affixPattern is appended here. * @param status any error returned here. * @return appendTo. */ PluralAffix &parse( const AffixPattern &affixPattern, const CurrencyAffixInfo ¤cyAffixInfo, PluralAffix &appendTo, UErrorCode &status) const; UBool equals(const AffixPatternParser &other) const { return (fPercent == other.fPercent) && (fPermill == other.fPermill) && (fNegative == other.fNegative) && (fPositive == other.fPositive); } private: UnicodeString fPercent; UnicodeString fPermill; UnicodeString fNegative; UnicodeString fPositive; }; U_NAMESPACE_END #endif /* #if !UCONFIG_NO_FORMATTING */ #endif // __AFFIX_PATTERN_PARSER_H__