diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/i18n/nfrule.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/i18n/nfrule.cpp | 1622 |
1 files changed, 0 insertions, 1622 deletions
diff --git a/deps/node/deps/icu-small/source/i18n/nfrule.cpp b/deps/node/deps/icu-small/source/i18n/nfrule.cpp deleted file mode 100644 index b5e7892d..00000000 --- a/deps/node/deps/icu-small/source/i18n/nfrule.cpp +++ /dev/null @@ -1,1622 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -****************************************************************************** -* Copyright (C) 1997-2015, International Business Machines -* Corporation and others. All Rights Reserved. -****************************************************************************** -* file name: nfrule.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* Modification history -* Date Name Comments -* 10/11/2001 Doug Ported from ICU4J -*/ - -#include "nfrule.h" - -#if U_HAVE_RBNF - -#include "unicode/localpointer.h" -#include "unicode/rbnf.h" -#include "unicode/tblcoll.h" -#include "unicode/plurfmt.h" -#include "unicode/upluralrules.h" -#include "unicode/coleitr.h" -#include "unicode/uchar.h" -#include "nfrs.h" -#include "nfrlist.h" -#include "nfsubs.h" -#include "patternprops.h" -#include "putilimp.h" - -U_NAMESPACE_BEGIN - -NFRule::NFRule(const RuleBasedNumberFormat* _rbnf, const UnicodeString &_ruleText, UErrorCode &status) - : baseValue((int32_t)0) - , radix(10) - , exponent(0) - , decimalPoint(0) - , fRuleText(_ruleText) - , sub1(NULL) - , sub2(NULL) - , formatter(_rbnf) - , rulePatternFormat(NULL) -{ - if (!fRuleText.isEmpty()) { - parseRuleDescriptor(fRuleText, status); - } -} - -NFRule::~NFRule() -{ - if (sub1 != sub2) { - delete sub2; - sub2 = NULL; - } - delete sub1; - sub1 = NULL; - delete rulePatternFormat; - rulePatternFormat = NULL; -} - -static const UChar gLeftBracket = 0x005b; -static const UChar gRightBracket = 0x005d; -static const UChar gColon = 0x003a; -static const UChar gZero = 0x0030; -static const UChar gNine = 0x0039; -static const UChar gSpace = 0x0020; -static const UChar gSlash = 0x002f; -static const UChar gGreaterThan = 0x003e; -static const UChar gLessThan = 0x003c; -static const UChar gComma = 0x002c; -static const UChar gDot = 0x002e; -static const UChar gTick = 0x0027; -//static const UChar gMinus = 0x002d; -static const UChar gSemicolon = 0x003b; -static const UChar gX = 0x0078; - -static const UChar gMinusX[] = {0x2D, 0x78, 0}; /* "-x" */ -static const UChar gInf[] = {0x49, 0x6E, 0x66, 0}; /* "Inf" */ -static const UChar gNaN[] = {0x4E, 0x61, 0x4E, 0}; /* "NaN" */ - -static const UChar gDollarOpenParenthesis[] = {0x24, 0x28, 0}; /* "$(" */ -static const UChar gClosedParenthesisDollar[] = {0x29, 0x24, 0}; /* ")$" */ - -static const UChar gLessLess[] = {0x3C, 0x3C, 0}; /* "<<" */ -static const UChar gLessPercent[] = {0x3C, 0x25, 0}; /* "<%" */ -static const UChar gLessHash[] = {0x3C, 0x23, 0}; /* "<#" */ -static const UChar gLessZero[] = {0x3C, 0x30, 0}; /* "<0" */ -static const UChar gGreaterGreater[] = {0x3E, 0x3E, 0}; /* ">>" */ -static const UChar gGreaterPercent[] = {0x3E, 0x25, 0}; /* ">%" */ -static const UChar gGreaterHash[] = {0x3E, 0x23, 0}; /* ">#" */ -static const UChar gGreaterZero[] = {0x3E, 0x30, 0}; /* ">0" */ -static const UChar gEqualPercent[] = {0x3D, 0x25, 0}; /* "=%" */ -static const UChar gEqualHash[] = {0x3D, 0x23, 0}; /* "=#" */ -static const UChar gEqualZero[] = {0x3D, 0x30, 0}; /* "=0" */ -static const UChar gGreaterGreaterGreater[] = {0x3E, 0x3E, 0x3E, 0}; /* ">>>" */ - -static const UChar * const RULE_PREFIXES[] = { - gLessLess, gLessPercent, gLessHash, gLessZero, - gGreaterGreater, gGreaterPercent,gGreaterHash, gGreaterZero, - gEqualPercent, gEqualHash, gEqualZero, NULL -}; - -void -NFRule::makeRules(UnicodeString& description, - NFRuleSet *owner, - const NFRule *predecessor, - const RuleBasedNumberFormat *rbnf, - NFRuleList& rules, - UErrorCode& status) -{ - // we know we're making at least one rule, so go ahead and - // new it up and initialize its basevalue and divisor - // (this also strips the rule descriptor, if any, off the - // descripton string) - NFRule* rule1 = new NFRule(rbnf, description, status); - /* test for NULL */ - if (rule1 == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - description = rule1->fRuleText; - - // check the description to see whether there's text enclosed - // in brackets - int32_t brack1 = description.indexOf(gLeftBracket); - int32_t brack2 = brack1 < 0 ? -1 : description.indexOf(gRightBracket); - - // if the description doesn't contain a matched pair of brackets, - // or if it's of a type that doesn't recognize bracketed text, - // then leave the description alone, initialize the rule's - // rule text and substitutions, and return that rule - if (brack2 < 0 || brack1 > brack2 - || rule1->getType() == kProperFractionRule - || rule1->getType() == kNegativeNumberRule - || rule1->getType() == kInfinityRule - || rule1->getType() == kNaNRule) - { - rule1->extractSubstitutions(owner, description, predecessor, status); - } - else { - // if the description does contain a matched pair of brackets, - // then it's really shorthand for two rules (with one exception) - NFRule* rule2 = NULL; - UnicodeString sbuf; - - // we'll actually only split the rule into two rules if its - // base value is an even multiple of its divisor (or it's one - // of the special rules) - if ((rule1->baseValue > 0 - && (rule1->baseValue % util64_pow(rule1->radix, rule1->exponent)) == 0) - || rule1->getType() == kImproperFractionRule - || rule1->getType() == kMasterRule) { - - // if it passes that test, new up the second rule. If the - // rule set both rules will belong to is a fraction rule - // set, they both have the same base value; otherwise, - // increment the original rule's base value ("rule1" actually - // goes SECOND in the rule set's rule list) - rule2 = new NFRule(rbnf, UnicodeString(), status); - /* test for NULL */ - if (rule2 == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - if (rule1->baseValue >= 0) { - rule2->baseValue = rule1->baseValue; - if (!owner->isFractionRuleSet()) { - ++rule1->baseValue; - } - } - - // if the description began with "x.x" and contains bracketed - // text, it describes both the improper fraction rule and - // the proper fraction rule - else if (rule1->getType() == kImproperFractionRule) { - rule2->setType(kProperFractionRule); - } - - // if the description began with "x.0" and contains bracketed - // text, it describes both the master rule and the - // improper fraction rule - else if (rule1->getType() == kMasterRule) { - rule2->baseValue = rule1->baseValue; - rule1->setType(kImproperFractionRule); - } - - // both rules have the same radix and exponent (i.e., the - // same divisor) - rule2->radix = rule1->radix; - rule2->exponent = rule1->exponent; - - // rule2's rule text omits the stuff in brackets: initalize - // its rule text and substitutions accordingly - sbuf.append(description, 0, brack1); - if (brack2 + 1 < description.length()) { - sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); - } - rule2->extractSubstitutions(owner, sbuf, predecessor, status); - } - - // rule1's text includes the text in the brackets but omits - // the brackets themselves: initialize _its_ rule text and - // substitutions accordingly - sbuf.setTo(description, 0, brack1); - sbuf.append(description, brack1 + 1, brack2 - brack1 - 1); - if (brack2 + 1 < description.length()) { - sbuf.append(description, brack2 + 1, description.length() - brack2 - 1); - } - rule1->extractSubstitutions(owner, sbuf, predecessor, status); - - // if we only have one rule, return it; if we have two, return - // a two-element array containing them (notice that rule2 goes - // BEFORE rule1 in the list: in all cases, rule2 OMITS the - // material in the brackets and rule1 INCLUDES the material - // in the brackets) - if (rule2 != NULL) { - if (rule2->baseValue >= kNoBase) { - rules.add(rule2); - } - else { - owner->setNonNumericalRule(rule2); - } - } - } - if (rule1->baseValue >= kNoBase) { - rules.add(rule1); - } - else { - owner->setNonNumericalRule(rule1); - } -} - -/** - * This function parses the rule's rule descriptor (i.e., the base - * value and/or other tokens that precede the rule's rule text - * in the description) and sets the rule's base value, radix, and - * exponent according to the descriptor. (If the description doesn't - * include a rule descriptor, then this function sets everything to - * default values and the rule set sets the rule's real base value). - * @param description The rule's description - * @return If "description" included a rule descriptor, this is - * "description" with the descriptor and any trailing whitespace - * stripped off. Otherwise; it's "descriptor" unchangd. - */ -void -NFRule::parseRuleDescriptor(UnicodeString& description, UErrorCode& status) -{ - // the description consists of a rule descriptor and a rule body, - // separated by a colon. The rule descriptor is optional. If - // it's omitted, just set the base value to 0. - int32_t p = description.indexOf(gColon); - if (p != -1) { - // copy the descriptor out into its own string and strip it, - // along with any trailing whitespace, out of the original - // description - UnicodeString descriptor; - descriptor.setTo(description, 0, p); - - ++p; - while (p < description.length() && PatternProps::isWhiteSpace(description.charAt(p))) { - ++p; - } - description.removeBetween(0, p); - - // check first to see if the rule descriptor matches the token - // for one of the special rules. If it does, set the base - // value to the correct identifier value - int descriptorLength = descriptor.length(); - UChar firstChar = descriptor.charAt(0); - UChar lastChar = descriptor.charAt(descriptorLength - 1); - if (firstChar >= gZero && firstChar <= gNine && lastChar != gX) { - // if the rule descriptor begins with a digit, it's a descriptor - // for a normal rule - // since we don't have Long.parseLong, and this isn't much work anyway, - // just build up the value as we encounter the digits. - int64_t val = 0; - p = 0; - UChar c = gSpace; - - // begin parsing the descriptor: copy digits - // into "tempValue", skip periods, commas, and spaces, - // stop on a slash or > sign (or at the end of the string), - // and throw an exception on any other character - int64_t ll_10 = 10; - while (p < descriptorLength) { - c = descriptor.charAt(p); - if (c >= gZero && c <= gNine) { - val = val * ll_10 + (int32_t)(c - gZero); - } - else if (c == gSlash || c == gGreaterThan) { - break; - } - else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { - } - else { - // throw new IllegalArgumentException("Illegal character in rule descriptor"); - status = U_PARSE_ERROR; - return; - } - ++p; - } - - // we have the base value, so set it - setBaseValue(val, status); - - // if we stopped the previous loop on a slash, we're - // now parsing the rule's radix. Again, accumulate digits - // in tempValue, skip punctuation, stop on a > mark, and - // throw an exception on anything else - if (c == gSlash) { - val = 0; - ++p; - ll_10 = 10; - while (p < descriptorLength) { - c = descriptor.charAt(p); - if (c >= gZero && c <= gNine) { - val = val * ll_10 + (int32_t)(c - gZero); - } - else if (c == gGreaterThan) { - break; - } - else if (PatternProps::isWhiteSpace(c) || c == gComma || c == gDot) { - } - else { - // throw new IllegalArgumentException("Illegal character is rule descriptor"); - status = U_PARSE_ERROR; - return; - } - ++p; - } - - // tempValue now contain's the rule's radix. Set it - // accordingly, and recalculate the rule's exponent - radix = (int32_t)val; - if (radix == 0) { - // throw new IllegalArgumentException("Rule can't have radix of 0"); - status = U_PARSE_ERROR; - } - - exponent = expectedExponent(); - } - - // if we stopped the previous loop on a > sign, then continue - // for as long as we still see > signs. For each one, - // decrement the exponent (unless the exponent is already 0). - // If we see another character before reaching the end of - // the descriptor, that's also a syntax error. - if (c == gGreaterThan) { - while (p < descriptor.length()) { - c = descriptor.charAt(p); - if (c == gGreaterThan && exponent > 0) { - --exponent; - } else { - // throw new IllegalArgumentException("Illegal character in rule descriptor"); - status = U_PARSE_ERROR; - return; - } - ++p; - } - } - } - else if (0 == descriptor.compare(gMinusX, 2)) { - setType(kNegativeNumberRule); - } - else if (descriptorLength == 3) { - if (firstChar == gZero && lastChar == gX) { - setBaseValue(kProperFractionRule, status); - decimalPoint = descriptor.charAt(1); - } - else if (firstChar == gX && lastChar == gX) { - setBaseValue(kImproperFractionRule, status); - decimalPoint = descriptor.charAt(1); - } - else if (firstChar == gX && lastChar == gZero) { - setBaseValue(kMasterRule, status); - decimalPoint = descriptor.charAt(1); - } - else if (descriptor.compare(gNaN, 3) == 0) { - setBaseValue(kNaNRule, status); - } - else if (descriptor.compare(gInf, 3) == 0) { - setBaseValue(kInfinityRule, status); - } - } - } - // else use the default base value for now. - - // finally, if the rule body begins with an apostrophe, strip it off - // (this is generally used to put whitespace at the beginning of - // a rule's rule text) - if (description.length() > 0 && description.charAt(0) == gTick) { - description.removeBetween(0, 1); - } - - // return the description with all the stuff we've just waded through - // stripped off the front. It now contains just the rule body. - // return description; -} - -/** -* Searches the rule's rule text for the substitution tokens, -* creates the substitutions, and removes the substitution tokens -* from the rule's rule text. -* @param owner The rule set containing this rule -* @param predecessor The rule preseding this one in "owners" rule list -* @param ownersOwner The RuleBasedFormat that owns this rule -*/ -void -NFRule::extractSubstitutions(const NFRuleSet* ruleSet, - const UnicodeString &ruleText, - const NFRule* predecessor, - UErrorCode& status) -{ - if (U_FAILURE(status)) { - return; - } - fRuleText = ruleText; - sub1 = extractSubstitution(ruleSet, predecessor, status); - if (sub1 == NULL) { - // Small optimization. There is no need to create a redundant NullSubstitution. - sub2 = NULL; - } - else { - sub2 = extractSubstitution(ruleSet, predecessor, status); - } - int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); - int32_t pluralRuleEnd = (pluralRuleStart >= 0 ? fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) : -1); - if (pluralRuleEnd >= 0) { - int32_t endType = fRuleText.indexOf(gComma, pluralRuleStart); - if (endType < 0) { - status = U_PARSE_ERROR; - return; - } - UnicodeString type(fRuleText.tempSubString(pluralRuleStart + 2, endType - pluralRuleStart - 2)); - UPluralType pluralType; - if (type.startsWith(UNICODE_STRING_SIMPLE("cardinal"))) { - pluralType = UPLURAL_TYPE_CARDINAL; - } - else if (type.startsWith(UNICODE_STRING_SIMPLE("ordinal"))) { - pluralType = UPLURAL_TYPE_ORDINAL; - } - else { - status = U_ILLEGAL_ARGUMENT_ERROR; - return; - } - rulePatternFormat = formatter->createPluralFormat(pluralType, - fRuleText.tempSubString(endType + 1, pluralRuleEnd - endType - 1), status); - } -} - -/** -* Searches the rule's rule text for the first substitution token, -* creates a substitution based on it, and removes the token from -* the rule's rule text. -* @param owner The rule set containing this rule -* @param predecessor The rule preceding this one in the rule set's -* rule list -* @param ownersOwner The RuleBasedNumberFormat that owns this rule -* @return The newly-created substitution. This is never null; if -* the rule text doesn't contain any substitution tokens, this will -* be a NullSubstitution. -*/ -NFSubstitution * -NFRule::extractSubstitution(const NFRuleSet* ruleSet, - const NFRule* predecessor, - UErrorCode& status) -{ - NFSubstitution* result = NULL; - - // search the rule's rule text for the first two characters of - // a substitution token - int32_t subStart = indexOfAnyRulePrefix(); - int32_t subEnd = subStart; - - // if we didn't find one, create a null substitution positioned - // at the end of the rule text - if (subStart == -1) { - return NULL; - } - - // special-case the ">>>" token, since searching for the > at the - // end will actually find the > in the middle - if (fRuleText.indexOf(gGreaterGreaterGreater, 3, 0) == subStart) { - subEnd = subStart + 2; - - // otherwise the substitution token ends with the same character - // it began with - } else { - UChar c = fRuleText.charAt(subStart); - subEnd = fRuleText.indexOf(c, subStart + 1); - // special case for '<%foo<<' - if (c == gLessThan && subEnd != -1 && subEnd < fRuleText.length() - 1 && fRuleText.charAt(subEnd+1) == c) { - // ordinals use "=#,##0==%abbrev=" as their rule. Notice that the '==' in the middle - // occurs because of the juxtaposition of two different rules. The check for '<' is a hack - // to get around this. Having the duplicate at the front would cause problems with - // rules like "<<%" to format, say, percents... - ++subEnd; - } - } - - // if we don't find the end of the token (i.e., if we're on a single, - // unmatched token character), create a null substitution positioned - // at the end of the rule - if (subEnd == -1) { - return NULL; - } - - // if we get here, we have a real substitution token (or at least - // some text bounded by substitution token characters). Use - // makeSubstitution() to create the right kind of substitution - UnicodeString subToken; - subToken.setTo(fRuleText, subStart, subEnd + 1 - subStart); - result = NFSubstitution::makeSubstitution(subStart, this, predecessor, ruleSet, - this->formatter, subToken, status); - - // remove the substitution from the rule text - fRuleText.removeBetween(subStart, subEnd+1); - - return result; -} - -/** - * Sets the rule's base value, and causes the radix and exponent - * to be recalculated. This is used during construction when we - * don't know the rule's base value until after it's been - * constructed. It should be used at any other time. - * @param The new base value for the rule. - */ -void -NFRule::setBaseValue(int64_t newBaseValue, UErrorCode& status) -{ - // set the base value - baseValue = newBaseValue; - radix = 10; - - // if this isn't a special rule, recalculate the radix and exponent - // (the radix always defaults to 10; if it's supposed to be something - // else, it's cleaned up by the caller and the exponent is - // recalculated again-- the only function that does this is - // NFRule.parseRuleDescriptor() ) - if (baseValue >= 1) { - exponent = expectedExponent(); - - // this function gets called on a fully-constructed rule whose - // description didn't specify a base value. This means it - // has substitutions, and some substitutions hold on to copies - // of the rule's divisor. Fix their copies of the divisor. - if (sub1 != NULL) { - sub1->setDivisor(radix, exponent, status); - } - if (sub2 != NULL) { - sub2->setDivisor(radix, exponent, status); - } - - // if this is a special rule, its radix and exponent are basically - // ignored. Set them to "safe" default values - } else { - exponent = 0; - } -} - -/** -* This calculates the rule's exponent based on its radix and base -* value. This will be the highest power the radix can be raised to -* and still produce a result less than or equal to the base value. -*/ -int16_t -NFRule::expectedExponent() const -{ - // since the log of 0, or the log base 0 of something, causes an - // error, declare the exponent in these cases to be 0 (we also - // deal with the special-rule identifiers here) - if (radix == 0 || baseValue < 1) { - return 0; - } - - // we get rounding error in some cases-- for example, log 1000 / log 10 - // gives us 1.9999999996 instead of 2. The extra logic here is to take - // that into account - int16_t tempResult = (int16_t)(uprv_log((double)baseValue) / uprv_log((double)radix)); - int64_t temp = util64_pow(radix, tempResult + 1); - if (temp <= baseValue) { - tempResult += 1; - } - return tempResult; -} - -/** - * Searches the rule's rule text for any of the specified strings. - * @return The index of the first match in the rule's rule text - * (i.e., the first substring in the rule's rule text that matches - * _any_ of the strings in "strings"). If none of the strings in - * "strings" is found in the rule's rule text, returns -1. - */ -int32_t -NFRule::indexOfAnyRulePrefix() const -{ - int result = -1; - for (int i = 0; RULE_PREFIXES[i]; i++) { - int32_t pos = fRuleText.indexOf(*RULE_PREFIXES[i]); - if (pos != -1 && (result == -1 || pos < result)) { - result = pos; - } - } - return result; -} - -//----------------------------------------------------------------------- -// boilerplate -//----------------------------------------------------------------------- - -static UBool -util_equalSubstitutions(const NFSubstitution* sub1, const NFSubstitution* sub2) -{ - if (sub1) { - if (sub2) { - return *sub1 == *sub2; - } - } else if (!sub2) { - return TRUE; - } - return FALSE; -} - -/** -* Tests two rules for equality. -* @param that The rule to compare this one against -* @return True is the two rules are functionally equivalent -*/ -UBool -NFRule::operator==(const NFRule& rhs) const -{ - return baseValue == rhs.baseValue - && radix == rhs.radix - && exponent == rhs.exponent - && fRuleText == rhs.fRuleText - && util_equalSubstitutions(sub1, rhs.sub1) - && util_equalSubstitutions(sub2, rhs.sub2); -} - -/** -* Returns a textual representation of the rule. This won't -* necessarily be the same as the description that this rule -* was created with, but it will produce the same result. -* @return A textual description of the rule -*/ -static void util_append64(UnicodeString& result, int64_t n) -{ - UChar buffer[256]; - int32_t len = util64_tou(n, buffer, sizeof(buffer)); - UnicodeString temp(buffer, len); - result.append(temp); -} - -void -NFRule::_appendRuleText(UnicodeString& result) const -{ - switch (getType()) { - case kNegativeNumberRule: result.append(gMinusX, 2); break; - case kImproperFractionRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; - case kProperFractionRule: result.append(gZero).append(decimalPoint == 0 ? gDot : decimalPoint).append(gX); break; - case kMasterRule: result.append(gX).append(decimalPoint == 0 ? gDot : decimalPoint).append(gZero); break; - case kInfinityRule: result.append(gInf, 3); break; - case kNaNRule: result.append(gNaN, 3); break; - default: - // for a normal rule, write out its base value, and if the radix is - // something other than 10, write out the radix (with the preceding - // slash, of course). Then calculate the expected exponent and if - // if isn't the same as the actual exponent, write an appropriate - // number of > signs. Finally, terminate the whole thing with - // a colon. - util_append64(result, baseValue); - if (radix != 10) { - result.append(gSlash); - util_append64(result, radix); - } - int numCarets = expectedExponent() - exponent; - for (int i = 0; i < numCarets; i++) { - result.append(gGreaterThan); - } - break; - } - result.append(gColon); - result.append(gSpace); - - // if the rule text begins with a space, write an apostrophe - // (whitespace after the rule descriptor is ignored; the - // apostrophe is used to make the whitespace significant) - if (fRuleText.charAt(0) == gSpace && (sub1 == NULL || sub1->getPos() != 0)) { - result.append(gTick); - } - - // now, write the rule's rule text, inserting appropriate - // substitution tokens in the appropriate places - UnicodeString ruleTextCopy; - ruleTextCopy.setTo(fRuleText); - - UnicodeString temp; - if (sub2 != NULL) { - sub2->toString(temp); - ruleTextCopy.insert(sub2->getPos(), temp); - } - if (sub1 != NULL) { - sub1->toString(temp); - ruleTextCopy.insert(sub1->getPos(), temp); - } - - result.append(ruleTextCopy); - - // and finally, top the whole thing off with a semicolon and - // return the result - result.append(gSemicolon); -} - -int64_t NFRule::getDivisor() const -{ - return util64_pow(radix, exponent); -} - - -//----------------------------------------------------------------------- -// formatting -//----------------------------------------------------------------------- - -/** -* Formats the number, and inserts the resulting text into -* toInsertInto. -* @param number The number being formatted -* @param toInsertInto The string where the resultant text should -* be inserted -* @param pos The position in toInsertInto where the resultant text -* should be inserted -*/ -void -NFRule::doFormat(int64_t number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const -{ - // first, insert the rule's rule text into toInsertInto at the - // specified position, then insert the results of the substitutions - // into the right places in toInsertInto (notice we do the - // substitutions in reverse order so that the offsets don't get - // messed up) - int32_t pluralRuleStart = fRuleText.length(); - int32_t lengthOffset = 0; - if (!rulePatternFormat) { - toInsertInto.insert(pos, fRuleText); - } - else { - pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); - int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); - int initialLength = toInsertInto.length(); - if (pluralRuleEnd < fRuleText.length() - 1) { - toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2)); - } - toInsertInto.insert(pos, - rulePatternFormat->format((int32_t)(number/util64_pow(radix, exponent)), status)); - if (pluralRuleStart > 0) { - toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart)); - } - lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength); - } - - if (sub2 != NULL) { - sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); - } - if (sub1 != NULL) { - sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); - } -} - -/** -* Formats the number, and inserts the resulting text into -* toInsertInto. -* @param number The number being formatted -* @param toInsertInto The string where the resultant text should -* be inserted -* @param pos The position in toInsertInto where the resultant text -* should be inserted -*/ -void -NFRule::doFormat(double number, UnicodeString& toInsertInto, int32_t pos, int32_t recursionCount, UErrorCode& status) const -{ - // first, insert the rule's rule text into toInsertInto at the - // specified position, then insert the results of the substitutions - // into the right places in toInsertInto - // [again, we have two copies of this routine that do the same thing - // so that we don't sacrifice precision in a long by casting it - // to a double] - int32_t pluralRuleStart = fRuleText.length(); - int32_t lengthOffset = 0; - if (!rulePatternFormat) { - toInsertInto.insert(pos, fRuleText); - } - else { - pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); - int pluralRuleEnd = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart); - int initialLength = toInsertInto.length(); - if (pluralRuleEnd < fRuleText.length() - 1) { - toInsertInto.insert(pos, fRuleText.tempSubString(pluralRuleEnd + 2)); - } - double pluralVal = number; - if (0 <= pluralVal && pluralVal < 1) { - // We're in a fractional rule, and we have to match the NumeratorSubstitution behavior. - // 2.3 can become 0.2999999999999998 for the fraction due to rounding errors. - pluralVal = uprv_round(pluralVal * util64_pow(radix, exponent)); - } - else { - pluralVal = pluralVal / util64_pow(radix, exponent); - } - toInsertInto.insert(pos, rulePatternFormat->format((int32_t)(pluralVal), status)); - if (pluralRuleStart > 0) { - toInsertInto.insert(pos, fRuleText.tempSubString(0, pluralRuleStart)); - } - lengthOffset = fRuleText.length() - (toInsertInto.length() - initialLength); - } - - if (sub2 != NULL) { - sub2->doSubstitution(number, toInsertInto, pos - (sub2->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); - } - if (sub1 != NULL) { - sub1->doSubstitution(number, toInsertInto, pos - (sub1->getPos() > pluralRuleStart ? lengthOffset : 0), recursionCount, status); - } -} - -/** -* Used by the owning rule set to determine whether to invoke the -* rollback rule (i.e., whether this rule or the one that precedes -* it in the rule set's list should be used to format the number) -* @param The number being formatted -* @return True if the rule set should use the rule that precedes -* this one in its list; false if it should use this rule -*/ -UBool -NFRule::shouldRollBack(int64_t number) const -{ - // we roll back if the rule contains a modulus substitution, - // the number being formatted is an even multiple of the rule's - // divisor, and the rule's base value is NOT an even multiple - // of its divisor - // In other words, if the original description had - // 100: << hundred[ >>]; - // that expands into - // 100: << hundred; - // 101: << hundred >>; - // internally. But when we're formatting 200, if we use the rule - // at 101, which would normally apply, we get "two hundred zero". - // To prevent this, we roll back and use the rule at 100 instead. - // This is the logic that makes this happen: the rule at 101 has - // a modulus substitution, its base value isn't an even multiple - // of 100, and the value we're trying to format _is_ an even - // multiple of 100. This is called the "rollback rule." - if ((sub1 != NULL && sub1->isModulusSubstitution()) || (sub2 != NULL && sub2->isModulusSubstitution())) { - int64_t re = util64_pow(radix, exponent); - return (number % re) == 0 && (baseValue % re) != 0; - } - return FALSE; -} - -//----------------------------------------------------------------------- -// parsing -//----------------------------------------------------------------------- - -/** -* Attempts to parse the string with this rule. -* @param text The string being parsed -* @param parsePosition On entry, the value is ignored and assumed to -* be 0. On exit, this has been updated with the position of the first -* character not consumed by matching the text against this rule -* (if this rule doesn't match the text at all, the parse position -* if left unchanged (presumably at 0) and the function returns -* new Long(0)). -* @param isFractionRule True if this rule is contained within a -* fraction rule set. This is only used if the rule has no -* substitutions. -* @return If this rule matched the text, this is the rule's base value -* combined appropriately with the results of parsing the substitutions. -* If nothing matched, this is new Long(0) and the parse position is -* left unchanged. The result will be an instance of Long if the -* result is an integer and Double otherwise. The result is never null. -*/ -#ifdef RBNF_DEBUG -#include <stdio.h> - -static void dumpUS(FILE* f, const UnicodeString& us) { - int len = us.length(); - char* buf = (char *)uprv_malloc((len+1)*sizeof(char)); //new char[len+1]; - if (buf != NULL) { - us.extract(0, len, buf); - buf[len] = 0; - fprintf(f, "%s", buf); - uprv_free(buf); //delete[] buf; - } -} -#endif -UBool -NFRule::doParse(const UnicodeString& text, - ParsePosition& parsePosition, - UBool isFractionRule, - double upperBound, - uint32_t nonNumericalExecutedRuleMask, - Formattable& resVal) const -{ - // internally we operate on a copy of the string being parsed - // (because we're going to change it) and use our own ParsePosition - ParsePosition pp; - UnicodeString workText(text); - - int32_t sub1Pos = sub1 != NULL ? sub1->getPos() : fRuleText.length(); - int32_t sub2Pos = sub2 != NULL ? sub2->getPos() : fRuleText.length(); - - // check to see whether the text before the first substitution - // matches the text at the beginning of the string being - // parsed. If it does, strip that off the front of workText; - // otherwise, dump out with a mismatch - UnicodeString prefix; - prefix.setTo(fRuleText, 0, sub1Pos); - -#ifdef RBNF_DEBUG - fprintf(stderr, "doParse %p ", this); - { - UnicodeString rt; - _appendRuleText(rt); - dumpUS(stderr, rt); - } - - fprintf(stderr, " text: '"); - dumpUS(stderr, text); - fprintf(stderr, "' prefix: '"); - dumpUS(stderr, prefix); -#endif - stripPrefix(workText, prefix, pp); - int32_t prefixLength = text.length() - workText.length(); - -#ifdef RBNF_DEBUG - fprintf(stderr, "' pl: %d ppi: %d s1p: %d\n", prefixLength, pp.getIndex(), sub1Pos); -#endif - - if (pp.getIndex() == 0 && sub1Pos != 0) { - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - parsePosition.setErrorIndex(pp.getErrorIndex()); - resVal.setLong(0); - return TRUE; - } - if (baseValue == kInfinityRule) { - // If you match this, don't try to perform any calculations on it. - parsePosition.setIndex(pp.getIndex()); - resVal.setDouble(uprv_getInfinity()); - return TRUE; - } - if (baseValue == kNaNRule) { - // If you match this, don't try to perform any calculations on it. - parsePosition.setIndex(pp.getIndex()); - resVal.setDouble(uprv_getNaN()); - return TRUE; - } - - // this is the fun part. The basic guts of the rule-matching - // logic is matchToDelimiter(), which is called twice. The first - // time it searches the input string for the rule text BETWEEN - // the substitutions and tries to match the intervening text - // in the input string with the first substitution. If that - // succeeds, it then calls it again, this time to look for the - // rule text after the second substitution and to match the - // intervening input text against the second substitution. - // - // For example, say we have a rule that looks like this: - // first << middle >> last; - // and input text that looks like this: - // first one middle two last - // First we use stripPrefix() to match "first " in both places and - // strip it off the front, leaving - // one middle two last - // Then we use matchToDelimiter() to match " middle " and try to - // match "one" against a substitution. If it's successful, we now - // have - // two last - // We use matchToDelimiter() a second time to match " last" and - // try to match "two" against a substitution. If "two" matches - // the substitution, we have a successful parse. - // - // Since it's possible in many cases to find multiple instances - // of each of these pieces of rule text in the input string, - // we need to try all the possible combinations of these - // locations. This prevents us from prematurely declaring a mismatch, - // and makes sure we match as much input text as we can. - int highWaterMark = 0; - double result = 0; - int start = 0; - double tempBaseValue = (double)(baseValue <= 0 ? 0 : baseValue); - - UnicodeString temp; - do { - // our partial parse result starts out as this rule's base - // value. If it finds a successful match, matchToDelimiter() - // will compose this in some way with what it gets back from - // the substitution, giving us a new partial parse result - pp.setIndex(0); - - temp.setTo(fRuleText, sub1Pos, sub2Pos - sub1Pos); - double partialResult = matchToDelimiter(workText, start, tempBaseValue, - temp, pp, sub1, - nonNumericalExecutedRuleMask, - upperBound); - - // if we got a successful match (or were trying to match a - // null substitution), pp is now pointing at the first unmatched - // character. Take note of that, and try matchToDelimiter() - // on the input text again - if (pp.getIndex() != 0 || sub1 == NULL) { - start = pp.getIndex(); - - UnicodeString workText2; - workText2.setTo(workText, pp.getIndex(), workText.length() - pp.getIndex()); - ParsePosition pp2; - - // the second matchToDelimiter() will compose our previous - // partial result with whatever it gets back from its - // substitution if there's a successful match, giving us - // a real result - temp.setTo(fRuleText, sub2Pos, fRuleText.length() - sub2Pos); - partialResult = matchToDelimiter(workText2, 0, partialResult, - temp, pp2, sub2, - nonNumericalExecutedRuleMask, - upperBound); - - // if we got a successful match on this second - // matchToDelimiter() call, update the high-water mark - // and result (if necessary) - if (pp2.getIndex() != 0 || sub2 == NULL) { - if (prefixLength + pp.getIndex() + pp2.getIndex() > highWaterMark) { - highWaterMark = prefixLength + pp.getIndex() + pp2.getIndex(); - result = partialResult; - } - } - else { - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - int32_t i_temp = pp2.getErrorIndex() + sub1Pos + pp.getIndex(); - if (i_temp> parsePosition.getErrorIndex()) { - parsePosition.setErrorIndex(i_temp); - } - } - } - else { - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - int32_t i_temp = sub1Pos + pp.getErrorIndex(); - if (i_temp > parsePosition.getErrorIndex()) { - parsePosition.setErrorIndex(i_temp); - } - } - // keep trying to match things until the outer matchToDelimiter() - // call fails to make a match (each time, it picks up where it - // left off the previous time) - } while (sub1Pos != sub2Pos - && pp.getIndex() > 0 - && pp.getIndex() < workText.length() - && pp.getIndex() != start); - - // update the caller's ParsePosition with our high-water mark - // (i.e., it now points at the first character this function - // didn't match-- the ParsePosition is therefore unchanged if - // we didn't match anything) - parsePosition.setIndex(highWaterMark); - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - if (highWaterMark > 0) { - parsePosition.setErrorIndex(0); - } - - // this is a hack for one unusual condition: Normally, whether this - // rule belong to a fraction rule set or not is handled by its - // substitutions. But if that rule HAS NO substitutions, then - // we have to account for it here. By definition, if the matching - // rule in a fraction rule set has no substitutions, its numerator - // is 1, and so the result is the reciprocal of its base value. - if (isFractionRule && highWaterMark > 0 && sub1 == NULL) { - result = 1 / result; - } - - resVal.setDouble(result); - return TRUE; // ??? do we need to worry if it is a long or a double? -} - -/** -* This function is used by parse() to match the text being parsed -* against a possible prefix string. This function -* matches characters from the beginning of the string being parsed -* to characters from the prospective prefix. If they match, pp is -* updated to the first character not matched, and the result is -* the unparsed part of the string. If they don't match, the whole -* string is returned, and pp is left unchanged. -* @param text The string being parsed -* @param prefix The text to match against -* @param pp On entry, ignored and assumed to be 0. On exit, points -* to the first unmatched character (assuming the whole prefix matched), -* or is unchanged (if the whole prefix didn't match). -* @return If things match, this is the unparsed part of "text"; -* if they didn't match, this is "text". -*/ -void -NFRule::stripPrefix(UnicodeString& text, const UnicodeString& prefix, ParsePosition& pp) const -{ - // if the prefix text is empty, dump out without doing anything - if (prefix.length() != 0) { - UErrorCode status = U_ZERO_ERROR; - // use prefixLength() to match the beginning of - // "text" against "prefix". This function returns the - // number of characters from "text" that matched (or 0 if - // we didn't match the whole prefix) - int32_t pfl = prefixLength(text, prefix, status); - if (U_FAILURE(status)) { // Memory allocation error. - return; - } - if (pfl != 0) { - // if we got a successful match, update the parse position - // and strip the prefix off of "text" - pp.setIndex(pp.getIndex() + pfl); - text.remove(0, pfl); - } - } -} - -/** -* Used by parse() to match a substitution and any following text. -* "text" is searched for instances of "delimiter". For each instance -* of delimiter, the intervening text is tested to see whether it -* matches the substitution. The longest match wins. -* @param text The string being parsed -* @param startPos The position in "text" where we should start looking -* for "delimiter". -* @param baseValue A partial parse result (often the rule's base value), -* which is combined with the result from matching the substitution -* @param delimiter The string to search "text" for. -* @param pp Ignored and presumed to be 0 on entry. If there's a match, -* on exit this will point to the first unmatched character. -* @param sub If we find "delimiter" in "text", this substitution is used -* to match the text between the beginning of the string and the -* position of "delimiter." (If "delimiter" is the empty string, then -* this function just matches against this substitution and updates -* everything accordingly.) -* @param upperBound When matching the substitution, it will only -* consider rules with base values lower than this value. -* @return If there's a match, this is the result of composing -* baseValue with the result of matching the substitution. Otherwise, -* this is new Long(0). It's never null. If the result is an integer, -* this will be an instance of Long; otherwise, it's an instance of -* Double. -* -* !!! note {dlf} in point of fact, in the java code the caller always converts -* the result to a double, so we might as well return one. -*/ -double -NFRule::matchToDelimiter(const UnicodeString& text, - int32_t startPos, - double _baseValue, - const UnicodeString& delimiter, - ParsePosition& pp, - const NFSubstitution* sub, - uint32_t nonNumericalExecutedRuleMask, - double upperBound) const -{ - UErrorCode status = U_ZERO_ERROR; - // if "delimiter" contains real (i.e., non-ignorable) text, search - // it for "delimiter" beginning at "start". If that succeeds, then - // use "sub"'s doParse() method to match the text before the - // instance of "delimiter" we just found. - if (!allIgnorable(delimiter, status)) { - if (U_FAILURE(status)) { //Memory allocation error. - return 0; - } - ParsePosition tempPP; - Formattable result; - - // use findText() to search for "delimiter". It returns a two- - // element array: element 0 is the position of the match, and - // element 1 is the number of characters that matched - // "delimiter". - int32_t dLen; - int32_t dPos = findText(text, delimiter, startPos, &dLen); - - // if findText() succeeded, isolate the text preceding the - // match, and use "sub" to match that text - while (dPos >= 0) { - UnicodeString subText; - subText.setTo(text, 0, dPos); - if (subText.length() > 0) { - UBool success = sub->doParse(subText, tempPP, _baseValue, upperBound, -#if UCONFIG_NO_COLLATION - FALSE, -#else - formatter->isLenient(), -#endif - nonNumericalExecutedRuleMask, - result); - - // if the substitution could match all the text up to - // where we found "delimiter", then this function has - // a successful match. Bump the caller's parse position - // to point to the first character after the text - // that matches "delimiter", and return the result - // we got from parsing the substitution. - if (success && tempPP.getIndex() == dPos) { - pp.setIndex(dPos + dLen); - return result.getDouble(); - } - else { - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - if (tempPP.getErrorIndex() > 0) { - pp.setErrorIndex(tempPP.getErrorIndex()); - } else { - pp.setErrorIndex(tempPP.getIndex()); - } - } - } - - // if we didn't match the substitution, search for another - // copy of "delimiter" in "text" and repeat the loop if - // we find it - tempPP.setIndex(0); - dPos = findText(text, delimiter, dPos + dLen, &dLen); - } - // if we make it here, this was an unsuccessful match, and we - // leave pp unchanged and return 0 - pp.setIndex(0); - return 0; - - // if "delimiter" is empty, or consists only of ignorable characters - // (i.e., is semantically empty), thwe we obviously can't search - // for "delimiter". Instead, just use "sub" to parse as much of - // "text" as possible. - } - else if (sub == NULL) { - return _baseValue; - } - else { - ParsePosition tempPP; - Formattable result; - - // try to match the whole string against the substitution - UBool success = sub->doParse(text, tempPP, _baseValue, upperBound, -#if UCONFIG_NO_COLLATION - FALSE, -#else - formatter->isLenient(), -#endif - nonNumericalExecutedRuleMask, - result); - if (success && (tempPP.getIndex() != 0)) { - // if there's a successful match (or it's a null - // substitution), update pp to point to the first - // character we didn't match, and pass the result from - // sub.doParse() on through to the caller - pp.setIndex(tempPP.getIndex()); - return result.getDouble(); - } - else { - // commented out because ParsePosition doesn't have error index in 1.1.x - // restored for ICU4C port - pp.setErrorIndex(tempPP.getErrorIndex()); - } - - // and if we get to here, then nothing matched, so we return - // 0 and leave pp alone - return 0; - } -} - -/** -* Used by stripPrefix() to match characters. If lenient parse mode -* is off, this just calls startsWith(). If lenient parse mode is on, -* this function uses CollationElementIterators to match characters in -* the strings (only primary-order differences are significant in -* determining whether there's a match). -* @param str The string being tested -* @param prefix The text we're hoping to see at the beginning -* of "str" -* @return If "prefix" is found at the beginning of "str", this -* is the number of characters in "str" that were matched (this -* isn't necessarily the same as the length of "prefix" when matching -* text with a collator). If there's no match, this is 0. -*/ -int32_t -NFRule::prefixLength(const UnicodeString& str, const UnicodeString& prefix, UErrorCode& status) const -{ - // if we're looking for an empty prefix, it obviously matches - // zero characters. Just go ahead and return 0. - if (prefix.length() == 0) { - return 0; - } - -#if !UCONFIG_NO_COLLATION - // go through all this grief if we're in lenient-parse mode - if (formatter->isLenient()) { - // get the formatter's collator and use it to create two - // collation element iterators, one over the target string - // and another over the prefix (right now, we'll throw an - // exception if the collator we get back from the formatter - // isn't a RuleBasedCollator, because RuleBasedCollator defines - // the CollationElementIterator protocol. Hopefully, this - // will change someday.) - const RuleBasedCollator* collator = formatter->getCollator(); - if (collator == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - LocalPointer<CollationElementIterator> strIter(collator->createCollationElementIterator(str)); - LocalPointer<CollationElementIterator> prefixIter(collator->createCollationElementIterator(prefix)); - // Check for memory allocation error. - if (strIter.isNull() || prefixIter.isNull()) { - status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - UErrorCode err = U_ZERO_ERROR; - - // The original code was problematic. Consider this match: - // prefix = "fifty-" - // string = " fifty-7" - // The intent is to match string up to the '7', by matching 'fifty-' at position 1 - // in the string. Unfortunately, we were getting a match, and then computing where - // the match terminated by rematching the string. The rematch code was using as an - // initial guess the substring of string between 0 and prefix.length. Because of - // the leading space and trailing hyphen (both ignorable) this was succeeding, leaving - // the position before the hyphen in the string. Recursing down, we then parsed the - // remaining string '-7' as numeric. The resulting number turned out as 43 (50 - 7). - // This was not pretty, especially since the string "fifty-7" parsed just fine. - // - // We have newer APIs now, so we can use calls on the iterator to determine what we - // matched up to. If we terminate because we hit the last element in the string, - // our match terminates at this length. If we terminate because we hit the last element - // in the target, our match terminates at one before the element iterator position. - - // match collation elements between the strings - int32_t oStr = strIter->next(err); - int32_t oPrefix = prefixIter->next(err); - - while (oPrefix != CollationElementIterator::NULLORDER) { - // skip over ignorable characters in the target string - while (CollationElementIterator::primaryOrder(oStr) == 0 - && oStr != CollationElementIterator::NULLORDER) { - oStr = strIter->next(err); - } - - // skip over ignorable characters in the prefix - while (CollationElementIterator::primaryOrder(oPrefix) == 0 - && oPrefix != CollationElementIterator::NULLORDER) { - oPrefix = prefixIter->next(err); - } - - // dlf: move this above following test, if we consume the - // entire target, aren't we ok even if the source was also - // entirely consumed? - - // if skipping over ignorables brought to the end of - // the prefix, we DID match: drop out of the loop - if (oPrefix == CollationElementIterator::NULLORDER) { - break; - } - - // if skipping over ignorables brought us to the end - // of the target string, we didn't match and return 0 - if (oStr == CollationElementIterator::NULLORDER) { - return 0; - } - - // match collation elements from the two strings - // (considering only primary differences). If we - // get a mismatch, dump out and return 0 - if (CollationElementIterator::primaryOrder(oStr) - != CollationElementIterator::primaryOrder(oPrefix)) { - return 0; - - // otherwise, advance to the next character in each string - // and loop (we drop out of the loop when we exhaust - // collation elements in the prefix) - } else { - oStr = strIter->next(err); - oPrefix = prefixIter->next(err); - } - } - - int32_t result = strIter->getOffset(); - if (oStr != CollationElementIterator::NULLORDER) { - --result; // back over character that we don't want to consume; - } - -#ifdef RBNF_DEBUG - fprintf(stderr, "prefix length: %d\n", result); -#endif - return result; -#if 0 - //---------------------------------------------------------------- - // JDK 1.2-specific API call - // return strIter.getOffset(); - //---------------------------------------------------------------- - // JDK 1.1 HACK (take out for 1.2-specific code) - - // if we make it to here, we have a successful match. Now we - // have to find out HOW MANY characters from the target string - // matched the prefix (there isn't necessarily a one-to-one - // mapping between collation elements and characters). - // In JDK 1.2, there's a simple getOffset() call we can use. - // In JDK 1.1, on the other hand, we have to go through some - // ugly contortions. First, use the collator to compare the - // same number of characters from the prefix and target string. - // If they're equal, we're done. - collator->setStrength(Collator::PRIMARY); - if (str.length() >= prefix.length()) { - UnicodeString temp; - temp.setTo(str, 0, prefix.length()); - if (collator->equals(temp, prefix)) { -#ifdef RBNF_DEBUG - fprintf(stderr, "returning: %d\n", prefix.length()); -#endif - return prefix.length(); - } - } - - // if they're not equal, then we have to compare successively - // larger and larger substrings of the target string until we - // get to one that matches the prefix. At that point, we know - // how many characters matched the prefix, and we can return. - int32_t p = 1; - while (p <= str.length()) { - UnicodeString temp; - temp.setTo(str, 0, p); - if (collator->equals(temp, prefix)) { - return p; - } else { - ++p; - } - } - - // SHOULD NEVER GET HERE!!! - return 0; - //---------------------------------------------------------------- -#endif - - // If lenient parsing is turned off, forget all that crap above. - // Just use String.startsWith() and be done with it. - } else -#endif - { - if (str.startsWith(prefix)) { - return prefix.length(); - } else { - return 0; - } - } -} - -/** -* Searches a string for another string. If lenient parsing is off, -* this just calls indexOf(). If lenient parsing is on, this function -* uses CollationElementIterator to match characters, and only -* primary-order differences are significant in determining whether -* there's a match. -* @param str The string to search -* @param key The string to search "str" for -* @param startingAt The index into "str" where the search is to -* begin -* @return A two-element array of ints. Element 0 is the position -* of the match, or -1 if there was no match. Element 1 is the -* number of characters in "str" that matched (which isn't necessarily -* the same as the length of "key") -*/ -int32_t -NFRule::findText(const UnicodeString& str, - const UnicodeString& key, - int32_t startingAt, - int32_t* length) const -{ - if (rulePatternFormat) { - Formattable result; - FieldPosition position(UNUM_INTEGER_FIELD); - position.setBeginIndex(startingAt); - rulePatternFormat->parseType(str, this, result, position); - int start = position.getBeginIndex(); - if (start >= 0) { - int32_t pluralRuleStart = fRuleText.indexOf(gDollarOpenParenthesis, -1, 0); - int32_t pluralRuleSuffix = fRuleText.indexOf(gClosedParenthesisDollar, -1, pluralRuleStart) + 2; - int32_t matchLen = position.getEndIndex() - start; - UnicodeString prefix(fRuleText.tempSubString(0, pluralRuleStart)); - UnicodeString suffix(fRuleText.tempSubString(pluralRuleSuffix)); - if (str.compare(start - prefix.length(), prefix.length(), prefix, 0, prefix.length()) == 0 - && str.compare(start + matchLen, suffix.length(), suffix, 0, suffix.length()) == 0) - { - *length = matchLen + prefix.length() + suffix.length(); - return start - prefix.length(); - } - } - *length = 0; - return -1; - } - if (!formatter->isLenient()) { - // if lenient parsing is turned off, this is easy: just call - // String.indexOf() and we're done - *length = key.length(); - return str.indexOf(key, startingAt); - } - else { - // but if lenient parsing is turned ON, we've got some work - // ahead of us - return findTextLenient(str, key, startingAt, length); - } -} - -int32_t -NFRule::findTextLenient(const UnicodeString& str, - const UnicodeString& key, - int32_t startingAt, - int32_t* length) const -{ - //---------------------------------------------------------------- - // JDK 1.1 HACK (take out of 1.2-specific code) - - // in JDK 1.2, CollationElementIterator provides us with an - // API to map between character offsets and collation elements - // and we can do this by marching through the string comparing - // collation elements. We can't do that in JDK 1.1. Insted, - // we have to go through this horrible slow mess: - int32_t p = startingAt; - int32_t keyLen = 0; - - // basically just isolate smaller and smaller substrings of - // the target string (each running to the end of the string, - // and with the first one running from startingAt to the end) - // and then use prefixLength() to see if the search key is at - // the beginning of each substring. This is excruciatingly - // slow, but it will locate the key and tell use how long the - // matching text was. - UnicodeString temp; - UErrorCode status = U_ZERO_ERROR; - while (p < str.length() && keyLen == 0) { - temp.setTo(str, p, str.length() - p); - keyLen = prefixLength(temp, key, status); - if (U_FAILURE(status)) { - break; - } - if (keyLen != 0) { - *length = keyLen; - return p; - } - ++p; - } - // if we make it to here, we didn't find it. Return -1 for the - // location. The length should be ignored, but set it to 0, - // which should be "safe" - *length = 0; - return -1; -} - -/** -* Checks to see whether a string consists entirely of ignorable -* characters. -* @param str The string to test. -* @return true if the string is empty of consists entirely of -* characters that the number formatter's collator says are -* ignorable at the primary-order level. false otherwise. -*/ -UBool -NFRule::allIgnorable(const UnicodeString& str, UErrorCode& status) const -{ - // if the string is empty, we can just return true - if (str.length() == 0) { - return TRUE; - } - -#if !UCONFIG_NO_COLLATION - // if lenient parsing is turned on, walk through the string with - // a collation element iterator and make sure each collation - // element is 0 (ignorable) at the primary level - if (formatter->isLenient()) { - const RuleBasedCollator* collator = formatter->getCollator(); - if (collator == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - LocalPointer<CollationElementIterator> iter(collator->createCollationElementIterator(str)); - - // Memory allocation error check. - if (iter.isNull()) { - status = U_MEMORY_ALLOCATION_ERROR; - return FALSE; - } - - UErrorCode err = U_ZERO_ERROR; - int32_t o = iter->next(err); - while (o != CollationElementIterator::NULLORDER - && CollationElementIterator::primaryOrder(o) == 0) { - o = iter->next(err); - } - - return o == CollationElementIterator::NULLORDER; - } -#endif - - // if lenient parsing is turned off, there is no such thing as - // an ignorable character: return true only if the string is empty - return FALSE; -} - -void -NFRule::setDecimalFormatSymbols(const DecimalFormatSymbols& newSymbols, UErrorCode& status) { - if (sub1 != NULL) { - sub1->setDecimalFormatSymbols(newSymbols, status); - } - if (sub2 != NULL) { - sub2->setDecimalFormatSymbols(newSymbols, status); - } -} - -U_NAMESPACE_END - -/* U_HAVE_RBNF */ -#endif |