diff options
author | Steven R. Loomis <srloomis@us.ibm.com> | 2018-07-09 13:46:16 -0700 |
---|---|---|
committer | Anna Henningsen <anna@addaleax.net> | 2018-07-11 00:15:23 +0200 |
commit | 538acead6670d711ddb71c0b852089b792c996e3 (patch) | |
tree | 917c6df14436e66d4883feb7bb9269480fce06ab /deps/icu-small/source/common/rbbi_cache.cpp | |
parent | ed715ef8900afa5056ebd5ef995e89eebd4987c2 (diff) | |
download | android-node-v8-538acead6670d711ddb71c0b852089b792c996e3.tar.gz android-node-v8-538acead6670d711ddb71c0b852089b792c996e3.tar.bz2 android-node-v8-538acead6670d711ddb71c0b852089b792c996e3.zip |
deps: icu 62.1 bump (Unicode 11, CLDR 33.1)
- Full release notes: http://site.icu-project.org/download/62
Fixes: https://github.com/nodejs/node/issues/21452
PR-URL: https://github.com/nodejs/node/pull/21728
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Bradley Farias <bradley.meck@gmail.com>
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Tiancheng "Timothy" Gu <timothygu99@gmail.com>
Reviewed-By: Richard Lau <riclau@uk.ibm.com>
Diffstat (limited to 'deps/icu-small/source/common/rbbi_cache.cpp')
-rw-r--r-- | deps/icu-small/source/common/rbbi_cache.cpp | 49 |
1 files changed, 40 insertions, 9 deletions
diff --git a/deps/icu-small/source/common/rbbi_cache.cpp b/deps/icu-small/source/common/rbbi_cache.cpp index ba9329d477..60316ce642 100644 --- a/deps/icu-small/source/common/rbbi_cache.cpp +++ b/deps/icu-small/source/common/rbbi_cache.cpp @@ -354,14 +354,31 @@ UBool RuleBasedBreakIterator::BreakCache::populateNear(int32_t position, UErrorC if ((position < fBoundaries[fStartBufIdx] - 15) || position > (fBoundaries[fEndBufIdx] + 15)) { int32_t aBoundary = 0; int32_t ruleStatusIndex = 0; - // TODO: check for position == length of text. Although may still need to back up to get rule status. if (position > 20) { - int32_t backupPos = fBI->handlePrevious(position); - fBI->fPosition = backupPos; - aBoundary = fBI->handleNext(); // Ignore dictionary, just finding a rule based boundary. - ruleStatusIndex = fBI->fRuleStatusIndex; + int32_t backupPos = fBI->handleSafePrevious(position); + + if (backupPos > 0) { + // Advance to the boundary following the backup position. + // There is a complication: the safe reverse rules identify pairs of code points + // that are safe. If advancing from the safe point moves forwards by less than + // two code points, we need to advance one more time to ensure that the boundary + // is good, including a correct rules status value. + // + fBI->fPosition = backupPos; + aBoundary = fBI->handleNext(); + if (aBoundary <= backupPos + 4) { + // +4 is a quick test for possibly having advanced only one codepoint. + // Four being the length of the longest potential code point, a supplementary in UTF-8 + utext_setNativeIndex(&fBI->fText, aBoundary); + if (backupPos == utext_getPreviousNativeIndex(&fBI->fText)) { + // The initial handleNext() only advanced by a single code point. Go again. + aBoundary = fBI->handleNext(); // Safe rules identify safe pairs. + } + } + ruleStatusIndex = fBI->fRuleStatusIndex; + } } - reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point. + reset(aBoundary, ruleStatusIndex); // Reset cache to hold aBoundary as a single starting point. } // Fill in boundaries between existing cache content and the new requested position. @@ -485,16 +502,30 @@ UBool RuleBasedBreakIterator::BreakCache::populatePreceding(UErrorCode &status) if (backupPosition <= 0) { backupPosition = 0; } else { - backupPosition = fBI->handlePrevious(backupPosition); + backupPosition = fBI->handleSafePrevious(backupPosition); } if (backupPosition == UBRK_DONE || backupPosition == 0) { position = 0; positionStatusIdx = 0; } else { - fBI->fPosition = backupPosition; // TODO: pass starting position in a clearer way. + // Advance to the boundary following the backup position. + // There is a complication: the safe reverse rules identify pairs of code points + // that are safe. If advancing from the safe point moves forwards by less than + // two code points, we need to advance one more time to ensure that the boundary + // is good, including a correct rules status value. + // + fBI->fPosition = backupPosition; position = fBI->handleNext(); + if (position <= backupPosition + 4) { + // +4 is a quick test for possibly having advanced only one codepoint. + // Four being the length of the longest potential code point, a supplementary in UTF-8 + utext_setNativeIndex(&fBI->fText, position); + if (backupPosition == utext_getPreviousNativeIndex(&fBI->fText)) { + // The initial handleNext() only advanced by a single code point. Go again. + position = fBI->handleNext(); // Safe rules identify safe pairs. + } + }; positionStatusIdx = fBI->fRuleStatusIndex; - } } while (position >= fromPosition); |