diff options
Diffstat (limited to 'deps/icu-small/source/common/rbbirb.cpp')
-rw-r--r-- | deps/icu-small/source/common/rbbirb.cpp | 45 |
1 files changed, 37 insertions, 8 deletions
diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp index c67f6f8166..9fc8f8e814 100644 --- a/deps/icu-small/source/common/rbbirb.cpp +++ b/deps/icu-small/source/common/rbbirb.cpp @@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules, UParseError *parseErr, UErrorCode &status) - : fRules(rules) + : fRules(rules), fStrippedRules(rules) { fStatus = &status; // status is checked below fParseError = parseErr; @@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { return NULL; } - // Remove comments and whitespace from the rules to make it smaller. - UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules)); + // Remove whitespace from the rules to make it smaller. + // The rule parser has already removed comments. + fStrippedRules = fScanner->stripRules(fStrippedRules); // Calculate the size of each section in the data. // Sizes here are padded up to a multiple of 8 for better memory alignment. 
@@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize()); int32_t trieSize = align8(fSetBuilder->getTrieSize()); int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t)); - int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar)); + int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar)); (void)safeFwdTableSize; @@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { data->fStatusTable = data->fTrie + trieSize; data->fStatusTableLen= statusTableSize; data->fRuleSource = data->fStatusTable + statusTableSize; - data->fRuleSourceLen = strippedRules.length() * sizeof(UChar); + data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar); uprv_memset(data->fReserved, 0, sizeof(data->fReserved)); @@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() { ruleStatusTable[i] = fRuleStatusVals->elementAti(i); } - strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); + fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus); return data; } @@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, // // UnicodeSet processing. // Munge the Unicode Sets to create a set of character categories. - // Generate the mapping tables (TRIE) from input 32-bit characters to + // Generate the mapping tables (TRIE) from input code points to // the character categories. // - builder.fSetBuilder->build(); + builder.fSetBuilder->buildRanges(); // @@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules, } #endif + builder.optimizeTables(); + builder.fSetBuilder->buildTrie(); + + + // // Package up the compiled data into a memory image // in the run-time format. 
@@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
     return This;
 }
 
+void RBBIRuleBuilder::optimizeTables() {  // Merges character classes reported by findDuplCharClassFrom(), then removes duplicate states from all four tables.
+    int32_t leftClass;   // passed to findDuplCharClassFrom() and, as first argument, to mergeCategories()
+    int32_t rightClass;  // second argument to mergeCategories(); its column is then removed from every table
+    // Search is seeded with leftClass = 3, rightClass = 0. NOTE(review): presumably classes below 3 are reserved and must not be merged - confirm against the table builder.
+    leftClass = 3;
+    rightClass = 0;
+    while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {  // only the forward table is queried; presumably updates both arguments by reference (nothing here reassigns them) - confirm
+        fSetBuilder->mergeCategories(leftClass, rightClass);
+        fForwardTables->removeColumn(rightClass);  // the same column is dropped from each of the four tables
+        fReverseTables->removeColumn(rightClass);
+        fSafeFwdTables->removeColumn(rightClass);
+        fSafeRevTables->removeColumn(rightClass);
+    }
+    // After the class-merging loop finishes, each table removes its own duplicate states.
+    fForwardTables->removeDuplicateStates();
+    fReverseTables->removeDuplicateStates();
+    fSafeFwdTables->removeDuplicateStates();
+    fSafeRevTables->removeDuplicateStates();
+
+
+
+}
+
 U_NAMESPACE_END
 
 #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |