summary | refs | log | tree | commit | diff
path: root/deps/icu-small/source/common/rbbirb.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/rbbirb.cpp')
-rw-r--r-- deps/icu-small/source/common/rbbirb.cpp 45
1 files changed, 37 insertions, 8 deletions
diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp
index c67f6f8166..9fc8f8e814 100644
--- a/deps/icu-small/source/common/rbbirb.cpp
+++ b/deps/icu-small/source/common/rbbirb.cpp
@@ -47,7 +47,7 @@ U_NAMESPACE_BEGIN
RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
UParseError *parseErr,
UErrorCode &status)
- : fRules(rules)
+ : fRules(rules), fStrippedRules(rules)
{
fStatus = &status; // status is checked below
fParseError = parseErr;
@@ -147,8 +147,9 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
return NULL;
}
- // Remove comments and whitespace from the rules to make it smaller.
- UnicodeString strippedRules((const UnicodeString&)RBBIRuleScanner::stripRules(fRules));
+ // Remove whitespace from the rules to make it smaller.
+ // The rule parser has already removed comments.
+ fStrippedRules = fScanner->stripRules(fStrippedRules);
// Calculate the size of each section in the data.
// Sizes here are padded up to a multiple of 8 for better memory alignment.
@@ -162,7 +163,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
- int32_t rulesSize = align8((strippedRules.length()+1) * sizeof(UChar));
+ int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
(void)safeFwdTableSize;
@@ -225,7 +226,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
data->fRuleSource = data->fStatusTable + statusTableSize;
- data->fRuleSourceLen = strippedRules.length() * sizeof(UChar);
+ data->fRuleSourceLen = fStrippedRules.length() * sizeof(UChar);
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
@@ -245,7 +246,7 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
ruleStatusTable[i] = fRuleStatusVals->elementAti(i);
}
- strippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
+ fStrippedRules.extract((UChar *)((uint8_t *)data+data->fRuleSource), rulesSize/2+1, *fStatus);
return data;
}
@@ -281,10 +282,10 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
//
// UnicodeSet processing.
// Munge the Unicode Sets to create a set of character categories.
- // Generate the mapping tables (TRIE) from input 32-bit characters to
+ // Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
- builder.fSetBuilder->build();
+ builder.fSetBuilder->buildRanges();
//
@@ -316,6 +317,11 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
}
#endif
+ builder.optimizeTables();
+ builder.fSetBuilder->buildTrie();
+
+
+
//
// Package up the compiled data into a memory image
// in the run-time format.
@@ -347,6 +353,29 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
return This;
}
+void RBBIRuleBuilder::optimizeTables() { // Post-build pass: merge duplicate character classes, then remove duplicate states from each table.
+ int32_t leftClass;
+ int32_t rightClass;
+ // NOTE(review): the duplicate search starts at class 3; presumably lower-numbered classes are reserved -- confirm against findDuplCharClassFrom().
+ leftClass = 3;
+ rightClass = 0; // presumably both values are updated by findDuplCharClassFrom() (nothing in this loop body assigns them) -- confirm
+ while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) { // duplicates are searched for in the forward table only
+ fSetBuilder->mergeCategories(leftClass, rightClass);
+ fForwardTables->removeColumn(rightClass); // the rightClass column is removed from all four tables
+ fReverseTables->removeColumn(rightClass); // NOTE(review): confirm a forward-table duplicate is also safe to drop from the other three tables
+ fSafeFwdTables->removeColumn(rightClass);
+ fSafeRevTables->removeColumn(rightClass);
+ }
+ // After the class merging loop, each of the four tables removes its own duplicate states.
+ fForwardTables->removeDuplicateStates();
+ fReverseTables->removeDuplicateStates();
+ fSafeFwdTables->removeDuplicateStates();
+ fSafeRevTables->removeDuplicateStates();
+
+
+
+}
+
U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */