summary | refs | log | tree | commit | diff
path: root/deps/icu-small/source/common/rbbirb.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/rbbirb.cpp')
-rw-r--r-- deps/icu-small/source/common/rbbirb.cpp | 194
1 files changed, 67 insertions, 127 deletions
diff --git a/deps/icu-small/source/common/rbbirb.cpp b/deps/icu-small/source/common/rbbirb.cpp
index 9fc8f8e814..a46f483d23 100644
--- a/deps/icu-small/source/common/rbbirb.cpp
+++ b/deps/icu-small/source/common/rbbirb.cpp
@@ -62,10 +62,7 @@ RBBIRuleBuilder::RBBIRuleBuilder(const UnicodeString &rules,
fSafeFwdTree = NULL;
fSafeRevTree = NULL;
fDefaultTree = &fForwardTree;
- fForwardTables = NULL;
- fReverseTables = NULL;
- fSafeFwdTables = NULL;
- fSafeRevTables = NULL;
+ fForwardTable = NULL;
fRuleStatusVals = NULL;
fChainRules = FALSE;
fLBCMNoChain = FALSE;
@@ -114,11 +111,7 @@ RBBIRuleBuilder::~RBBIRuleBuilder() {
delete fUSetNodes;
delete fSetBuilder;
- delete fForwardTables;
- delete fReverseTables;
- delete fSafeFwdTables;
- delete fSafeRevTables;
-
+ delete fForwardTable;
delete fForwardTree;
delete fReverseTree;
delete fSafeFwdTree;
@@ -157,21 +150,15 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
// without the padding.
//
int32_t headerSize = align8(sizeof(RBBIDataHeader));
- int32_t forwardTableSize = align8(fForwardTables->getTableSize());
- int32_t reverseTableSize = align8(fReverseTables->getTableSize());
- int32_t safeFwdTableSize = align8(fSafeFwdTables->getTableSize());
- int32_t safeRevTableSize = align8(fSafeRevTables->getTableSize());
+ int32_t forwardTableSize = align8(fForwardTable->getTableSize());
+ int32_t reverseTableSize = align8(fForwardTable->getSafeTableSize());
int32_t trieSize = align8(fSetBuilder->getTrieSize());
int32_t statusTableSize = align8(fRuleStatusVals->size() * sizeof(int32_t));
int32_t rulesSize = align8((fStrippedRules.length()+1) * sizeof(UChar));
- (void)safeFwdTableSize;
-
int32_t totalSize = headerSize
+ forwardTableSize
- + /* reverseTableSize */ 0
- + /* safeFwdTableSize */ 0
- + (safeRevTableSize ? safeRevTableSize : reverseTableSize)
+ + reverseTableSize
+ statusTableSize + trieSize + rulesSize;
RBBIDataHeader *data = (RBBIDataHeader *)uprv_malloc(totalSize);
@@ -190,38 +177,13 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
data->fLength = totalSize;
data->fCatCount = fSetBuilder->getNumCharCategories();
- // Only save the forward table and the safe reverse table,
- // because these are the only ones used at run-time.
- //
- // For the moment, we still build the other tables if they are present in the rule source files,
- // for backwards compatibility. Old rule files need to work, and this is the simplest approach.
- //
- // Additional backwards compatibility consideration: if no safe rules are provided, consider the
- // reverse rules to actually be the safe reverse rules.
-
data->fFTable = headerSize;
data->fFTableLen = forwardTableSize;
- // Do not save Reverse Table.
- data->fRTable = data->fFTable + forwardTableSize;
- data->fRTableLen = 0;
-
- // Do not save the Safe Forward table.
- data->fSFTable = data->fRTable + 0;
- data->fSFTableLen = 0;
-
- data->fSRTable = data->fSFTable + 0;
- if (safeRevTableSize > 0) {
- data->fSRTableLen = safeRevTableSize;
- } else if (reverseTableSize > 0) {
- data->fSRTableLen = reverseTableSize;
- } else {
- U_ASSERT(FALSE); // Rule build should have failed for lack of a reverse table
- // before reaching this point.
- }
-
+ data->fRTable = data->fFTable + data->fFTableLen;
+ data->fRTableLen = reverseTableSize;
- data->fTrie = data->fSRTable + data->fSRTableLen;
+ data->fTrie = data->fRTable + data->fRTableLen;
data->fTrieLen = fSetBuilder->getTrieSize();
data->fStatusTable = data->fTrie + trieSize;
data->fStatusTableLen= statusTableSize;
@@ -230,15 +192,8 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
uprv_memset(data->fReserved, 0, sizeof(data->fReserved));
- fForwardTables->exportTable((uint8_t *)data + data->fFTable);
- // fReverseTables->exportTable((uint8_t *)data + data->fRTable);
- // fSafeFwdTables->exportTable((uint8_t *)data + data->fSFTable);
- if (safeRevTableSize > 0) {
- fSafeRevTables->exportTable((uint8_t *)data + data->fSRTable);
- } else {
- fReverseTables->exportTable((uint8_t *)data + data->fSRTable);
- }
-
+ fForwardTable->exportTable((uint8_t *)data + data->fFTable);
+ fForwardTable->exportSafeTable((uint8_t *)data + data->fRTable);
fSetBuilder->serializeTrie ((uint8_t *)data + data->fTrie);
int32_t *ruleStatusTable = (int32_t *)((uint8_t *)data + data->fStatusTable);
@@ -252,10 +207,6 @@ RBBIDataHeader *RBBIRuleBuilder::flattenData() {
}
-
-
-
-
//----------------------------------------------------------------------------------------
//
// createRuleBasedBreakIterator construct from source rules that are passed in
@@ -267,8 +218,6 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
UParseError *parseError,
UErrorCode &status)
{
- // status checked below
-
//
// Read the input rules, generate a parse tree, symbol table,
// and list of all Unicode Sets referenced by the rules.
@@ -277,7 +226,38 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
if (U_FAILURE(status)) { // status checked here bcos build below doesn't
return NULL;
}
- builder.fScanner->parse();
+
+ RBBIDataHeader *data = builder.build(status);
+
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ //
+ // Create a break iterator from the compiled rules.
+ // (Identical to creation from stored pre-compiled rules)
+ //
+ // status is checked after init in construction.
+ RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
+ if (U_FAILURE(status)) {
+ delete This;
+ This = NULL;
+ }
+ else if(This == NULL) { // test for NULL
+ status = U_MEMORY_ALLOCATION_ERROR;
+ }
+ return This;
+}
+
+RBBIDataHeader *RBBIRuleBuilder::build(UErrorCode &status) {
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
+
+ fScanner->parse();
+ if (U_FAILURE(status)) {
+ return nullptr;
+ }
//
// UnicodeSet processing.
@@ -285,95 +265,55 @@ RBBIRuleBuilder::createRuleBasedBreakIterator( const UnicodeString &rules,
// Generate the mapping tables (TRIE) from input code points to
// the character categories.
//
- builder.fSetBuilder->buildRanges();
-
+ fSetBuilder->buildRanges();
//
// Generate the DFA state transition table.
//
- builder.fForwardTables = new RBBITableBuilder(&builder, &builder.fForwardTree);
- builder.fReverseTables = new RBBITableBuilder(&builder, &builder.fReverseTree);
- builder.fSafeFwdTables = new RBBITableBuilder(&builder, &builder.fSafeFwdTree);
- builder.fSafeRevTables = new RBBITableBuilder(&builder, &builder.fSafeRevTree);
- if (builder.fForwardTables == NULL || builder.fReverseTables == NULL ||
- builder.fSafeFwdTables == NULL || builder.fSafeRevTables == NULL)
- {
+ fForwardTable = new RBBITableBuilder(this, &fForwardTree, status);
+ if (fForwardTable == nullptr) {
status = U_MEMORY_ALLOCATION_ERROR;
- delete builder.fForwardTables; builder.fForwardTables = NULL;
- delete builder.fReverseTables; builder.fReverseTables = NULL;
- delete builder.fSafeFwdTables; builder.fSafeFwdTables = NULL;
- delete builder.fSafeRevTables; builder.fSafeRevTables = NULL;
- return NULL;
+ return nullptr;
}
- builder.fForwardTables->build();
- builder.fReverseTables->build();
- builder.fSafeFwdTables->build();
- builder.fSafeRevTables->build();
+ fForwardTable->buildForwardTable();
+ optimizeTables();
+ fForwardTable->buildSafeReverseTable(status);
+
#ifdef RBBI_DEBUG
- if (builder.fDebugEnv && uprv_strstr(builder.fDebugEnv, "states")) {
- builder.fForwardTables->printRuleStatusTable();
+ if (fDebugEnv && uprv_strstr(fDebugEnv, "states")) {
+ fForwardTable->printStates();
+ fForwardTable->printRuleStatusTable();
+ fForwardTable->printReverseTable();
}
#endif
- builder.optimizeTables();
- builder.fSetBuilder->buildTrie();
-
-
+ fSetBuilder->buildTrie();
//
// Package up the compiled data into a memory image
// in the run-time format.
//
- RBBIDataHeader *data = builder.flattenData(); // returns NULL if error
- if (U_FAILURE(*builder.fStatus)) {
- return NULL;
- }
-
-
- //
- // Clean up the compiler related stuff
- //
-
-
- //
- // Create a break iterator from the compiled rules.
- // (Identical to creation from stored pre-compiled rules)
- //
- // status is checked after init in construction.
- RuleBasedBreakIterator *This = new RuleBasedBreakIterator(data, status);
+ RBBIDataHeader *data = flattenData(); // returns NULL if error
if (U_FAILURE(status)) {
- delete This;
- This = NULL;
+ return nullptr;
}
- else if(This == NULL) { // test for NULL
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- return This;
+ return data;
}
void RBBIRuleBuilder::optimizeTables() {
- int32_t leftClass;
- int32_t rightClass;
-
- leftClass = 3;
- rightClass = 0;
- while (fForwardTables->findDuplCharClassFrom(leftClass, rightClass)) {
- fSetBuilder->mergeCategories(leftClass, rightClass);
- fForwardTables->removeColumn(rightClass);
- fReverseTables->removeColumn(rightClass);
- fSafeFwdTables->removeColumn(rightClass);
- fSafeRevTables->removeColumn(rightClass);
- }
-
- fForwardTables->removeDuplicateStates();
- fReverseTables->removeDuplicateStates();
- fSafeFwdTables->removeDuplicateStates();
- fSafeRevTables->removeDuplicateStates();
-
+ // Begin looking for duplicates with char class 3.
+ // Classes 0, 1 and 2 are special; they are unused, {bof} and {eof} respectively,
+ // and should not have other categories merged into them.
+ IntPair duplPair = {3, 0};
+ while (fForwardTable->findDuplCharClassFrom(&duplPair)) {
+ fSetBuilder->mergeCategories(duplPair);
+ fForwardTable->removeColumn(duplPair.second);
+ }
+ fForwardTable->removeDuplicateStates();
}
U_NAMESPACE_END