diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/stringtriebuilder.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/stringtriebuilder.cpp | 618 |
1 files changed, 0 insertions, 618 deletions
diff --git a/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp b/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp deleted file mode 100644 index 6f9cc2e5..00000000 --- a/deps/node/deps/icu-small/source/common/stringtriebuilder.cpp +++ /dev/null @@ -1,618 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: stringtriebuilder.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010dec24 -* created by: Markus W. Scherer -*/ - -#include "utypeinfo.h" // for 'typeid' to work -#include "unicode/utypes.h" -#include "unicode/stringtriebuilder.h" -#include "uassert.h" -#include "uhash.h" - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -hashStringTrieNode(const UHashTok key) { - return icu::StringTrieBuilder::hashNode(key.pointer); -} - -static UBool U_CALLCONV -equalStringTrieNodes(const UHashTok key1, const UHashTok key2) { - return icu::StringTrieBuilder::equalNodes(key1.pointer, key2.pointer); -} - -U_CDECL_END - -U_NAMESPACE_BEGIN - -StringTrieBuilder::StringTrieBuilder() : nodes(NULL) {} - -StringTrieBuilder::~StringTrieBuilder() { - deleteCompactBuilder(); -} - -void -StringTrieBuilder::createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - nodes=uhash_openSize(hashStringTrieNode, equalStringTrieNodes, NULL, - sizeGuess, &errorCode); - if(U_SUCCESS(errorCode)) { - if(nodes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - uhash_setKeyDeleter(nodes, uprv_deleteUObject); - } - } -} - -void -StringTrieBuilder::deleteCompactBuilder() { - uhash_close(nodes); - nodes=NULL; -} - -void -StringTrieBuilder::build(UStringTrieBuildOption buildOption, int32_t elementsLength, - UErrorCode &errorCode) { - if(buildOption==USTRINGTRIE_BUILD_FAST) { - writeNode(0, elementsLength, 0); - } else /* USTRINGTRIE_BUILD_SMALL */ { - createCompactBuilder(2*elementsLength, errorCode); - Node *root=makeNode(0, elementsLength, 0, errorCode); - if(U_SUCCESS(errorCode)) { - root->markRightEdgesFirst(-1); - root->write(*this); - } - deleteCompactBuilder(); - } -} - -// Requires start<limit, -// and all strings of the [start..limit[ elements must be sorted and -// have a common prefix of length unitIndex. -int32_t -StringTrieBuilder::writeNode(int32_t start, int32_t limit, int32_t unitIndex) { - UBool hasValue=FALSE; - int32_t value=0; - int32_t type; - if(unitIndex==getElementStringLength(start)) { - // An intermediate or final value. - value=getElementValue(start++); - if(start==limit) { - return writeValueAndFinal(value, TRUE); // final-value node - } - hasValue=TRUE; - } - // Now all [start..limit[ strings are longer than unitIndex. - int32_t minUnit=getElementUnit(start, unitIndex); - int32_t maxUnit=getElementUnit(limit-1, unitIndex); - if(minUnit==maxUnit) { - // Linear-match node: All strings have the same character at unitIndex. - int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); - writeNode(start, limit, lastUnitIndex); - // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. - int32_t length=lastUnitIndex-unitIndex; - int32_t maxLinearMatchLength=getMaxLinearMatchLength(); - while(length>maxLinearMatchLength) { - lastUnitIndex-=maxLinearMatchLength; - length-=maxLinearMatchLength; - writeElementUnits(start, lastUnitIndex, maxLinearMatchLength); - write(getMinLinearMatch()+maxLinearMatchLength-1); - } - writeElementUnits(start, unitIndex, length); - type=getMinLinearMatch()+length-1; - } else { - // Branch node. - int32_t length=countElementUnits(start, limit, unitIndex); - // length>=2 because minUnit!=maxUnit. - writeBranchSubNode(start, limit, unitIndex, length); - if(--length<getMinLinearMatch()) { - type=length; - } else { - write(length); - type=0; - } - } - return writeValueAndType(hasValue, value, type); -} - -// start<limit && all strings longer than unitIndex && -// length different units at unitIndex -int32_t -StringTrieBuilder::writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length) { - UChar middleUnits[kMaxSplitBranchLevels]; - int32_t lessThan[kMaxSplitBranchLevels]; - int32_t ltLength=0; - while(length>getMaxBranchLinearSubNodeLength()) { - // Branch on the middle unit. - // First, find the middle unit. - int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); - // Encode the less-than branch first. - middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit - lessThan[ltLength]=writeBranchSubNode(start, i, unitIndex, length/2); - ++ltLength; - // Continue for the greater-or-equal branch. - start=i; - length=length-length/2; - } - // For each unit, find its elements array start and whether it has a final value. - int32_t starts[kMaxBranchLinearSubNodeLength]; - UBool isFinal[kMaxBranchLinearSubNodeLength-1]; - int32_t unitNumber=0; - do { - int32_t i=starts[unitNumber]=start; - UChar unit=getElementUnit(i++, unitIndex); - i=indexOfElementWithNextUnit(i, unitIndex, unit); - isFinal[unitNumber]= start==i-1 && unitIndex+1==getElementStringLength(start); - start=i; - } while(++unitNumber<length-1); - // unitNumber==length-1, and the maxUnit elements range is [start..limit[ - starts[unitNumber]=start; - - // Write the sub-nodes in reverse order: The jump lengths are deltas from - // after their own positions, so if we wrote the minUnit sub-node first, - // then its jump delta would be larger. - // Instead we write the minUnit sub-node last, for a shorter delta. - int32_t jumpTargets[kMaxBranchLinearSubNodeLength-1]; - do { - --unitNumber; - if(!isFinal[unitNumber]) { - jumpTargets[unitNumber]=writeNode(starts[unitNumber], starts[unitNumber+1], unitIndex+1); - } - } while(unitNumber>0); - // The maxUnit sub-node is written as the very last one because we do - // not jump for it at all. - unitNumber=length-1; - writeNode(start, limit, unitIndex+1); - int32_t offset=write(getElementUnit(start, unitIndex)); - // Write the rest of this node's unit-value pairs. - while(--unitNumber>=0) { - start=starts[unitNumber]; - int32_t value; - if(isFinal[unitNumber]) { - // Write the final value for the one string ending with this unit. - value=getElementValue(start); - } else { - // Write the delta to the start position of the sub-node. - value=offset-jumpTargets[unitNumber]; - } - writeValueAndFinal(value, isFinal[unitNumber]); - offset=write(getElementUnit(start, unitIndex)); - } - // Write the split-branch nodes. - while(ltLength>0) { - --ltLength; - writeDeltaTo(lessThan[ltLength]); - offset=write(middleUnits[ltLength]); - } - return offset; -} - -// Requires start<limit, -// and all strings of the [start..limit[ elements must be sorted and -// have a common prefix of length unitIndex. -StringTrieBuilder::Node * -StringTrieBuilder::makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - UBool hasValue=FALSE; - int32_t value=0; - if(unitIndex==getElementStringLength(start)) { - // An intermediate or final value. - value=getElementValue(start++); - if(start==limit) { - return registerFinalValue(value, errorCode); - } - hasValue=TRUE; - } - Node *node; - // Now all [start..limit[ strings are longer than unitIndex. - int32_t minUnit=getElementUnit(start, unitIndex); - int32_t maxUnit=getElementUnit(limit-1, unitIndex); - if(minUnit==maxUnit) { - // Linear-match node: All strings have the same character at unitIndex. - int32_t lastUnitIndex=getLimitOfLinearMatch(start, limit-1, unitIndex); - Node *nextNode=makeNode(start, limit, lastUnitIndex, errorCode); - // Break the linear-match sequence into chunks of at most kMaxLinearMatchLength. - int32_t length=lastUnitIndex-unitIndex; - int32_t maxLinearMatchLength=getMaxLinearMatchLength(); - while(length>maxLinearMatchLength) { - lastUnitIndex-=maxLinearMatchLength; - length-=maxLinearMatchLength; - node=createLinearMatchNode(start, lastUnitIndex, maxLinearMatchLength, nextNode); - nextNode=registerNode(node, errorCode); - } - node=createLinearMatchNode(start, unitIndex, length, nextNode); - } else { - // Branch node. - int32_t length=countElementUnits(start, limit, unitIndex); - // length>=2 because minUnit!=maxUnit. - Node *subNode=makeBranchSubNode(start, limit, unitIndex, length, errorCode); - node=new BranchHeadNode(length, subNode); - } - if(hasValue && node!=NULL) { - if(matchNodesCanHaveValues()) { - ((ValueNode *)node)->setValue(value); - } else { - node=new IntermediateValueNode(value, registerNode(node, errorCode)); - } - } - return registerNode(node, errorCode); -} - -// start<limit && all strings longer than unitIndex && -// length different units at unitIndex -StringTrieBuilder::Node * -StringTrieBuilder::makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, - int32_t length, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - UChar middleUnits[kMaxSplitBranchLevels]; - Node *lessThan[kMaxSplitBranchLevels]; - int32_t ltLength=0; - while(length>getMaxBranchLinearSubNodeLength()) { - // Branch on the middle unit. - // First, find the middle unit. - int32_t i=skipElementsBySomeUnits(start, unitIndex, length/2); - // Create the less-than branch. - middleUnits[ltLength]=getElementUnit(i, unitIndex); // middle unit - lessThan[ltLength]=makeBranchSubNode(start, i, unitIndex, length/2, errorCode); - ++ltLength; - // Continue for the greater-or-equal branch. - start=i; - length=length-length/2; - } - if(U_FAILURE(errorCode)) { - return NULL; - } - ListBranchNode *listNode=new ListBranchNode(); - if(listNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - // For each unit, find its elements array start and whether it has a final value. - int32_t unitNumber=0; - do { - int32_t i=start; - UChar unit=getElementUnit(i++, unitIndex); - i=indexOfElementWithNextUnit(i, unitIndex, unit); - if(start==i-1 && unitIndex+1==getElementStringLength(start)) { - listNode->add(unit, getElementValue(start)); - } else { - listNode->add(unit, makeNode(start, i, unitIndex+1, errorCode)); - } - start=i; - } while(++unitNumber<length-1); - // unitNumber==length-1, and the maxUnit elements range is [start..limit[ - UChar unit=getElementUnit(start, unitIndex); - if(start==limit-1 && unitIndex+1==getElementStringLength(start)) { - listNode->add(unit, getElementValue(start)); - } else { - listNode->add(unit, makeNode(start, limit, unitIndex+1, errorCode)); - } - Node *node=registerNode(listNode, errorCode); - // Create the split-branch nodes. - while(ltLength>0) { - --ltLength; - node=registerNode( - new SplitBranchNode(middleUnits[ltLength], lessThan[ltLength], node), errorCode); - } - return node; -} - -StringTrieBuilder::Node * -StringTrieBuilder::registerNode(Node *newNode, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - if(newNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - const UHashElement *old=uhash_find(nodes, newNode); - if(old!=NULL) { - delete newNode; - return (Node *)old->key.pointer; - } - // If uhash_puti() returns a non-zero value from an equivalent, previously - // registered node, then uhash_find() failed to find that and we will leak newNode. -#if U_DEBUG - int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. -#endif - uhash_puti(nodes, newNode, 1, &errorCode); - U_ASSERT(oldValue==0); - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - return newNode; -} - -StringTrieBuilder::Node * -StringTrieBuilder::registerFinalValue(int32_t value, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return NULL; - } - FinalValueNode key(value); - const UHashElement *old=uhash_find(nodes, &key); - if(old!=NULL) { - return (Node *)old->key.pointer; - } - Node *newNode=new FinalValueNode(value); - if(newNode==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - // If uhash_puti() returns a non-zero value from an equivalent, previously - // registered node, then uhash_find() failed to find that and we will leak newNode. -#if U_DEBUG - int32_t oldValue= // Only in debug mode to avoid a compiler warning about unused oldValue. -#endif - uhash_puti(nodes, newNode, 1, &errorCode); - U_ASSERT(oldValue==0); - if(U_FAILURE(errorCode)) { - delete newNode; - return NULL; - } - return newNode; -} - -int32_t -StringTrieBuilder::hashNode(const void *node) { - return ((const Node *)node)->hashCode(); -} - -UBool -StringTrieBuilder::equalNodes(const void *left, const void *right) { - return *(const Node *)left==*(const Node *)right; -} - -UBool -StringTrieBuilder::Node::operator==(const Node &other) const { - return this==&other || (typeid(*this)==typeid(other) && hash==other.hash); -} - -int32_t -StringTrieBuilder::Node::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber; - } - return edgeNumber; -} - -UBool -StringTrieBuilder::FinalValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const FinalValueNode &o=(const FinalValueNode &)other; - return value==o.value; -} - -void -StringTrieBuilder::FinalValueNode::write(StringTrieBuilder &builder) { - offset=builder.writeValueAndFinal(value, TRUE); -} - -UBool -StringTrieBuilder::ValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const ValueNode &o=(const ValueNode &)other; - return hasValue==o.hasValue && (!hasValue || value==o.value); -} - -UBool -StringTrieBuilder::IntermediateValueNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const IntermediateValueNode &o=(const IntermediateValueNode &)other; - return next==o.next; -} - -int32_t -StringTrieBuilder::IntermediateValueNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -void -StringTrieBuilder::IntermediateValueNode::write(StringTrieBuilder &builder) { - next->write(builder); - offset=builder.writeValueAndFinal(value, FALSE); -} - -UBool -StringTrieBuilder::LinearMatchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const LinearMatchNode &o=(const LinearMatchNode &)other; - return length==o.length && next==o.next; -} - -int32_t -StringTrieBuilder::LinearMatchNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -UBool -StringTrieBuilder::ListBranchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const ListBranchNode &o=(const ListBranchNode &)other; - for(int32_t i=0; i<length; ++i) { - if(units[i]!=o.units[i] || values[i]!=o.values[i] || equal[i]!=o.equal[i]) { - return FALSE; - } - } - return TRUE; -} - -int32_t -StringTrieBuilder::ListBranchNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - firstEdgeNumber=edgeNumber; - int32_t step=0; - int32_t i=length; - do { - Node *edge=equal[--i]; - if(edge!=NULL) { - edgeNumber=edge->markRightEdgesFirst(edgeNumber-step); - } - // For all but the rightmost edge, decrement the edge number. - step=1; - } while(i>0); - offset=edgeNumber; - } - return edgeNumber; -} - -void -StringTrieBuilder::ListBranchNode::write(StringTrieBuilder &builder) { - // Write the sub-nodes in reverse order: The jump lengths are deltas from - // after their own positions, so if we wrote the minUnit sub-node first, - // then its jump delta would be larger. - // Instead we write the minUnit sub-node last, for a shorter delta. - int32_t unitNumber=length-1; - Node *rightEdge=equal[unitNumber]; - int32_t rightEdgeNumber= rightEdge==NULL ? firstEdgeNumber : rightEdge->getOffset(); - do { - --unitNumber; - if(equal[unitNumber]!=NULL) { - equal[unitNumber]->writeUnlessInsideRightEdge(firstEdgeNumber, rightEdgeNumber, builder); - } - } while(unitNumber>0); - // The maxUnit sub-node is written as the very last one because we do - // not jump for it at all. - unitNumber=length-1; - if(rightEdge==NULL) { - builder.writeValueAndFinal(values[unitNumber], TRUE); - } else { - rightEdge->write(builder); - } - offset=builder.write(units[unitNumber]); - // Write the rest of this node's unit-value pairs. - while(--unitNumber>=0) { - int32_t value; - UBool isFinal; - if(equal[unitNumber]==NULL) { - // Write the final value for the one string ending with this unit. - value=values[unitNumber]; - isFinal=TRUE; - } else { - // Write the delta to the start position of the sub-node. - U_ASSERT(equal[unitNumber]->getOffset()>0); - value=offset-equal[unitNumber]->getOffset(); - isFinal=FALSE; - } - builder.writeValueAndFinal(value, isFinal); - offset=builder.write(units[unitNumber]); - } -} - -UBool -StringTrieBuilder::SplitBranchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!Node::operator==(other)) { - return FALSE; - } - const SplitBranchNode &o=(const SplitBranchNode &)other; - return unit==o.unit && lessThan==o.lessThan && greaterOrEqual==o.greaterOrEqual; -} - -int32_t -StringTrieBuilder::SplitBranchNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - firstEdgeNumber=edgeNumber; - edgeNumber=greaterOrEqual->markRightEdgesFirst(edgeNumber); - offset=edgeNumber=lessThan->markRightEdgesFirst(edgeNumber-1); - } - return edgeNumber; -} - -void -StringTrieBuilder::SplitBranchNode::write(StringTrieBuilder &builder) { - // Encode the less-than branch first. - lessThan->writeUnlessInsideRightEdge(firstEdgeNumber, greaterOrEqual->getOffset(), builder); - // Encode the greater-or-equal branch last because we do not jump for it at all. - greaterOrEqual->write(builder); - // Write this node. - U_ASSERT(lessThan->getOffset()>0); - builder.writeDeltaTo(lessThan->getOffset()); // less-than - offset=builder.write(unit); -} - -UBool -StringTrieBuilder::BranchHeadNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!ValueNode::operator==(other)) { - return FALSE; - } - const BranchHeadNode &o=(const BranchHeadNode &)other; - return length==o.length && next==o.next; -} - -int32_t -StringTrieBuilder::BranchHeadNode::markRightEdgesFirst(int32_t edgeNumber) { - if(offset==0) { - offset=edgeNumber=next->markRightEdgesFirst(edgeNumber); - } - return edgeNumber; -} - -void -StringTrieBuilder::BranchHeadNode::write(StringTrieBuilder &builder) { - next->write(builder); - if(length<=builder.getMinLinearMatch()) { - offset=builder.writeValueAndType(hasValue, value, length-1); - } else { - builder.write(length-1); - offset=builder.writeValueAndType(hasValue, value, 0); - } -} - -U_NAMESPACE_END |