diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/bytestriebuilder.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/bytestriebuilder.cpp | 504 |
1 files changed, 0 insertions, 504 deletions
diff --git a/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp b/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp deleted file mode 100644 index ec1ab7d8..00000000 --- a/deps/node/deps/icu-small/source/common/bytestriebuilder.cpp +++ /dev/null @@ -1,504 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2010-2012, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* file name: bytestriebuilder.cpp -* encoding: UTF-8 -* tab size: 8 (not used) -* indentation:4 -* -* created on: 2010sep25 -* created by: Markus W. Scherer -*/ - -#include "unicode/utypes.h" -#include "unicode/bytestrie.h" -#include "unicode/bytestriebuilder.h" -#include "unicode/stringpiece.h" -#include "charstr.h" -#include "cmemory.h" -#include "uhash.h" -#include "uarrsort.h" -#include "uassert.h" -#include "ustr_imp.h" - -U_NAMESPACE_BEGIN - -/* - * Note: This builder implementation stores (bytes, value) pairs with full copies - * of the byte sequences, until the BytesTrie is built. - * It might(!) take less memory if we collected the data in a temporary, dynamic trie. - */ - -class BytesTrieElement : public UMemory { -public: - // Use compiler's default constructor, initializes nothing. - - void setTo(StringPiece s, int32_t val, CharString &strings, UErrorCode &errorCode); - - StringPiece getString(const CharString &strings) const { - int32_t offset=stringOffset; - int32_t length; - if(offset>=0) { - length=(uint8_t)strings[offset++]; - } else { - offset=~offset; - length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; - offset+=2; - } - return StringPiece(strings.data()+offset, length); - } - int32_t getStringLength(const CharString &strings) const { - int32_t offset=stringOffset; - if(offset>=0) { - return (uint8_t)strings[offset]; - } else { - offset=~offset; - return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; - } - } - - char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } - - int32_t getValue() const { return value; } - - int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; - -private: - const char *data(const CharString &strings) const { - int32_t offset=stringOffset; - if(offset>=0) { - ++offset; - } else { - offset=~offset+2; - } - return strings.data()+offset; - } - - // If the stringOffset is non-negative, then the first strings byte contains - // the string length. - // If the stringOffset is negative, then the first two strings bytes contain - // the string length (big-endian), and the offset needs to be bit-inverted. - // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) - int32_t stringOffset; - int32_t value; -}; - -void -BytesTrieElement::setTo(StringPiece s, int32_t val, - CharString &strings, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - int32_t length=s.length(); - if(length>0xffff) { - // Too long: We store the length in 1 or 2 bytes. - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - int32_t offset=strings.length(); - if(length>0xff) { - offset=~offset; - strings.append((char)(length>>8), errorCode); - } - strings.append((char)length, errorCode); - stringOffset=offset; - value=val; - strings.append(s, errorCode); -} - -int32_t -BytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { - // TODO: add StringPiece::compare(), see ticket #8187 - StringPiece thisString=getString(strings); - StringPiece otherString=other.getString(strings); - int32_t lengthDiff=thisString.length()-otherString.length(); - int32_t commonLength; - if(lengthDiff<=0) { - commonLength=thisString.length(); - } else { - commonLength=otherString.length(); - } - int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); - return diff!=0 ? diff : lengthDiff; -} - -BytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) - : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), - bytes(NULL), bytesCapacity(0), bytesLength(0) { - if(U_FAILURE(errorCode)) { - return; - } - strings=new CharString(); - if(strings==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -BytesTrieBuilder::~BytesTrieBuilder() { - delete strings; - delete[] elements; - uprv_free(bytes); -} - -BytesTrieBuilder & -BytesTrieBuilder::add(StringPiece s, int32_t value, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return *this; - } - if(bytesLength>0) { - // Cannot add elements after building. - errorCode=U_NO_WRITE_PERMISSION; - return *this; - } - if(elementsLength==elementsCapacity) { - int32_t newCapacity; - if(elementsCapacity==0) { - newCapacity=1024; - } else { - newCapacity=4*elementsCapacity; - } - BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; - if(newElements==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - return *this; // error instead of dereferencing null - } - if(elementsLength>0) { - uprv_memcpy(newElements, elements, (size_t)elementsLength*sizeof(BytesTrieElement)); - } - delete[] elements; - elements=newElements; - elementsCapacity=newCapacity; - } - elements[elementsLength++].setTo(s, value, *strings, errorCode); - return *this; -} - -U_CDECL_BEGIN - -static int32_t U_CALLCONV -compareElementStrings(const void *context, const void *left, const void *right) { - const CharString *strings=static_cast<const CharString *>(context); - const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left); - const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right); - return leftElement->compareStringTo(*rightElement, *strings); -} - -U_CDECL_END - -BytesTrie * -BytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - buildBytes(buildOption, errorCode); - BytesTrie *newTrie=NULL; - if(U_SUCCESS(errorCode)) { - newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); - if(newTrie==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } else { - bytes=NULL; // The new trie now owns the array. - bytesCapacity=0; - } - } - return newTrie; -} - -StringPiece -BytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - buildBytes(buildOption, errorCode); - StringPiece result; - if(U_SUCCESS(errorCode)) { - result.set(bytes+(bytesCapacity-bytesLength), bytesLength); - } - return result; -} - -void -BytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { - if(U_FAILURE(errorCode)) { - return; - } - if(bytes!=NULL && bytesLength>0) { - // Already built. - return; - } - if(bytesLength==0) { - if(elementsLength==0) { - errorCode=U_INDEX_OUTOFBOUNDS_ERROR; - return; - } - uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), - compareElementStrings, strings, - FALSE, // need not be a stable sort - &errorCode); - if(U_FAILURE(errorCode)) { - return; - } - // Duplicate strings are not allowed. - StringPiece prev=elements[0].getString(*strings); - for(int32_t i=1; i<elementsLength; ++i) { - StringPiece current=elements[i].getString(*strings); - if(prev==current) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - return; - } - prev=current; - } - } - // Create and byte-serialize the trie for the elements. - bytesLength=0; - int32_t capacity=strings->length(); - if(capacity<1024) { - capacity=1024; - } - if(bytesCapacity<capacity) { - uprv_free(bytes); - bytes=static_cast<char *>(uprv_malloc(capacity)); - if(bytes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - bytesCapacity=0; - return; - } - bytesCapacity=capacity; - } - StringTrieBuilder::build(buildOption, elementsLength, errorCode); - if(bytes==NULL) { - errorCode=U_MEMORY_ALLOCATION_ERROR; - } -} - -BytesTrieBuilder & -BytesTrieBuilder::clear() { - strings->clear(); - elementsLength=0; - bytesLength=0; - return *this; -} - -int32_t -BytesTrieBuilder::getElementStringLength(int32_t i) const { - return elements[i].getStringLength(*strings); -} - -UChar -BytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { - return (uint8_t)elements[i].charAt(byteIndex, *strings); -} - -int32_t -BytesTrieBuilder::getElementValue(int32_t i) const { - return elements[i].getValue(); -} - -int32_t -BytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { - const BytesTrieElement &firstElement=elements[first]; - const BytesTrieElement &lastElement=elements[last]; - int32_t minStringLength=firstElement.getStringLength(*strings); - while(++byteIndex<minStringLength && - firstElement.charAt(byteIndex, *strings)== - lastElement.charAt(byteIndex, *strings)) {} - return byteIndex; -} - -int32_t -BytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const { - int32_t length=0; // Number of different bytes at byteIndex. - int32_t i=start; - do { - char byte=elements[i++].charAt(byteIndex, *strings); - while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) { - ++i; - } - ++length; - } while(i<limit); - return length; -} - -int32_t -BytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const { - do { - char byte=elements[i++].charAt(byteIndex, *strings); - while(byte==elements[i].charAt(byteIndex, *strings)) { - ++i; - } - } while(--count>0); - return i; -} - -int32_t -BytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { - char b=(char)byte; - while(b==elements[i].charAt(byteIndex, *strings)) { - ++i; - } - return i; -} - -BytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) - : LinearMatchNode(len, nextNode), s(bytes) { - hash=static_cast<int32_t>( - static_cast<uint32_t>(hash)*37u + static_cast<uint32_t>(ustr_hashCharsN(bytes, len))); -} - -UBool -BytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { - if(this==&other) { - return TRUE; - } - if(!LinearMatchNode::operator==(other)) { - return FALSE; - } - const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; - return 0==uprv_memcmp(s, o.s, length); -} - -void -BytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { - BytesTrieBuilder &b=(BytesTrieBuilder &)builder; - next->write(builder); - b.write(s, length); - offset=b.write(b.getMinLinearMatch()+length-1); -} - -StringTrieBuilder::Node * -BytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, - Node *nextNode) const { - return new BTLinearMatchNode( - elements[i].getString(*strings).data()+byteIndex, - length, - nextNode); -} - -UBool -BytesTrieBuilder::ensureCapacity(int32_t length) { - if(bytes==NULL) { - return FALSE; // previous memory allocation had failed - } - if(length>bytesCapacity) { - int32_t newCapacity=bytesCapacity; - do { - newCapacity*=2; - } while(newCapacity<=length); - char *newBytes=static_cast<char *>(uprv_malloc(newCapacity)); - if(newBytes==NULL) { - // unable to allocate memory - uprv_free(bytes); - bytes=NULL; - bytesCapacity=0; - return FALSE; - } - uprv_memcpy(newBytes+(newCapacity-bytesLength), - bytes+(bytesCapacity-bytesLength), bytesLength); - uprv_free(bytes); - bytes=newBytes; - bytesCapacity=newCapacity; - } - return TRUE; -} - -int32_t -BytesTrieBuilder::write(int32_t byte) { - int32_t newLength=bytesLength+1; - if(ensureCapacity(newLength)) { - bytesLength=newLength; - bytes[bytesCapacity-bytesLength]=(char)byte; - } - return bytesLength; -} - -int32_t -BytesTrieBuilder::write(const char *b, int32_t length) { - int32_t newLength=bytesLength+length; - if(ensureCapacity(newLength)) { - bytesLength=newLength; - uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); - } - return bytesLength; -} - -int32_t -BytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { - return write(elements[i].getString(*strings).data()+byteIndex, length); -} - -int32_t -BytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { - if(0<=i && i<=BytesTrie::kMaxOneByteValue) { - return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); - } - char intBytes[5]; - int32_t length=1; - if(i<0 || i>0xffffff) { - intBytes[0]=(char)BytesTrie::kFiveByteValueLead; - intBytes[1]=(char)((uint32_t)i>>24); - intBytes[2]=(char)((uint32_t)i>>16); - intBytes[3]=(char)((uint32_t)i>>8); - intBytes[4]=(char)i; - length=5; - // } else if(i<=BytesTrie::kMaxOneByteValue) { - // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); - } else { - if(i<=BytesTrie::kMaxTwoByteValue) { - intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); - } else { - if(i<=BytesTrie::kMaxThreeByteValue) { - intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); - } else { - intBytes[0]=(char)BytesTrie::kFourByteValueLead; - intBytes[1]=(char)(i>>16); - length=2; - } - intBytes[length++]=(char)(i>>8); - } - intBytes[length++]=(char)i; - } - intBytes[0]=(char)((intBytes[0]<<1)|isFinal); - return write(intBytes, length); -} - -int32_t -BytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { - int32_t offset=write(node); - if(hasValue) { - offset=writeValueAndFinal(value, FALSE); - } - return offset; -} - -int32_t -BytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { - int32_t i=bytesLength-jumpTarget; - U_ASSERT(i>=0); - if(i<=BytesTrie::kMaxOneByteDelta) { - return write(i); - } - char intBytes[5]; - int32_t length; - if(i<=BytesTrie::kMaxTwoByteDelta) { - intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); - length=1; - } else { - if(i<=BytesTrie::kMaxThreeByteDelta) { - intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); - length=2; - } else { - if(i<=0xffffff) { - intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; - length=3; - } else { - intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; - intBytes[1]=(char)(i>>24); - length=4; - } - intBytes[1]=(char)(i>>16); - } - intBytes[1]=(char)(i>>8); - } - intBytes[length++]=(char)i; - return write(intBytes, length); -} - -U_NAMESPACE_END |