summaryrefslogtreecommitdiff
path: root/deps/node/deps/icu-small/source/common/umutablecptrie.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/node/deps/icu-small/source/common/umutablecptrie.cpp')
-rw-r--r--deps/node/deps/icu-small/source/common/umutablecptrie.cpp1678
1 files changed, 0 insertions, 1678 deletions
diff --git a/deps/node/deps/icu-small/source/common/umutablecptrie.cpp b/deps/node/deps/icu-small/source/common/umutablecptrie.cpp
deleted file mode 100644
index 40af4b6c..00000000
--- a/deps/node/deps/icu-small/source/common/umutablecptrie.cpp
+++ /dev/null
@@ -1,1678 +0,0 @@
-// © 2017 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-
-// umutablecptrie.cpp (inspired by utrie2_builder.cpp)
-// created: 2017dec29 Markus W. Scherer
-
-// #define UCPTRIE_DEBUG
-#ifdef UCPTRIE_DEBUG
-# include <stdio.h>
-#endif
-
-#include "unicode/utypes.h"
-#include "unicode/ucptrie.h"
-#include "unicode/umutablecptrie.h"
-#include "unicode/uobject.h"
-#include "unicode/utf16.h"
-#include "cmemory.h"
-#include "uassert.h"
-#include "ucptrie_impl.h"
-
-U_NAMESPACE_BEGIN
-
-namespace {
-
-constexpr int32_t MAX_UNICODE = 0x10ffff;
-
-constexpr int32_t UNICODE_LIMIT = 0x110000;
-constexpr int32_t BMP_LIMIT = 0x10000;
-constexpr int32_t ASCII_LIMIT = 0x80;
-
-constexpr int32_t I_LIMIT = UNICODE_LIMIT >> UCPTRIE_SHIFT_3;
-constexpr int32_t BMP_I_LIMIT = BMP_LIMIT >> UCPTRIE_SHIFT_3;
-constexpr int32_t ASCII_I_LIMIT = ASCII_LIMIT >> UCPTRIE_SHIFT_3;
-
-constexpr int32_t SMALL_DATA_BLOCKS_PER_BMP_BLOCK = (1 << (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3));
-
-// Flag values for data blocks.
-constexpr uint8_t ALL_SAME = 0;
-constexpr uint8_t MIXED = 1;
-constexpr uint8_t SAME_AS = 2;
-
-/** Start with allocation of 16k data entries. */
-constexpr int32_t INITIAL_DATA_LENGTH = ((int32_t)1 << 14);
-
-/** Grow about 8x each time. */
-constexpr int32_t MEDIUM_DATA_LENGTH = ((int32_t)1 << 17);
-
-/**
- * Maximum length of the build-time data array.
- * One entry per 0x110000 code points.
- */
-constexpr int32_t MAX_DATA_LENGTH = UNICODE_LIMIT;
-
-// Flag values for index-3 blocks while compacting/building.
-constexpr uint8_t I3_NULL = 0;
-constexpr uint8_t I3_BMP = 1;
-constexpr uint8_t I3_16 = 2;
-constexpr uint8_t I3_18 = 3;
-
-constexpr int32_t INDEX_3_18BIT_BLOCK_LENGTH = UCPTRIE_INDEX_3_BLOCK_LENGTH + UCPTRIE_INDEX_3_BLOCK_LENGTH / 8;
-
-class AllSameBlocks;
-
-class MutableCodePointTrie : public UMemory {
-public:
- MutableCodePointTrie(uint32_t initialValue, uint32_t errorValue, UErrorCode &errorCode);
- MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode);
- MutableCodePointTrie(const MutableCodePointTrie &other) = delete;
- ~MutableCodePointTrie();
-
- MutableCodePointTrie &operator=(const MutableCodePointTrie &other) = delete;
-
- static MutableCodePointTrie *fromUCPMap(const UCPMap *map, UErrorCode &errorCode);
- static MutableCodePointTrie *fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode);
-
- uint32_t get(UChar32 c) const;
- int32_t getRange(UChar32 start, UCPMapValueFilter *filter, const void *context,
- uint32_t *pValue) const;
-
- void set(UChar32 c, uint32_t value, UErrorCode &errorCode);
- void setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode);
-
- UCPTrie *build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode);
-
-private:
- void clear();
-
- bool ensureHighStart(UChar32 c);
- int32_t allocDataBlock(int32_t blockLength);
- int32_t getDataBlock(int32_t i);
-
- void maskValues(uint32_t mask);
- UChar32 findHighStart() const;
- int32_t compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks);
- int32_t compactData(int32_t fastILimit, uint32_t *newData, int32_t dataNullIndex);
- int32_t compactIndex(int32_t fastILimit, UErrorCode &errorCode);
- int32_t compactTrie(int32_t fastILimit, UErrorCode &errorCode);
-
- uint32_t *index = nullptr;
- int32_t indexCapacity = 0;
- int32_t index3NullOffset = -1;
- uint32_t *data = nullptr;
- int32_t dataCapacity = 0;
- int32_t dataLength = 0;
- int32_t dataNullOffset = -1;
-
- uint32_t origInitialValue;
- uint32_t initialValue;
- uint32_t errorValue;
- UChar32 highStart;
- uint32_t highValue;
-#ifdef UCPTRIE_DEBUG
-public:
- const char *name;
-#endif
-private:
- /** Temporary array while building the final data. */
- uint16_t *index16 = nullptr;
- uint8_t flags[UNICODE_LIMIT >> UCPTRIE_SHIFT_3];
-};
-
-MutableCodePointTrie::MutableCodePointTrie(uint32_t iniValue, uint32_t errValue, UErrorCode &errorCode) :
- origInitialValue(iniValue), initialValue(iniValue), errorValue(errValue),
- highStart(0), highValue(initialValue)
-#ifdef UCPTRIE_DEBUG
- , name("open")
-#endif
- {
- if (U_FAILURE(errorCode)) { return; }
- index = (uint32_t *)uprv_malloc(BMP_I_LIMIT * 4);
- data = (uint32_t *)uprv_malloc(INITIAL_DATA_LENGTH * 4);
- if (index == nullptr || data == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- indexCapacity = BMP_I_LIMIT;
- dataCapacity = INITIAL_DATA_LENGTH;
-}
-
-MutableCodePointTrie::MutableCodePointTrie(const MutableCodePointTrie &other, UErrorCode &errorCode) :
- index3NullOffset(other.index3NullOffset),
- dataNullOffset(other.dataNullOffset),
- origInitialValue(other.origInitialValue), initialValue(other.initialValue),
- errorValue(other.errorValue),
- highStart(other.highStart), highValue(other.highValue)
-#ifdef UCPTRIE_DEBUG
- , name("mutable clone")
-#endif
- {
- if (U_FAILURE(errorCode)) { return; }
- int32_t iCapacity = highStart <= BMP_LIMIT ? BMP_I_LIMIT : I_LIMIT;
- index = (uint32_t *)uprv_malloc(iCapacity * 4);
- data = (uint32_t *)uprv_malloc(other.dataCapacity * 4);
- if (index == nullptr || data == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- indexCapacity = iCapacity;
- dataCapacity = other.dataCapacity;
-
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- uprv_memcpy(flags, other.flags, iLimit);
- uprv_memcpy(index, other.index, iLimit * 4);
- uprv_memcpy(data, other.data, (size_t)other.dataLength * 4);
- dataLength = other.dataLength;
- U_ASSERT(other.index16 == nullptr);
-}
-
-MutableCodePointTrie::~MutableCodePointTrie() {
- uprv_free(index);
- uprv_free(data);
- uprv_free(index16);
-}
-
-MutableCodePointTrie *MutableCodePointTrie::fromUCPMap(const UCPMap *map, UErrorCode &errorCode) {
- // Use the highValue as the initialValue to reduce the highStart.
- uint32_t errorValue = ucpmap_get(map, -1);
- uint32_t initialValue = ucpmap_get(map, 0x10ffff);
- LocalPointer<MutableCodePointTrie> mutableTrie(
- new MutableCodePointTrie(initialValue, errorValue, errorCode),
- errorCode);
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
- nullptr, nullptr, &value)) >= 0) {
- if (value != initialValue) {
- if (start == end) {
- mutableTrie->set(start, value, errorCode);
- } else {
- mutableTrie->setRange(start, end, value, errorCode);
- }
- }
- start = end + 1;
- }
- if (U_SUCCESS(errorCode)) {
- return mutableTrie.orphan();
- } else {
- return nullptr;
- }
-}
-
-MutableCodePointTrie *MutableCodePointTrie::fromUCPTrie(const UCPTrie *trie, UErrorCode &errorCode) {
- // Use the highValue as the initialValue to reduce the highStart.
- uint32_t errorValue;
- uint32_t initialValue;
- switch (trie->valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- errorValue = trie->data.ptr16[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr16[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- case UCPTRIE_VALUE_BITS_32:
- errorValue = trie->data.ptr32[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr32[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- case UCPTRIE_VALUE_BITS_8:
- errorValue = trie->data.ptr8[trie->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET];
- initialValue = trie->data.ptr8[trie->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET];
- break;
- default:
- // Unreachable if the trie is properly initialized.
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> mutableTrie(
- new MutableCodePointTrie(initialValue, errorValue, errorCode),
- errorCode);
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- UChar32 start = 0, end;
- uint32_t value;
- while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
- nullptr, nullptr, &value)) >= 0) {
- if (value != initialValue) {
- if (start == end) {
- mutableTrie->set(start, value, errorCode);
- } else {
- mutableTrie->setRange(start, end, value, errorCode);
- }
- }
- start = end + 1;
- }
- if (U_SUCCESS(errorCode)) {
- return mutableTrie.orphan();
- } else {
- return nullptr;
- }
-}
-
-void MutableCodePointTrie::clear() {
- index3NullOffset = dataNullOffset = -1;
- dataLength = 0;
- highValue = initialValue = origInitialValue;
- highStart = 0;
- uprv_free(index16);
- index16 = nullptr;
-}
-
-uint32_t MutableCodePointTrie::get(UChar32 c) const {
- if ((uint32_t)c > MAX_UNICODE) {
- return errorValue;
- }
- if (c >= highStart) {
- return highValue;
- }
- int32_t i = c >> UCPTRIE_SHIFT_3;
- if (flags[i] == ALL_SAME) {
- return index[i];
- } else {
- return data[index[i] + (c & UCPTRIE_SMALL_DATA_MASK)];
- }
-}
-
-inline uint32_t maybeFilterValue(uint32_t value, uint32_t initialValue, uint32_t nullValue,
- UCPMapValueFilter *filter, const void *context) {
- if (value == initialValue) {
- value = nullValue;
- } else if (filter != nullptr) {
- value = filter(context, value);
- }
- return value;
-}
-
-UChar32 MutableCodePointTrie::getRange(
- UChar32 start, UCPMapValueFilter *filter, const void *context,
- uint32_t *pValue) const {
- if ((uint32_t)start > MAX_UNICODE) {
- return U_SENTINEL;
- }
- if (start >= highStart) {
- if (pValue != nullptr) {
- uint32_t value = highValue;
- if (filter != nullptr) { value = filter(context, value); }
- *pValue = value;
- }
- return MAX_UNICODE;
- }
- uint32_t nullValue = initialValue;
- if (filter != nullptr) { nullValue = filter(context, nullValue); }
- UChar32 c = start;
- uint32_t value;
- bool haveValue = false;
- int32_t i = c >> UCPTRIE_SHIFT_3;
- do {
- if (flags[i] == ALL_SAME) {
- uint32_t value2 = maybeFilterValue(index[i], initialValue, nullValue,
- filter, context);
- if (haveValue) {
- if (value2 != value) {
- return c - 1;
- }
- } else {
- value = value2;
- if (pValue != nullptr) { *pValue = value; }
- haveValue = true;
- }
- c = (c + UCPTRIE_SMALL_DATA_BLOCK_LENGTH) & ~UCPTRIE_SMALL_DATA_MASK;
- } else /* MIXED */ {
- int32_t di = index[i] + (c & UCPTRIE_SMALL_DATA_MASK);
- uint32_t value2 = maybeFilterValue(data[di], initialValue, nullValue,
- filter, context);
- if (haveValue) {
- if (value2 != value) {
- return c - 1;
- }
- } else {
- value = value2;
- if (pValue != nullptr) { *pValue = value; }
- haveValue = true;
- }
- while ((++c & UCPTRIE_SMALL_DATA_MASK) != 0) {
- if (maybeFilterValue(data[++di], initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- }
- }
- }
- ++i;
- } while (c < highStart);
- U_ASSERT(haveValue);
- if (maybeFilterValue(highValue, initialValue, nullValue,
- filter, context) != value) {
- return c - 1;
- } else {
- return MAX_UNICODE;
- }
-}
-
-void
-writeBlock(uint32_t *block, uint32_t value) {
- uint32_t *limit = block + UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- while (block < limit) {
- *block++ = value;
- }
-}
-
-bool MutableCodePointTrie::ensureHighStart(UChar32 c) {
- if (c >= highStart) {
- // Round up to a UCPTRIE_CP_PER_INDEX_2_ENTRY boundary to simplify compaction.
- c = (c + UCPTRIE_CP_PER_INDEX_2_ENTRY) & ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
- int32_t i = highStart >> UCPTRIE_SHIFT_3;
- int32_t iLimit = c >> UCPTRIE_SHIFT_3;
- if (iLimit > indexCapacity) {
- uint32_t *newIndex = (uint32_t *)uprv_malloc(I_LIMIT * 4);
- if (newIndex == nullptr) { return false; }
- uprv_memcpy(newIndex, index, i * 4);
- uprv_free(index);
- index = newIndex;
- indexCapacity = I_LIMIT;
- }
- do {
- flags[i] = ALL_SAME;
- index[i] = initialValue;
- } while(++i < iLimit);
- highStart = c;
- }
- return true;
-}
-
-int32_t MutableCodePointTrie::allocDataBlock(int32_t blockLength) {
- int32_t newBlock = dataLength;
- int32_t newTop = newBlock + blockLength;
- if (newTop > dataCapacity) {
- int32_t capacity;
- if (dataCapacity < MEDIUM_DATA_LENGTH) {
- capacity = MEDIUM_DATA_LENGTH;
- } else if (dataCapacity < MAX_DATA_LENGTH) {
- capacity = MAX_DATA_LENGTH;
- } else {
- // Should never occur.
- // Either MAX_DATA_LENGTH is incorrect,
- // or the code writes more values than should be possible.
- return -1;
- }
- uint32_t *newData = (uint32_t *)uprv_malloc(capacity * 4);
- if (newData == nullptr) {
- return -1;
- }
- uprv_memcpy(newData, data, (size_t)dataLength * 4);
- uprv_free(data);
- data = newData;
- dataCapacity = capacity;
- }
- dataLength = newTop;
- return newBlock;
-}
-
-/**
- * No error checking for illegal arguments.
- *
- * @return -1 if no new data block available (out of memory in data array)
- * @internal
- */
-int32_t MutableCodePointTrie::getDataBlock(int32_t i) {
- if (flags[i] == MIXED) {
- return index[i];
- }
- if (i < BMP_I_LIMIT) {
- int32_t newBlock = allocDataBlock(UCPTRIE_FAST_DATA_BLOCK_LENGTH);
- if (newBlock < 0) { return newBlock; }
- int32_t iStart = i & ~(SMALL_DATA_BLOCKS_PER_BMP_BLOCK -1);
- int32_t iLimit = iStart + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- do {
- U_ASSERT(flags[iStart] == ALL_SAME);
- writeBlock(data + newBlock, index[iStart]);
- flags[iStart] = MIXED;
- index[iStart++] = newBlock;
- newBlock += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- } while (iStart < iLimit);
- return index[i];
- } else {
- int32_t newBlock = allocDataBlock(UCPTRIE_SMALL_DATA_BLOCK_LENGTH);
- if (newBlock < 0) { return newBlock; }
- writeBlock(data + newBlock, index[i]);
- flags[i] = MIXED;
- index[i] = newBlock;
- return newBlock;
- }
-}
-
-void MutableCodePointTrie::set(UChar32 c, uint32_t value, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if ((uint32_t)c > MAX_UNICODE) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- int32_t block;
- if (!ensureHighStart(c) || (block = getDataBlock(c >> UCPTRIE_SHIFT_3)) < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- data[block + (c & UCPTRIE_SMALL_DATA_MASK)] = value;
-}
-
-void
-fillBlock(uint32_t *block, UChar32 start, UChar32 limit, uint32_t value) {
- uint32_t *pLimit = block + limit;
- block += start;
- while (block < pLimit) {
- *block++ = value;
- }
-}
-
-void MutableCodePointTrie::setRange(UChar32 start, UChar32 end, uint32_t value, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return;
- }
- if ((uint32_t)start > MAX_UNICODE || (uint32_t)end > MAX_UNICODE || start > end) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
- if (!ensureHighStart(end)) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- UChar32 limit = end + 1;
- if (start & UCPTRIE_SMALL_DATA_MASK) {
- // Set partial block at [start..following block boundary[.
- int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
- if (block < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- UChar32 nextStart = (start + UCPTRIE_SMALL_DATA_MASK) & ~UCPTRIE_SMALL_DATA_MASK;
- if (nextStart <= limit) {
- fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, UCPTRIE_SMALL_DATA_BLOCK_LENGTH,
- value);
- start = nextStart;
- } else {
- fillBlock(data + block, start & UCPTRIE_SMALL_DATA_MASK, limit & UCPTRIE_SMALL_DATA_MASK,
- value);
- return;
- }
- }
-
- // Number of positions in the last, partial block.
- int32_t rest = limit & UCPTRIE_SMALL_DATA_MASK;
-
- // Round down limit to a block boundary.
- limit &= ~UCPTRIE_SMALL_DATA_MASK;
-
- // Iterate over all-value blocks.
- while (start < limit) {
- int32_t i = start >> UCPTRIE_SHIFT_3;
- if (flags[i] == ALL_SAME) {
- index[i] = value;
- } else /* MIXED */ {
- fillBlock(data + index[i], 0, UCPTRIE_SMALL_DATA_BLOCK_LENGTH, value);
- }
- start += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- }
-
- if (rest > 0) {
- // Set partial block at [last block boundary..limit[.
- int32_t block = getDataBlock(start >> UCPTRIE_SHIFT_3);
- if (block < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- fillBlock(data + block, 0, rest, value);
- }
-}
-
-/* compaction --------------------------------------------------------------- */
-
-void MutableCodePointTrie::maskValues(uint32_t mask) {
- initialValue &= mask;
- errorValue &= mask;
- highValue &= mask;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- for (int32_t i = 0; i < iLimit; ++i) {
- if (flags[i] == ALL_SAME) {
- index[i] &= mask;
- }
- }
- for (int32_t i = 0; i < dataLength; ++i) {
- data[i] &= mask;
- }
-}
-
-inline bool
-equalBlocks(const uint32_t *s, const uint32_t *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint32_t *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-inline bool
-equalBlocks(const uint16_t *s, const uint16_t *t, int32_t length) {
- while (length > 0 && *s == *t) {
- ++s;
- ++t;
- --length;
- }
- return length == 0;
-}
-
-bool allValuesSameAs(const uint32_t *p, int32_t length, uint32_t value) {
- const uint32_t *pLimit = p + length;
- while (p < pLimit && *p == value) { ++p; }
- return p == pLimit;
-}
-
-/** Search for an identical block. */
-int32_t findSameBlock(const uint32_t *p, int32_t pStart, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
-int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
-int32_t findSameBlock(const uint16_t *p, int32_t pStart, int32_t length,
- const uint16_t *q, int32_t qStart, int32_t blockLength) {
- // Ensure that we do not even partially get past length.
- length -= blockLength;
-
- q += qStart;
- while (pStart <= length) {
- if (equalBlocks(p + pStart, q, blockLength)) {
- return pStart;
- }
- ++pStart;
- }
- return -1;
-}
-
-int32_t findAllSameBlock(const uint32_t *p, int32_t start, int32_t limit,
- uint32_t value, int32_t blockLength) {
- // Ensure that we do not even partially get past limit.
- limit -= blockLength;
-
- for (int32_t block = start; block <= limit; ++block) {
- if (p[block] == value) {
- for (int32_t i = 1;; ++i) {
- if (i == blockLength) {
- return block;
- }
- if (p[block + i] != value) {
- block += i;
- break;
- }
- }
- }
- }
- return -1;
-}
-
-/**
- * Look for maximum overlap of the beginning of the other block
- * with the previous, adjacent block.
- */
-int32_t getOverlap(const uint32_t *p, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
- const uint32_t *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getOverlap(const uint16_t *p, int32_t length,
- const uint16_t *q, int32_t qStart, int32_t blockLength) {
- int32_t overlap = blockLength - 1;
- U_ASSERT(overlap <= length);
- q += qStart;
- while (overlap > 0 && !equalBlocks(p + (length - overlap), q, overlap)) {
- --overlap;
- }
- return overlap;
-}
-
-int32_t getAllSameOverlap(const uint32_t *p, int32_t length, uint32_t value,
- int32_t blockLength) {
- int32_t min = length - (blockLength - 1);
- int32_t i = length;
- while (min < i && p[i - 1] == value) { --i; }
- return length - i;
-}
-
-bool isStartOfSomeFastBlock(uint32_t dataOffset, const uint32_t index[], int32_t fastILimit) {
- for (int32_t i = 0; i < fastILimit; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
- if (index[i] == dataOffset) {
- return true;
- }
- }
- return false;
-}
-
-/**
- * Finds the start of the last range in the trie by enumerating backward.
- * Indexes for code points higher than this will be omitted.
- */
-UChar32 MutableCodePointTrie::findHighStart() const {
- int32_t i = highStart >> UCPTRIE_SHIFT_3;
- while (i > 0) {
- bool match;
- if (flags[--i] == ALL_SAME) {
- match = index[i] == highValue;
- } else /* MIXED */ {
- const uint32_t *p = data + index[i];
- for (int32_t j = 0;; ++j) {
- if (j == UCPTRIE_SMALL_DATA_BLOCK_LENGTH) {
- match = true;
- break;
- }
- if (p[j] != highValue) {
- match = false;
- break;
- }
- }
- }
- if (!match) {
- return (i + 1) << UCPTRIE_SHIFT_3;
- }
- }
- return 0;
-}
-
-class AllSameBlocks {
-public:
- static constexpr int32_t NEW_UNIQUE = -1;
- static constexpr int32_t OVERFLOW = -2;
-
- AllSameBlocks() : length(0), mostRecent(-1) {}
-
- int32_t findOrAdd(int32_t index, int32_t count, uint32_t value) {
- if (mostRecent >= 0 && values[mostRecent] == value) {
- refCounts[mostRecent] += count;
- return indexes[mostRecent];
- }
- for (int32_t i = 0; i < length; ++i) {
- if (values[i] == value) {
- mostRecent = i;
- refCounts[i] += count;
- return indexes[i];
- }
- }
- if (length == CAPACITY) {
- return OVERFLOW;
- }
- mostRecent = length;
- indexes[length] = index;
- values[length] = value;
- refCounts[length++] = count;
- return NEW_UNIQUE;
- }
-
- /** Replaces the block which has the lowest reference count. */
- void add(int32_t index, int32_t count, uint32_t value) {
- U_ASSERT(length == CAPACITY);
- int32_t least = -1;
- int32_t leastCount = I_LIMIT;
- for (int32_t i = 0; i < length; ++i) {
- U_ASSERT(values[i] != value);
- if (refCounts[i] < leastCount) {
- least = i;
- leastCount = refCounts[i];
- }
- }
- U_ASSERT(least >= 0);
- mostRecent = least;
- indexes[least] = index;
- values[least] = value;
- refCounts[least] = count;
- }
-
- int32_t findMostUsed() const {
- if (length == 0) { return -1; }
- int32_t max = -1;
- int32_t maxCount = 0;
- for (int32_t i = 0; i < length; ++i) {
- if (refCounts[i] > maxCount) {
- max = i;
- maxCount = refCounts[i];
- }
- }
- return indexes[max];
- }
-
-private:
- static constexpr int32_t CAPACITY = 32;
-
- int32_t length;
- int32_t mostRecent;
-
- int32_t indexes[CAPACITY];
- uint32_t values[CAPACITY];
- int32_t refCounts[CAPACITY];
-};
-
-int32_t MutableCodePointTrie::compactWholeDataBlocks(int32_t fastILimit, AllSameBlocks &allSameBlocks) {
-#ifdef UCPTRIE_DEBUG
- bool overflow = false;
-#endif
-
- // ASCII data will be stored as a linear table, even if the following code
- // does not yet count it that way.
- int32_t newDataCapacity = ASCII_LIMIT;
- // Add room for a small data null block in case it would match the start of
- // a fast data block where dataNullOffset must not be set in that case.
- newDataCapacity += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- // Add room for special values (errorValue, highValue) and padding.
- newDataCapacity += 4;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
- int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- for (int32_t i = 0; i < iLimit; i += inc) {
- if (i == fastILimit) {
- blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- inc = 1;
- }
- uint32_t value = index[i];
- if (flags[i] == MIXED) {
- // Really mixed?
- const uint32_t *p = data + value;
- value = *p;
- if (allValuesSameAs(p + 1, blockLength - 1, value)) {
- flags[i] = ALL_SAME;
- index[i] = value;
- // Fall through to ALL_SAME handling.
- } else {
- newDataCapacity += blockLength;
- continue;
- }
- } else {
- U_ASSERT(flags[i] == ALL_SAME);
- if (inc > 1) {
- // Do all of the fast-range data block's ALL_SAME parts have the same value?
- bool allSame = true;
- int32_t next_i = i + inc;
- for (int32_t j = i + 1; j < next_i; ++j) {
- U_ASSERT(flags[j] == ALL_SAME);
- if (index[j] != value) {
- allSame = false;
- break;
- }
- }
- if (!allSame) {
- // Turn it into a MIXED block.
- if (getDataBlock(i) < 0) {
- return -1;
- }
- newDataCapacity += blockLength;
- continue;
- }
- }
- }
- // Is there another ALL_SAME block with the same value?
- int32_t other = allSameBlocks.findOrAdd(i, inc, value);
- if (other == AllSameBlocks::OVERFLOW) {
- // The fixed-size array overflowed. Slow check for a duplicate block.
-#ifdef UCPTRIE_DEBUG
- if (!overflow) {
- puts("UCPTrie AllSameBlocks overflow");
- overflow = true;
- }
-#endif
- int32_t jInc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- for (int32_t j = 0;; j += jInc) {
- if (j == i) {
- allSameBlocks.add(i, inc, value);
- break;
- }
- if (j == fastILimit) {
- jInc = 1;
- }
- if (flags[j] == ALL_SAME && index[j] == value) {
- allSameBlocks.add(j, jInc + inc, value);
- other = j;
- break;
- // We could keep counting blocks with the same value
- // before we add the first one, which may improve compaction in rare cases,
- // but it would make it slower.
- }
- }
- }
- if (other >= 0) {
- flags[i] = SAME_AS;
- index[i] = other;
- } else {
- // New unique same-value block.
- newDataCapacity += blockLength;
- }
- }
- return newDataCapacity;
-}
-
-#ifdef UCPTRIE_DEBUG
-# define DEBUG_DO(expr) expr
-#else
-# define DEBUG_DO(expr)
-#endif
-
-#ifdef UCPTRIE_DEBUG
-// Braille symbols: U+28xx = UTF-8 E2 A0 80..E2 A3 BF
-int32_t appendValue(char s[], int32_t length, uint32_t value) {
- value ^= value >> 16;
- value ^= value >> 8;
- s[length] = 0xE2;
- s[length + 1] = (char)(0xA0 + ((value >> 6) & 3));
- s[length + 2] = (char)(0x80 + (value & 0x3F));
- return length + 3;
-}
-
-void printBlock(const uint32_t *block, int32_t blockLength, uint32_t value,
- UChar32 start, int32_t overlap, uint32_t initialValue) {
- char s[UCPTRIE_FAST_DATA_BLOCK_LENGTH * 3 + 3];
- int32_t length = 0;
- int32_t i;
- for (i = 0; i < overlap; ++i) {
- length = appendValue(s, length, 0); // Braille blank
- }
- s[length++] = '|';
- for (; i < blockLength; ++i) {
- if (block != nullptr) {
- value = block[i];
- }
- if (value == initialValue) {
- value = 0x40; // Braille lower left dot
- }
- length = appendValue(s, length, value);
- }
- s[length] = 0;
- start += overlap;
- if (start <= 0xffff) {
- printf(" %04lX %s|\n", (long)start, s);
- } else if (start <= 0xfffff) {
- printf(" %5lX %s|\n", (long)start, s);
- } else {
- printf(" %6lX %s|\n", (long)start, s);
- }
-}
-#endif
-
-/**
- * Compacts a build-time trie.
- *
- * The compaction
- * - removes blocks that are identical with earlier ones
- * - overlaps each new non-duplicate block as much as possible with the previously-written one
- * - works with fast-range data blocks whose length is a multiple of that of
- * higher-code-point data blocks
- *
- * It does not try to find an optimal order of writing, deduplicating, and overlapping blocks.
- */
-int32_t MutableCodePointTrie::compactData(int32_t fastILimit,
- uint32_t *newData, int32_t dataNullIndex) {
-#ifdef UCPTRIE_DEBUG
- int32_t countSame=0, sumOverlaps=0;
- bool printData = dataLength == 29088 /* line.brk */ ||
- // dataLength == 30048 /* CanonIterData */ ||
- dataLength == 50400 /* zh.txt~stroke */;
-#endif
-
- // The linear ASCII data has been copied into newData already.
- int32_t newDataLength = 0;
- for (int32_t i = 0; newDataLength < ASCII_LIMIT;
- newDataLength += UCPTRIE_FAST_DATA_BLOCK_LENGTH, i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK) {
- index[i] = newDataLength;
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(newData + newDataLength, UCPTRIE_FAST_DATA_BLOCK_LENGTH, 0, newDataLength, 0, initialValue);
- }
-#endif
- }
-
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- int32_t blockLength = UCPTRIE_FAST_DATA_BLOCK_LENGTH;
- int32_t inc = SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- int32_t fastLength = 0;
- for (int32_t i = ASCII_I_LIMIT; i < iLimit; i += inc) {
- if (i == fastILimit) {
- blockLength = UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- inc = 1;
- fastLength = newDataLength;
- }
- if (flags[i] == ALL_SAME) {
- uint32_t value = index[i];
- int32_t n;
- // Find an earlier part of the data array of length blockLength
- // that is filled with this value.
- // If we find a match, and the current block is the data null block,
- // and it is not a fast block but matches the start of a fast block,
- // then we need to continue looking.
- // This is because this small block is shorter than the fast block,
- // and not all of the rest of the fast block is filled with this value.
- // Otherwise trie.getRange() would detect that the fast block starts at
- // dataNullOffset and assume incorrectly that it is filled with the null value.
- for (int32_t start = 0;
- (n = findAllSameBlock(newData, start, newDataLength,
- value, blockLength)) >= 0 &&
- i == dataNullIndex && i >= fastILimit && n < fastLength &&
- isStartOfSomeFastBlock(n, index, fastILimit);
- start = n + 1) {}
- if (n >= 0) {
- DEBUG_DO(++countSame);
- index[i] = n;
- } else {
- n = getAllSameOverlap(newData, newDataLength, value, blockLength);
- DEBUG_DO(sumOverlaps += n);
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(nullptr, blockLength, value, i << UCPTRIE_SHIFT_3, n, initialValue);
- }
-#endif
- index[i] = newDataLength - n;
- while (n < blockLength) {
- newData[newDataLength++] = value;
- ++n;
- }
- }
- } else if (flags[i] == MIXED) {
- const uint32_t *block = data + index[i];
- int32_t n = findSameBlock(newData, 0, newDataLength, block, 0, blockLength);
- if (n >= 0) {
- DEBUG_DO(++countSame);
- index[i] = n;
- } else {
- n = getOverlap(newData, newDataLength, block, 0, blockLength);
- DEBUG_DO(sumOverlaps += n);
-#ifdef UCPTRIE_DEBUG
- if (printData) {
- printBlock(block, blockLength, 0, i << UCPTRIE_SHIFT_3, n, initialValue);
- }
-#endif
- index[i] = newDataLength - n;
- while (n < blockLength) {
- newData[newDataLength++] = block[n++];
- }
- }
- } else /* SAME_AS */ {
- uint32_t j = index[i];
- index[i] = index[j];
- }
- }
-
-#ifdef UCPTRIE_DEBUG
- /* we saved some space */
- printf("compacting UCPTrie: count of 32-bit data words %lu->%lu countSame=%ld sumOverlaps=%ld\n",
- (long)dataLength, (long)newDataLength, (long)countSame, (long)sumOverlaps);
-#endif
- return newDataLength;
-}
-
-int32_t MutableCodePointTrie::compactIndex(int32_t fastILimit, UErrorCode &errorCode) {
- int32_t fastIndexLength = fastILimit >> (UCPTRIE_FAST_SHIFT - UCPTRIE_SHIFT_3);
- if ((highStart >> UCPTRIE_FAST_SHIFT) <= fastIndexLength) {
- // Only the linear fast index, no multi-stage index tables.
- index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
- return fastIndexLength;
- }
-
- // Condense the fast index table.
- // Also, does it contain an index-3 block with all dataNullOffset?
- uint16_t fastIndex[UCPTRIE_BMP_INDEX_LENGTH]; // fastIndexLength
- int32_t i3FirstNull = -1;
- for (int32_t i = 0, j = 0; i < fastILimit; ++j) {
- uint32_t i3 = index[i];
- fastIndex[j] = (uint16_t)i3;
- if (i3 == (uint32_t)dataNullOffset) {
- if (i3FirstNull < 0) {
- i3FirstNull = j;
- } else if (index3NullOffset < 0 &&
- (j - i3FirstNull + 1) == UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- index3NullOffset = i3FirstNull;
- }
- } else {
- i3FirstNull = -1;
- }
- // Set the index entries that compactData() skipped.
- // Needed when the multi-stage index covers the fast index range as well.
- int32_t iNext = i + SMALL_DATA_BLOCKS_PER_BMP_BLOCK;
- while (++i < iNext) {
- i3 += UCPTRIE_SMALL_DATA_BLOCK_LENGTH;
- index[i] = i3;
- }
- }
-
- // Examine index-3 blocks. For each determine one of:
- // - same as the index-3 null block
- // - same as a fast-index block
- // - 16-bit indexes
- // - 18-bit indexes
- // We store this in the first flags entry for the index-3 block.
- //
- // Also determine an upper limit for the index-3 table length.
- int32_t index3Capacity = 0;
- i3FirstNull = index3NullOffset;
- // If the fast index covers the whole BMP, then
- // the multi-stage index is only for supplementary code points.
- // Otherwise, the multi-stage index covers all of Unicode.
- int32_t iStart = fastILimit < BMP_I_LIMIT ? 0 : BMP_I_LIMIT;
- int32_t iLimit = highStart >> UCPTRIE_SHIFT_3;
- for (int32_t i = iStart; i < iLimit;) {
- int32_t j = i;
- int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
- uint32_t oredI3 = 0;
- bool isNull = true;
- do {
- uint32_t i3 = index[j];
- oredI3 |= i3;
- if (i3 != (uint32_t)dataNullOffset) {
- isNull = false;
- }
- } while (++j < jLimit);
- if (isNull) {
- flags[i] = I3_NULL;
- if (i3FirstNull < 0) {
- if (oredI3 <= 0xffff) {
- index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
- } else {
- index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
- }
- i3FirstNull = 0;
- }
- } else {
- if (oredI3 <= 0xffff) {
- int32_t n = findSameBlock(fastIndex, 0, fastIndexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
- if (n >= 0) {
- flags[i] = I3_BMP;
- index[i] = n;
- } else {
- flags[i] = I3_16;
- index3Capacity += UCPTRIE_INDEX_3_BLOCK_LENGTH;
- }
- } else {
- flags[i] = I3_18;
- index3Capacity += INDEX_3_18BIT_BLOCK_LENGTH;
- }
- }
- i = j;
- }
-
- int32_t index2Capacity = (iLimit - iStart) >> UCPTRIE_SHIFT_2_3;
-
- // Length of the index-1 table, rounded up.
- int32_t index1Length = (index2Capacity + UCPTRIE_INDEX_2_MASK) >> UCPTRIE_SHIFT_1_2;
-
- // Index table: Fast index, index-1, index-3, index-2.
- // +1 for possible index table padding.
- int32_t index16Capacity = fastIndexLength + index1Length + index3Capacity + index2Capacity + 1;
- index16 = (uint16_t *)uprv_malloc(index16Capacity * 2);
- if (index16 == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memcpy(index16, fastIndex, fastIndexLength * 2);
-
- // Compact the index-3 table and write an uncompacted version of the index-2 table.
- uint16_t index2[UNICODE_LIMIT >> UCPTRIE_SHIFT_2]; // index2Capacity
- int32_t i2Length = 0;
- i3FirstNull = index3NullOffset;
- int32_t index3Start = fastIndexLength + index1Length;
- int32_t indexLength = index3Start;
- for (int32_t i = iStart; i < iLimit; i += UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- int32_t i3;
- uint8_t f = flags[i];
- if (f == I3_NULL && i3FirstNull < 0) {
- // First index-3 null block. Write & overlap it like a normal block, then remember it.
- f = dataNullOffset <= 0xffff ? I3_16 : I3_18;
- i3FirstNull = 0;
- }
- if (f == I3_NULL) {
- i3 = index3NullOffset;
- } else if (f == I3_BMP) {
- i3 = index[i];
- } else if (f == I3_16) {
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
- if (n >= 0) {
- i3 = n;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength,
- index, i, UCPTRIE_INDEX_3_BLOCK_LENGTH);
- }
- i3 = indexLength - n;
- while (n < UCPTRIE_INDEX_3_BLOCK_LENGTH) {
- index16[indexLength++] = index[i + n++];
- }
- }
- } else {
- U_ASSERT(f == I3_18);
- // Encode an index-3 block that contains one or more data indexes exceeding 16 bits.
- int32_t j = i;
- int32_t jLimit = i + UCPTRIE_INDEX_3_BLOCK_LENGTH;
- int32_t k = indexLength;
- do {
- ++k;
- uint32_t v = index[j++];
- uint32_t upperBits = (v & 0x30000) >> 2;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 4;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 6;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 8;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 10;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 12;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 14;
- index16[k++] = v;
- v = index[j++];
- upperBits |= (v & 0x30000) >> 16;
- index16[k++] = v;
- index16[k - 9] = upperBits;
- } while (j < jLimit);
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
- if (n >= 0) {
- i3 = n | 0x8000;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength,
- index16, indexLength, INDEX_3_18BIT_BLOCK_LENGTH);
- }
- i3 = (indexLength - n) | 0x8000;
- if (n > 0) {
- int32_t start = indexLength;
- while (n < INDEX_3_18BIT_BLOCK_LENGTH) {
- index16[indexLength++] = index16[start + n++];
- }
- } else {
- indexLength += INDEX_3_18BIT_BLOCK_LENGTH;
- }
- }
- }
- if (index3NullOffset < 0 && i3FirstNull >= 0) {
- index3NullOffset = i3;
- }
- // Set the index-2 table entry.
- index2[i2Length++] = i3;
- }
- U_ASSERT(i2Length == index2Capacity);
- U_ASSERT(indexLength <= index3Start + index3Capacity);
-
- if (index3NullOffset < 0) {
- index3NullOffset = UCPTRIE_NO_INDEX3_NULL_OFFSET;
- }
- if (indexLength >= (UCPTRIE_NO_INDEX3_NULL_OFFSET + UCPTRIE_INDEX_3_BLOCK_LENGTH)) {
- // The index-3 offsets exceed 15 bits, or
- // the last one cannot be distinguished from the no-null-block value.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- // Compact the index-2 table and write the index-1 table.
- int32_t blockLength = UCPTRIE_INDEX_2_BLOCK_LENGTH;
- int32_t i1 = fastIndexLength;
- for (int32_t i = 0; i < i2Length; i += blockLength) {
- if ((i2Length - i) < blockLength) {
- // highStart is inside the last index-2 block. Shorten it.
- blockLength = i2Length - i;
- }
- int32_t i2;
- int32_t n = findSameBlock(index16, index3Start, indexLength,
- index2, i, blockLength);
- if (n >= 0) {
- i2 = n;
- } else {
- if (indexLength == index3Start) {
- // No overlap at the boundary between the index-1 and index-3/2 tables.
- n = 0;
- } else {
- n = getOverlap(index16, indexLength, index2, i, blockLength);
- }
- i2 = indexLength - n;
- while (n < blockLength) {
- index16[indexLength++] = index2[i + n++];
- }
- }
- // Set the index-1 table entry.
- index16[i1++] = i2;
- }
- U_ASSERT(i1 == index3Start);
- U_ASSERT(indexLength <= index16Capacity);
-
-#ifdef UCPTRIE_DEBUG
- /* we saved some space */
- printf("compacting UCPTrie: count of 16-bit index words %lu->%lu\n",
- (long)iLimit, (long)indexLength);
-#endif
-
- return indexLength;
-}
-
-int32_t MutableCodePointTrie::compactTrie(int32_t fastILimit, UErrorCode &errorCode) {
- // Find the real highStart and round it up.
- U_ASSERT((highStart & (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) == 0);
- highValue = get(MAX_UNICODE);
- int32_t realHighStart = findHighStart();
- realHighStart = (realHighStart + (UCPTRIE_CP_PER_INDEX_2_ENTRY - 1)) &
- ~(UCPTRIE_CP_PER_INDEX_2_ENTRY - 1);
- if (realHighStart == UNICODE_LIMIT) {
- highValue = initialValue;
- }
-
-#ifdef UCPTRIE_DEBUG
- printf("UCPTrie: highStart U+%06lx highValue 0x%lx initialValue 0x%lx\n",
- (long)realHighStart, (long)highValue, (long)initialValue);
-#endif
-
- // We always store indexes and data values for the fast range.
- // Pin highStart to the top of that range while building.
- UChar32 fastLimit = fastILimit << UCPTRIE_SHIFT_3;
- if (realHighStart < fastLimit) {
- for (int32_t i = (realHighStart >> UCPTRIE_SHIFT_3); i < fastILimit; ++i) {
- flags[i] = ALL_SAME;
- index[i] = highValue;
- }
- highStart = fastLimit;
- } else {
- highStart = realHighStart;
- }
-
- uint32_t asciiData[ASCII_LIMIT];
- for (int32_t i = 0; i < ASCII_LIMIT; ++i) {
- asciiData[i] = get(i);
- }
-
- // First we look for which data blocks have the same value repeated over the whole block,
- // deduplicate such blocks, find a good null data block (for faster enumeration),
- // and get an upper bound for the necessary data array length.
- AllSameBlocks allSameBlocks;
- int32_t newDataCapacity = compactWholeDataBlocks(fastILimit, allSameBlocks);
- if (newDataCapacity < 0) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uint32_t *newData = (uint32_t *)uprv_malloc(newDataCapacity * 4);
- if (newData == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- uprv_memcpy(newData, asciiData, sizeof(asciiData));
-
- int32_t dataNullIndex = allSameBlocks.findMostUsed();
- int32_t newDataLength = compactData(fastILimit, newData, dataNullIndex);
- U_ASSERT(newDataLength <= newDataCapacity);
- uprv_free(data);
- data = newData;
- dataCapacity = newDataCapacity;
- dataLength = newDataLength;
- if (dataLength > (0x3ffff + UCPTRIE_SMALL_DATA_BLOCK_LENGTH)) {
- // The offset of the last data block is too high to be stored in the index table.
- errorCode = U_INDEX_OUTOFBOUNDS_ERROR;
- return 0;
- }
-
- if (dataNullIndex >= 0) {
- dataNullOffset = index[dataNullIndex];
-#ifdef UCPTRIE_DEBUG
- if (data[dataNullOffset] != initialValue) {
- printf("UCPTrie initialValue %lx -> more common nullValue %lx\n",
- (long)initialValue, (long)data[dataNullOffset]);
- }
-#endif
- initialValue = data[dataNullOffset];
- } else {
- dataNullOffset = UCPTRIE_NO_DATA_NULL_OFFSET;
- }
-
- int32_t indexLength = compactIndex(fastILimit, errorCode);
- highStart = realHighStart;
- return indexLength;
-}
-
-UCPTrie *MutableCodePointTrie::build(UCPTrieType type, UCPTrieValueWidth valueWidth, UErrorCode &errorCode) {
- if (U_FAILURE(errorCode)) {
- return nullptr;
- }
- if (type < UCPTRIE_TYPE_FAST || UCPTRIE_TYPE_SMALL < type ||
- valueWidth < UCPTRIE_VALUE_BITS_16 || UCPTRIE_VALUE_BITS_8 < valueWidth) {
- errorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
-
- // The mutable trie always stores 32-bit values.
- // When we build a UCPTrie for a smaller value width, we first mask off unused bits
- // before compacting the data.
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_32:
- break;
- case UCPTRIE_VALUE_BITS_16:
- maskValues(0xffff);
- break;
- case UCPTRIE_VALUE_BITS_8:
- maskValues(0xff);
- break;
- default:
- break;
- }
-
- UChar32 fastLimit = type == UCPTRIE_TYPE_FAST ? BMP_LIMIT : UCPTRIE_SMALL_LIMIT;
- int32_t indexLength = compactTrie(fastLimit >> UCPTRIE_SHIFT_3, errorCode);
- if (U_FAILURE(errorCode)) {
- clear();
- return nullptr;
- }
-
- // Ensure data table alignment: The index length must be even for uint32_t data.
- if (valueWidth == UCPTRIE_VALUE_BITS_32 && (indexLength & 1) != 0) {
- index16[indexLength++] = 0xffee; // arbitrary value
- }
-
- // Make the total trie structure length a multiple of 4 bytes by padding the data table,
- // and store special values as the last two data values.
- int32_t length = indexLength * 2;
- if (valueWidth == UCPTRIE_VALUE_BITS_16) {
- if (((indexLength ^ dataLength) & 1) != 0) {
- // padding
- data[dataLength++] = errorValue;
- }
- if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
- data[dataLength++] = highValue;
- data[dataLength++] = errorValue;
- }
- length += dataLength * 2;
- } else if (valueWidth == UCPTRIE_VALUE_BITS_32) {
- // 32-bit data words never need padding to a multiple of 4 bytes.
- if (data[dataLength - 1] != errorValue || data[dataLength - 2] != highValue) {
- if (data[dataLength - 1] != highValue) {
- data[dataLength++] = highValue;
- }
- data[dataLength++] = errorValue;
- }
- length += dataLength * 4;
- } else {
- int32_t and3 = (length + dataLength) & 3;
- if (and3 == 0 && data[dataLength - 1] == errorValue && data[dataLength - 2] == highValue) {
- // all set
- } else if(and3 == 3 && data[dataLength - 1] == highValue) {
- data[dataLength++] = errorValue;
- } else {
- while (and3 != 2) {
- data[dataLength++] = highValue;
- and3 = (and3 + 1) & 3;
- }
- data[dataLength++] = highValue;
- data[dataLength++] = errorValue;
- }
- length += dataLength;
- }
-
- // Calculate the total length of the UCPTrie as a single memory block.
- length += sizeof(UCPTrie);
- U_ASSERT((length & 3) == 0);
-
- uint8_t *bytes = (uint8_t *)uprv_malloc(length);
- if (bytes == nullptr) {
- errorCode = U_MEMORY_ALLOCATION_ERROR;
- clear();
- return nullptr;
- }
- UCPTrie *trie = reinterpret_cast<UCPTrie *>(bytes);
- uprv_memset(trie, 0, sizeof(UCPTrie));
- trie->indexLength = indexLength;
- trie->dataLength = dataLength;
-
- trie->highStart = highStart;
- // Round up shifted12HighStart to a multiple of 0x1000 for easy testing from UTF-8 lead bytes.
- // Runtime code needs to then test for the real highStart as well.
- trie->shifted12HighStart = (highStart + 0xfff) >> 12;
- trie->type = type;
- trie->valueWidth = valueWidth;
-
- trie->index3NullOffset = index3NullOffset;
- trie->dataNullOffset = dataNullOffset;
- trie->nullValue = initialValue;
-
- bytes += sizeof(UCPTrie);
-
- // Fill the index and data arrays.
- uint16_t *dest16 = (uint16_t *)bytes;
- trie->index = dest16;
-
- if (highStart <= fastLimit) {
- // Condense only the fast index from the mutable-trie index.
- for (int32_t i = 0, j = 0; j < indexLength; i += SMALL_DATA_BLOCKS_PER_BMP_BLOCK, ++j) {
- *dest16++ = (uint16_t)index[i]; // dest16[j]
- }
- } else {
- uprv_memcpy(dest16, index16, indexLength * 2);
- dest16 += indexLength;
- }
- bytes += indexLength * 2;
-
- // Write the data array.
- const uint32_t *p = data;
- switch (valueWidth) {
- case UCPTRIE_VALUE_BITS_16:
- // Write 16-bit data values.
- trie->data.ptr16 = dest16;
- for (int32_t i = dataLength; i > 0; --i) {
- *dest16++ = (uint16_t)*p++;
- }
- break;
- case UCPTRIE_VALUE_BITS_32:
- // Write 32-bit data values.
- trie->data.ptr32 = (uint32_t *)bytes;
- uprv_memcpy(bytes, p, (size_t)dataLength * 4);
- break;
- case UCPTRIE_VALUE_BITS_8:
- // Write 8-bit data values.
- trie->data.ptr8 = bytes;
- for (int32_t i = dataLength; i > 0; --i) {
- *bytes++ = (uint8_t)*p++;
- }
- break;
- default:
- // Will not occur, valueWidth checked at the beginning.
- break;
- }
-
-#ifdef UCPTRIE_DEBUG
- trie->name = name;
-
- ucptrie_printLengths(trie, "");
-#endif
-
- clear();
- return trie;
-}
-
-} // namespace
-
-U_NAMESPACE_END
-
-U_NAMESPACE_USE
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> trie(
- new MutableCodePointTrie(initialValue, errorValue, *pErrorCode), *pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(trie.orphan());
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (other == nullptr) {
- return nullptr;
- }
- LocalPointer<MutableCodePointTrie> clone(
- new MutableCodePointTrie(*reinterpret_cast<const MutableCodePointTrie *>(other), *pErrorCode), *pErrorCode);
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(clone.orphan());
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_close(UMutableCPTrie *trie) {
- delete reinterpret_cast<MutableCodePointTrie *>(trie);
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (map == nullptr) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPMap(map, *pErrorCode));
-}
-
-U_CAPI UMutableCPTrie * U_EXPORT2
-umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- if (trie == nullptr) {
- *pErrorCode = U_ILLEGAL_ARGUMENT_ERROR;
- return nullptr;
- }
- return reinterpret_cast<UMutableCPTrie *>(MutableCodePointTrie::fromUCPTrie(trie, *pErrorCode));
-}
-
-U_CAPI uint32_t U_EXPORT2
-umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c) {
- return reinterpret_cast<const MutableCodePointTrie *>(trie)->get(c);
-}
-
-namespace {
-
-UChar32 getRange(const void *trie, UChar32 start,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return reinterpret_cast<const MutableCodePointTrie *>(trie)->
- getRange(start, filter, context, pValue);
-}
-
-} // namespace
-
-U_CAPI UChar32 U_EXPORT2
-umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
- UCPMapRangeOption option, uint32_t surrogateValue,
- UCPMapValueFilter *filter, const void *context, uint32_t *pValue) {
- return ucptrie_internalGetRange(getRange, trie, start,
- option, surrogateValue,
- filter, context, pValue);
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return;
- }
- reinterpret_cast<MutableCodePointTrie *>(trie)->set(c, value, *pErrorCode);
-}
-
-U_CAPI void U_EXPORT2
-umutablecptrie_setRange(UMutableCPTrie *trie, UChar32 start, UChar32 end,
- uint32_t value, UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return;
- }
- reinterpret_cast<MutableCodePointTrie *>(trie)->setRange(start, end, value, *pErrorCode);
-}
-
-/* Compact and internally serialize the trie. */
-U_CAPI UCPTrie * U_EXPORT2
-umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
- UErrorCode *pErrorCode) {
- if (U_FAILURE(*pErrorCode)) {
- return nullptr;
- }
- return reinterpret_cast<MutableCodePointTrie *>(trie)->build(type, valueWidth, *pErrorCode);
-}
-
-#ifdef UCPTRIE_DEBUG
-U_CFUNC void umutablecptrie_setName(UMutableCPTrie *trie, const char *name) {
- reinterpret_cast<MutableCodePointTrie *>(trie)->name = name;
-}
-#endif