diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/dictionarydata.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/dictionarydata.cpp | 242 |
1 files changed, 0 insertions, 242 deletions
diff --git a/deps/node/deps/icu-small/source/common/dictionarydata.cpp b/deps/node/deps/icu-small/source/common/dictionarydata.cpp deleted file mode 100644 index 0efa5874..00000000 --- a/deps/node/deps/icu-small/source/common/dictionarydata.cpp +++ /dev/null @@ -1,242 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -******************************************************************************* -* Copyright (C) 2014-2016, International Business Machines -* Corporation and others. All Rights Reserved. -******************************************************************************* -* dictionarydata.h -* -* created on: 2012may31 -* created by: Markus W. Scherer & Maxime Serrano -*/ - -#include "dictionarydata.h" -#include "unicode/ucharstrie.h" -#include "unicode/bytestrie.h" -#include "unicode/udata.h" -#include "cmemory.h" - -#if !UCONFIG_NO_BREAK_ITERATION - -U_NAMESPACE_BEGIN - -const int32_t DictionaryData::TRIE_TYPE_BYTES = 0; -const int32_t DictionaryData::TRIE_TYPE_UCHARS = 1; -const int32_t DictionaryData::TRIE_TYPE_MASK = 7; -const int32_t DictionaryData::TRIE_HAS_VALUES = 8; - -const int32_t DictionaryData::TRANSFORM_NONE = 0; -const int32_t DictionaryData::TRANSFORM_TYPE_OFFSET = 0x1000000; -const int32_t DictionaryData::TRANSFORM_TYPE_MASK = 0x7f000000; -const int32_t DictionaryData::TRANSFORM_OFFSET_MASK = 0x1fffff; - -DictionaryMatcher::~DictionaryMatcher() { -} - -UCharsDictionaryMatcher::~UCharsDictionaryMatcher() { - udata_close(file); -} - -int32_t UCharsDictionaryMatcher::getType() const { - return DictionaryData::TRIE_TYPE_UCHARS; -} - -int32_t UCharsDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const { - - UCharsTrie uct(characters); - int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); - int32_t wordCount = 0; - int32_t codePointsMatched = 0; - - for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { - UStringTrieResult result = (codePointsMatched == 0) ? uct.first(c) : uct.next(c); - int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; - codePointsMatched += 1; - if (USTRINGTRIE_HAS_VALUE(result)) { - if (wordCount < limit) { - if (values != NULL) { - values[wordCount] = uct.getValue(); - } - if (lengths != NULL) { - lengths[wordCount] = lengthMatched; - } - if (cpLengths != NULL) { - cpLengths[wordCount] = codePointsMatched; - } - ++wordCount; - } - if (result == USTRINGTRIE_FINAL_VALUE) { - break; - } - } - else if (result == USTRINGTRIE_NO_MATCH) { - break; - } - if (lengthMatched >= maxLength) { - break; - } - } - - if (prefix != NULL) { - *prefix = codePointsMatched; - } - return wordCount; -} - -BytesDictionaryMatcher::~BytesDictionaryMatcher() { - udata_close(file); -} - -UChar32 BytesDictionaryMatcher::transform(UChar32 c) const { - if ((transformConstant & DictionaryData::TRANSFORM_TYPE_MASK) == DictionaryData::TRANSFORM_TYPE_OFFSET) { - if (c == 0x200D) { - return 0xFF; - } else if (c == 0x200C) { - return 0xFE; - } - int32_t delta = c - (transformConstant & DictionaryData::TRANSFORM_OFFSET_MASK); - if (delta < 0 || 0xFD < delta) { - return U_SENTINEL; - } - return (UChar32)delta; - } - return c; -} - -int32_t BytesDictionaryMatcher::getType() const { - return DictionaryData::TRIE_TYPE_BYTES; -} - -int32_t BytesDictionaryMatcher::matches(UText *text, int32_t maxLength, int32_t limit, - int32_t *lengths, int32_t *cpLengths, int32_t *values, - int32_t *prefix) const { - BytesTrie bt(characters); - int32_t startingTextIndex = (int32_t)utext_getNativeIndex(text); - int32_t wordCount = 0; - int32_t codePointsMatched = 0; - - for (UChar32 c = utext_next32(text); c >= 0; c=utext_next32(text)) { - UStringTrieResult result = (codePointsMatched == 0) ? bt.first(transform(c)) : bt.next(transform(c)); - int32_t lengthMatched = (int32_t)utext_getNativeIndex(text) - startingTextIndex; - codePointsMatched += 1; - if (USTRINGTRIE_HAS_VALUE(result)) { - if (wordCount < limit) { - if (values != NULL) { - values[wordCount] = bt.getValue(); - } - if (lengths != NULL) { - lengths[wordCount] = lengthMatched; - } - if (cpLengths != NULL) { - cpLengths[wordCount] = codePointsMatched; - } - ++wordCount; - } - if (result == USTRINGTRIE_FINAL_VALUE) { - break; - } - } - else if (result == USTRINGTRIE_NO_MATCH) { - break; - } - if (lengthMatched >= maxLength) { - break; - } - } - - if (prefix != NULL) { - *prefix = codePointsMatched; - } - return wordCount; -} - - -U_NAMESPACE_END - -U_NAMESPACE_USE - -U_CAPI int32_t U_EXPORT2 -udict_swap(const UDataSwapper *ds, const void *inData, int32_t length, - void *outData, UErrorCode *pErrorCode) { - const UDataInfo *pInfo; - int32_t headerSize; - const uint8_t *inBytes; - uint8_t *outBytes; - const int32_t *inIndexes; - int32_t indexes[DictionaryData::IX_COUNT]; - int32_t i, offset, size; - - headerSize = udata_swapDataHeader(ds, inData, length, outData, pErrorCode); - if (pErrorCode == NULL || U_FAILURE(*pErrorCode)) return 0; - pInfo = (const UDataInfo *)((const char *)inData + 4); - if (!(pInfo->dataFormat[0] == 0x44 && - pInfo->dataFormat[1] == 0x69 && - pInfo->dataFormat[2] == 0x63 && - pInfo->dataFormat[3] == 0x74 && - pInfo->formatVersion[0] == 1)) { - udata_printError(ds, "udict_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as dictionary data\n", - pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], pInfo->formatVersion[0]); - *pErrorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - inBytes = (const uint8_t *)inData + headerSize; - outBytes = (uint8_t *)outData + headerSize; - - inIndexes = (const int32_t *)inBytes; - if (length >= 0) { - length -= headerSize; - if (length < (int32_t)(sizeof(indexes))) { - udata_printError(ds, "udict_swap(): too few bytes (%d after header) for dictionary data\n", length); - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - } - - for (i = 0; i < DictionaryData::IX_COUNT; i++) { - indexes[i] = udata_readInt32(ds, inIndexes[i]); - } - - size = indexes[DictionaryData::IX_TOTAL_SIZE]; - - if (length >= 0) { - if (length < size) { - udata_printError(ds, "udict_swap(): too few bytes (%d after header) for all of dictionary data\n", length); - *pErrorCode = U_INDEX_OUTOFBOUNDS_ERROR; - return 0; - } - - if (inBytes != outBytes) { - uprv_memcpy(outBytes, inBytes, size); - } - - offset = 0; - ds->swapArray32(ds, inBytes, sizeof(indexes), outBytes, pErrorCode); - offset = (int32_t)sizeof(indexes); - int32_t trieType = indexes[DictionaryData::IX_TRIE_TYPE] & DictionaryData::TRIE_TYPE_MASK; - int32_t nextOffset = indexes[DictionaryData::IX_RESERVED1_OFFSET]; - - if (trieType == DictionaryData::TRIE_TYPE_UCHARS) { - ds->swapArray16(ds, inBytes + offset, nextOffset - offset, outBytes + offset, pErrorCode); - } else if (trieType == DictionaryData::TRIE_TYPE_BYTES) { - // nothing to do - } else { - udata_printError(ds, "udict_swap(): unknown trie type!\n"); - *pErrorCode = U_UNSUPPORTED_ERROR; - return 0; - } - - // these next two sections are empty in the current format, - // but may be used later. - offset = nextOffset; - nextOffset = indexes[DictionaryData::IX_RESERVED2_OFFSET]; - offset = nextOffset; - nextOffset = indexes[DictionaryData::IX_TOTAL_SIZE]; - offset = nextOffset; - } - return headerSize + size; -} -#endif |