diff options
author | Steven R. Loomis <srloomis@us.ibm.com> | 2017-09-21 15:31:38 -0700 |
---|---|---|
committer | Steven R. Loomis <srloomis@us.ibm.com> | 2017-11-09 18:25:58 -0800 |
commit | 44d3e17985befbd45457d5ad7f0a0387849e1b2f (patch) | |
tree | f75f2eddb868f13254b7f514875534dee616c0d6 /deps/icu-small/source/common/rbbidata.cpp | |
parent | 3b3ceafaf922e1d79950595eaa501aa412913820 (diff) | |
download | android-node-v8-44d3e17985befbd45457d5ad7f0a0387849e1b2f.tar.gz android-node-v8-44d3e17985befbd45457d5ad7f0a0387849e1b2f.tar.bz2 android-node-v8-44d3e17985befbd45457d5ad7f0a0387849e1b2f.zip |
deps: ICU 60 bump
- Update to released ICU 60.1, including:
  - CLDR 32 (many new languages and data improvements)
  - Unicode 10 (8,518 new characters, including four new scripts,
    7,494 new Han characters, and 56 new emoji characters)
  - UTF-8 malformed bytes now handled according to W3C/WHATWG spec
Fixes: https://github.com/nodejs/node/issues/15540
PR-URL: https://github.com/nodejs/node/pull/16876
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Michael Dawson <michael_dawson@ca.ibm.com>
Diffstat (limited to 'deps/icu-small/source/common/rbbidata.cpp')
-rw-r--r-- | deps/icu-small/source/common/rbbidata.cpp | 76 |
1 files changed, 33 insertions, 43 deletions
diff --git a/deps/icu-small/source/common/rbbidata.cpp b/deps/icu-small/source/common/rbbidata.cpp index ecdc8f4165..d66eca82f8 100644 --- a/deps/icu-small/source/common/rbbidata.cpp +++ b/deps/icu-small/source/common/rbbidata.cpp @@ -14,7 +14,7 @@ #include "unicode/utypes.h" #include "rbbidata.h" #include "rbbirb.h" -#include "utrie.h" +#include "utrie2.h" #include "udatamem.h" #include "cmemory.h" #include "cstring.h" @@ -23,23 +23,6 @@ #include "uassert.h" -//----------------------------------------------------------------------------------- -// -// Trie access folding function. Copied as-is from properties code in uchar.c -// -//----------------------------------------------------------------------------------- -U_CDECL_BEGIN -static int32_t U_CALLCONV -getFoldingOffset(uint32_t data) { - /* if bit 15 is set, then the folding offset is in bits 14..0 of the 16-bit trie result */ - if(data&0x8000) { - return (int32_t)(data&0x7fff); - } else { - return 0; - } -} -U_CDECL_END - U_NAMESPACE_BEGIN //----------------------------------------------------------------------------- @@ -71,9 +54,8 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { dh->info.dataFormat[0] == 0x42 && // dataFormat="Brk " dh->info.dataFormat[1] == 0x72 && dh->info.dataFormat[2] == 0x6b && - dh->info.dataFormat[3] == 0x20) - // Note: info.fFormatVersion is duplicated in the RBBIDataHeader, and is - // validated when checking that. + dh->info.dataFormat[3] == 0x20 && + isDataVersionAcceptable(dh->info.formatVersion)) ) { status = U_INVALID_FORMAT_ERROR; return; @@ -84,6 +66,11 @@ RBBIDataWrapper::RBBIDataWrapper(UDataMemory* udm, UErrorCode &status) { fUDataMem = udm; } +UBool RBBIDataWrapper::isDataVersionAcceptable(const UVersionInfo version) { + return RBBI_DATA_FORMAT_VERSION[0] == version[0]; +} + + //----------------------------------------------------------------------------- // // init(). 
Does most of the work of construction, shared between the @@ -96,10 +83,11 @@ void RBBIDataWrapper::init0() { fReverseTable = NULL; fSafeFwdTable = NULL; fSafeRevTable = NULL; - fRuleSource = NULL; + fRuleSource = NULL; fRuleStatusTable = NULL; - fUDataMem = NULL; - fRefCount = 0; + fTrie = NULL; + fUDataMem = NULL; + fRefCount = 0; fDontFreeData = TRUE; } @@ -108,8 +96,7 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { return; } fHeader = data; - if (fHeader->fMagic != 0xb1a0 || fHeader->fFormatVersion[0] != 3) - { + if (fHeader->fMagic != 0xb1a0 || !isDataVersionAcceptable(fHeader->fFormatVersion)) { status = U_INVALID_FORMAT_ERROR; return; } @@ -131,16 +118,23 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { fSafeRevTable = (RBBIStateTable *)((char *)data + fHeader->fSRTable); } + // Rule Compatibility Hacks + // If a rule set includes reverse rules but does not explicitly include safe reverse rules, + // the reverse rules are to be treated as safe reverse rules. 
+ + if (fSafeRevTable == NULL && fReverseTable != NULL) { + fSafeRevTable = fReverseTable; + fReverseTable = NULL; + } - utrie_unserialize(&fTrie, - (uint8_t *)data + fHeader->fTrie, - fHeader->fTrieLen, - &status); + fTrie = utrie2_openFromSerialized(UTRIE2_16_VALUE_BITS, + (uint8_t *)data + fHeader->fTrie, + fHeader->fTrieLen, + NULL, // *actual length + &status); if (U_FAILURE(status)) { return; } - fTrie.getFoldingOffset=getFoldingOffset; - fRuleSource = (UChar *)((char *)data + fHeader->fRuleSource); fRuleString.setTo(TRUE, fRuleSource, -1); @@ -165,6 +159,8 @@ void RBBIDataWrapper::init(const RBBIDataHeader *data, UErrorCode &status) { //----------------------------------------------------------------------------- RBBIDataWrapper::~RBBIDataWrapper() { U_ASSERT(fRefCount == 0); + utrie2_close(fTrie); + fTrie = NULL; if (fUDataMem) { udata_close(fUDataMem); } else if (!fDontFreeData) { @@ -323,7 +319,7 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD pInfo->dataFormat[1]==0x72 && pInfo->dataFormat[2]==0x6b && pInfo->dataFormat[3]==0x20 && - pInfo->formatVersion[0]==3 )) { + RBBIDataWrapper::isDataVersionAcceptable(pInfo->formatVersion) )) { udata_printError(ds, "ubrk_swap(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized\n", pInfo->dataFormat[0], pInfo->dataFormat[1], pInfo->dataFormat[2], pInfo->dataFormat[3], @@ -344,17 +340,11 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD // // Get the RRBI Data Header, and check that it appears to be OK. // - // Note: ICU 3.2 and earlier, RBBIDataHeader::fDataFormat was actually - // an int32_t with a value of 1. Starting with ICU 3.4, - // RBBI's fDataFormat matches the dataFormat field from the - // UDataInfo header, four int8_t bytes. 
The value is {3,1,0,0} - // const uint8_t *inBytes =(const uint8_t *)inData+headerSize; RBBIDataHeader *rbbiDH = (RBBIDataHeader *)inBytes; if (ds->readUInt32(rbbiDH->fMagic) != 0xb1a0 || - rbbiDH->fFormatVersion[0] != 3 || - ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) - { + !RBBIDataWrapper::isDataVersionAcceptable(rbbiDH->fFormatVersion) || + ds->readUInt32(rbbiDH->fLength) < sizeof(RBBIDataHeader)) { udata_printError(ds, "ubrk_swap(): RBBI Data header is invalid.\n"); *status=U_UNSUPPORTED_ERROR; return 0; @@ -451,8 +441,8 @@ ubrk_swap(const UDataSwapper *ds, const void *inData, int32_t length, void *outD } // Trie table for character categories - utrie_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), - outBytes+ds->readUInt32(rbbiDH->fTrie), status); + utrie2_swap(ds, inBytes+ds->readUInt32(rbbiDH->fTrie), ds->readUInt32(rbbiDH->fTrieLen), + outBytes+ds->readUInt32(rbbiDH->fTrie), status); // Source Rules Text. It's UChar data ds->swapArray16(ds, inBytes+ds->readUInt32(rbbiDH->fRuleSource), ds->readUInt32(rbbiDH->fRuleSourceLen), |