// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2013-2015, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * collationsettings.cpp * * created on: 2013feb07 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "unicode/ucol.h" #include "cmemory.h" #include "collation.h" #include "collationdata.h" #include "collationsettings.h" #include "sharedobject.h" #include "uassert.h" #include "umutex.h" #include "uvectr32.h" U_NAMESPACE_BEGIN CollationSettings::CollationSettings(const CollationSettings &other) : SharedObject(other), options(other.options), variableTop(other.variableTop), reorderTable(NULL), minHighNoReorder(other.minHighNoReorder), reorderRanges(NULL), reorderRangesLength(0), reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0), fastLatinOptions(other.fastLatinOptions) { UErrorCode errorCode = U_ZERO_ERROR; copyReorderingFrom(other, errorCode); if(fastLatinOptions >= 0) { uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries)); } } CollationSettings::~CollationSettings() { if(reorderCodesCapacity != 0) { uprv_free(const_cast(reorderCodes)); } } UBool CollationSettings::operator==(const CollationSettings &other) const { if(options != other.options) { return FALSE; } if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; } if(reorderCodesLength != other.reorderCodesLength) { return FALSE; } for(int32_t i = 0; i < reorderCodesLength; ++i) { if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; } } return TRUE; } int32_t CollationSettings::hashCode() const { int32_t h = options << 8; if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; } h ^= reorderCodesLength; for(int32_t i = 0; i < reorderCodesLength; ++i) { h ^= (reorderCodes[i] << i); } return h; } void CollationSettings::resetReordering() { // When we turn off reordering, we want to set a NULL permutation // rather than a no-op permutation. // Keep the memory via reorderCodes and its capacity. reorderTable = NULL; minHighNoReorder = 0; reorderRangesLength = 0; reorderCodesLength = 0; } void CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length, const uint32_t *ranges, int32_t rangesLength, const uint8_t *table, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } if(table != NULL && (rangesLength == 0 ? !reorderTableHasSplitBytes(table) : rangesLength >= 2 && // The first offset must be 0. The last offset must not be 0. (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) { // We need to release the memory before setting the alias pointer. if(reorderCodesCapacity != 0) { uprv_free(const_cast(reorderCodes)); reorderCodesCapacity = 0; } reorderTable = table; reorderCodes = codes; reorderCodesLength = length; // Drop ranges before the first split byte. They are reordered by the table. // This then speeds up reordering of the remaining ranges. int32_t firstSplitByteRangeIndex = 0; while(firstSplitByteRangeIndex < rangesLength && (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) { // The second byte of the primary limit is 0. ++firstSplitByteRangeIndex; } if(firstSplitByteRangeIndex == rangesLength) { U_ASSERT(!reorderTableHasSplitBytes(table)); minHighNoReorder = 0; reorderRanges = NULL; reorderRangesLength = 0; } else { U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0); minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; reorderRanges = ranges + firstSplitByteRangeIndex; reorderRangesLength = rangesLength - firstSplitByteRangeIndex; } return; } // Regenerate missing data. setReordering(data, codes, length, errorCode); } void CollationSettings::setReordering(const CollationData &data, const int32_t *codes, int32_t codesLength, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) { resetReordering(); return; } UVector32 rangesList(errorCode); data.makeReorderRanges(codes, codesLength, rangesList, errorCode); if(U_FAILURE(errorCode)) { return; } int32_t rangesLength = rangesList.size(); if(rangesLength == 0) { resetReordering(); return; } const uint32_t *ranges = reinterpret_cast(rangesList.getBuffer()); // ranges[] contains at least two (limit, offset) pairs. // The first offset must be 0. The last offset must not be 0. // Separators (at the low end) and trailing weights (at the high end) // are never reordered. U_ASSERT(rangesLength >= 2); U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0); minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000; // Write the lead byte permutation table. // Set a 0 for each lead byte that has a range boundary in the middle. uint8_t table[256]; int32_t b = 0; int32_t firstSplitByteRangeIndex = -1; for(int32_t i = 0; i < rangesLength; ++i) { uint32_t pair = ranges[i]; int32_t limit1 = (int32_t)(pair >> 24); while(b < limit1) { table[b] = (uint8_t)(b + pair); ++b; } // Check the second byte of the limit. if((pair & 0xff0000) != 0) { table[limit1] = 0; b = limit1 + 1; if(firstSplitByteRangeIndex < 0) { firstSplitByteRangeIndex = i; } } } while(b <= 0xff) { table[b] = (uint8_t)b; ++b; } if(firstSplitByteRangeIndex < 0) { // The lead byte permutation table alone suffices for reordering. rangesLength = 0; } else { // Remove the ranges below the first split byte. ranges += firstSplitByteRangeIndex; rangesLength -= firstSplitByteRangeIndex; } setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode); } void CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength, const uint32_t *ranges, int32_t rangesLength, const uint8_t *table, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } int32_t *ownedCodes; int32_t totalLength = codesLength + rangesLength; U_ASSERT(totalLength > 0); if(totalLength <= reorderCodesCapacity) { ownedCodes = const_cast(reorderCodes); } else { // Allocate one memory block for the codes, the ranges, and the 16-aligned table. int32_t capacity = (totalLength + 3) & ~3; // round up to a multiple of 4 ints ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256); if(ownedCodes == NULL) { resetReordering(); errorCode = U_MEMORY_ALLOCATION_ERROR; return; } if(reorderCodesCapacity != 0) { uprv_free(const_cast(reorderCodes)); } reorderCodes = ownedCodes; reorderCodesCapacity = capacity; } uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256); uprv_memcpy(ownedCodes, codes, codesLength * 4); uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4); reorderTable = reinterpret_cast(reorderCodes + reorderCodesCapacity); reorderCodesLength = codesLength; reorderRanges = reinterpret_cast(ownedCodes) + codesLength; reorderRangesLength = rangesLength; } void CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } if(!other.hasReordering()) { resetReordering(); return; } minHighNoReorder = other.minHighNoReorder; if(other.reorderCodesCapacity == 0) { // The reorder arrays are aliased to memory-mapped data. reorderTable = other.reorderTable; reorderRanges = other.reorderRanges; reorderRangesLength = other.reorderRangesLength; reorderCodes = other.reorderCodes; reorderCodesLength = other.reorderCodesLength; } else { setReorderArrays(other.reorderCodes, other.reorderCodesLength, other.reorderRanges, other.reorderRangesLength, other.reorderTable, errorCode); } } UBool CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) { U_ASSERT(table[0] == 0); for(int32_t i = 1; i < 256; ++i) { if(table[i] == 0) { return TRUE; } } return FALSE; } uint32_t CollationSettings::reorderEx(uint32_t p) const { if(p >= minHighNoReorder) { return p; } // Round up p so that its lower 16 bits are >= any offset bits. // Then compare q directly with (limit, offset) pairs. uint32_t q = p | 0xffff; uint32_t r; const uint32_t *ranges = reorderRanges; while(q >= (r = *ranges)) { ++ranges; } return p + (r << 24); } void CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } int32_t noStrength = options & ~STRENGTH_MASK; switch(value) { case UCOL_PRIMARY: case UCOL_SECONDARY: case UCOL_TERTIARY: case UCOL_QUATERNARY: case UCOL_IDENTICAL: options = noStrength | (value << STRENGTH_SHIFT); break; case UCOL_DEFAULT: options = noStrength | (defaultOptions & STRENGTH_MASK); break; default: errorCode = U_ILLEGAL_ARGUMENT_ERROR; break; } } void CollationSettings::setFlag(int32_t bit, UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } switch(value) { case UCOL_ON: options |= bit; break; case UCOL_OFF: options &= ~bit; break; case UCOL_DEFAULT: options = (options & ~bit) | (defaultOptions & bit); break; default: errorCode = U_ILLEGAL_ARGUMENT_ERROR; break; } } void CollationSettings::setCaseFirst(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK; switch(value) { case UCOL_OFF: options = noCaseFirst; break; case UCOL_LOWER_FIRST: options = noCaseFirst | CASE_FIRST; break; case UCOL_UPPER_FIRST: options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK; break; case UCOL_DEFAULT: options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK); break; default: errorCode = U_ILLEGAL_ARGUMENT_ERROR; break; } } void CollationSettings::setAlternateHandling(UColAttributeValue value, int32_t defaultOptions, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } int32_t noAlternate = options & ~ALTERNATE_MASK; switch(value) { case UCOL_NON_IGNORABLE: options = noAlternate; break; case UCOL_SHIFTED: options = noAlternate | SHIFTED; break; case UCOL_DEFAULT: options = noAlternate | (defaultOptions & ALTERNATE_MASK); break; default: errorCode = U_ILLEGAL_ARGUMENT_ERROR; break; } } void CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) { if(U_FAILURE(errorCode)) { return; } int32_t noMax = options & ~MAX_VARIABLE_MASK; switch(value) { case MAX_VAR_SPACE: case MAX_VAR_PUNCT: case MAX_VAR_SYMBOL: case MAX_VAR_CURRENCY: options = noMax | (value << MAX_VARIABLE_SHIFT); break; case UCOL_DEFAULT: options = noMax | (defaultOptions & MAX_VARIABLE_MASK); break; default: errorCode = U_ILLEGAL_ARGUMENT_ERROR; break; } } U_NAMESPACE_END #endif // !UCONFIG_NO_COLLATION