// © 2016 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html /* ******************************************************************************* * Copyright (C) 2013-2014, International Business Machines * Corporation and others. All Rights Reserved. ******************************************************************************* * collationrootelements.cpp * * created on: 2013mar05 * created by: Markus W. Scherer */ #include "unicode/utypes.h" #if !UCONFIG_NO_COLLATION #include "collation.h" #include "collationrootelements.h" #include "uassert.h" U_NAMESPACE_BEGIN int64_t CollationRootElements::lastCEWithPrimaryBefore(uint32_t p) const { if(p == 0) { return 0; } U_ASSERT(p > elements[elements[IX_FIRST_PRIMARY_INDEX]]); int32_t index = findP(p); uint32_t q = elements[index]; uint32_t secTer; if(p == (q & 0xffffff00)) { // p == elements[index] is a root primary. Find the CE before it. // We must not be in a primary range. U_ASSERT((q & PRIMARY_STEP_MASK) == 0); secTer = elements[index - 1]; if((secTer & SEC_TER_DELTA_FLAG) == 0) { // Primary CE just before p. p = secTer & 0xffffff00; secTer = Collation::COMMON_SEC_AND_TER_CE; } else { // secTer = last secondary & tertiary for the previous primary index -= 2; for(;;) { p = elements[index]; if((p & SEC_TER_DELTA_FLAG) == 0) { p &= 0xffffff00; break; } --index; } } } else { // p > elements[index] which is the previous primary. // Find the last secondary & tertiary weights for it. p = q & 0xffffff00; secTer = Collation::COMMON_SEC_AND_TER_CE; for(;;) { q = elements[++index]; if((q & SEC_TER_DELTA_FLAG) == 0) { // We must not be in a primary range. U_ASSERT((q & PRIMARY_STEP_MASK) == 0); break; } secTer = q; } } return ((int64_t)p << 32) | (secTer & ~SEC_TER_DELTA_FLAG); } int64_t CollationRootElements::firstCEWithPrimaryAtLeast(uint32_t p) const { if(p == 0) { return 0; } int32_t index = findP(p); if(p != (elements[index] & 0xffffff00)) { for(;;) { p = elements[++index]; if((p & SEC_TER_DELTA_FLAG) == 0) { // First primary after p. We must not be in a primary range. U_ASSERT((p & PRIMARY_STEP_MASK) == 0); break; } } } // The code above guarantees that p has at most 3 bytes: (p & 0xff) == 0. return ((int64_t)p << 32) | Collation::COMMON_SEC_AND_TER_CE; } uint32_t CollationRootElements::getPrimaryBefore(uint32_t p, UBool isCompressible) const { int32_t index = findPrimary(p); int32_t step; uint32_t q = elements[index]; if(p == (q & 0xffffff00)) { // Found p itself. Return the previous primary. // See if p is at the end of a previous range. step = (int32_t)q & PRIMARY_STEP_MASK; if(step == 0) { // p is not at the end of a range. Look for the previous primary. do { p = elements[--index]; } while((p & SEC_TER_DELTA_FLAG) != 0); return p & 0xffffff00; } } else { // p is in a range, and not at the start. uint32_t nextElement = elements[index + 1]; U_ASSERT(isEndOfPrimaryRange(nextElement)); step = (int32_t)nextElement & PRIMARY_STEP_MASK; } // Return the previous range primary. if((p & 0xffff) == 0) { return Collation::decTwoBytePrimaryByOneStep(p, isCompressible, step); } else { return Collation::decThreeBytePrimaryByOneStep(p, isCompressible, step); } } uint32_t CollationRootElements::getSecondaryBefore(uint32_t p, uint32_t s) const { int32_t index; uint32_t previousSec, sec; if(p == 0) { index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; // Gap at the beginning of the secondary CE range. previousSec = 0; sec = elements[index] >> 16; } else { index = findPrimary(p) + 1; previousSec = Collation::BEFORE_WEIGHT16; sec = getFirstSecTerForPrimary(index) >> 16; } U_ASSERT(s >= sec); while(s > sec) { previousSec = sec; U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); sec = elements[index++] >> 16; } U_ASSERT(sec == s); return previousSec; } uint32_t CollationRootElements::getTertiaryBefore(uint32_t p, uint32_t s, uint32_t t) const { U_ASSERT((t & ~Collation::ONLY_TERTIARY_MASK) == 0); int32_t index; uint32_t previousTer, secTer; if(p == 0) { if(s == 0) { index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; // Gap at the beginning of the tertiary CE range. previousTer = 0; } else { index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; previousTer = Collation::BEFORE_WEIGHT16; } secTer = elements[index] & ~SEC_TER_DELTA_FLAG; } else { index = findPrimary(p) + 1; previousTer = Collation::BEFORE_WEIGHT16; secTer = getFirstSecTerForPrimary(index); } uint32_t st = (s << 16) | t; while(st > secTer) { if((secTer >> 16) == s) { previousTer = secTer; } U_ASSERT((elements[index] & SEC_TER_DELTA_FLAG) != 0); secTer = elements[index++] & ~SEC_TER_DELTA_FLAG; } U_ASSERT(secTer == st); return previousTer & 0xffff; } uint32_t CollationRootElements::getPrimaryAfter(uint32_t p, int32_t index, UBool isCompressible) const { U_ASSERT(p == (elements[index] & 0xffffff00) || isEndOfPrimaryRange(elements[index + 1])); uint32_t q = elements[++index]; int32_t step; if((q & SEC_TER_DELTA_FLAG) == 0 && (step = (int32_t)q & PRIMARY_STEP_MASK) != 0) { // Return the next primary in this range. if((p & 0xffff) == 0) { return Collation::incTwoBytePrimaryByOffset(p, isCompressible, step); } else { return Collation::incThreeBytePrimaryByOffset(p, isCompressible, step); } } else { // Return the next primary in the list. while((q & SEC_TER_DELTA_FLAG) != 0) { q = elements[++index]; } U_ASSERT((q & PRIMARY_STEP_MASK) == 0); return q; } } uint32_t CollationRootElements::getSecondaryAfter(int32_t index, uint32_t s) const { uint32_t secTer; uint32_t secLimit; if(index == 0) { // primary = 0 U_ASSERT(s != 0); index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; secTer = elements[index]; // Gap at the end of the secondary CE range. secLimit = 0x10000; } else { U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); secTer = getFirstSecTerForPrimary(index + 1); // If this is an explicit sec/ter unit, then it will be read once more. // Gap for secondaries of primary CEs. secLimit = getSecondaryBoundary(); } for(;;) { uint32_t sec = secTer >> 16; if(sec > s) { return sec; } secTer = elements[++index]; if((secTer & SEC_TER_DELTA_FLAG) == 0) { return secLimit; } } } uint32_t CollationRootElements::getTertiaryAfter(int32_t index, uint32_t s, uint32_t t) const { uint32_t secTer; uint32_t terLimit; if(index == 0) { // primary = 0 if(s == 0) { U_ASSERT(t != 0); index = (int32_t)elements[IX_FIRST_TERTIARY_INDEX]; // Gap at the end of the tertiary CE range. terLimit = 0x4000; } else { index = (int32_t)elements[IX_FIRST_SECONDARY_INDEX]; // Gap for tertiaries of primary/secondary CEs. terLimit = getTertiaryBoundary(); } secTer = elements[index] & ~SEC_TER_DELTA_FLAG; } else { U_ASSERT(index >= (int32_t)elements[IX_FIRST_PRIMARY_INDEX]); secTer = getFirstSecTerForPrimary(index + 1); // If this is an explicit sec/ter unit, then it will be read once more. terLimit = getTertiaryBoundary(); } uint32_t st = (s << 16) | t; for(;;) { if(secTer > st) { U_ASSERT((secTer >> 16) == s); return secTer & 0xffff; } secTer = elements[++index]; // No tertiary greater than t for this primary+secondary. if((secTer & SEC_TER_DELTA_FLAG) == 0 || (secTer >> 16) > s) { return terLimit; } secTer &= ~SEC_TER_DELTA_FLAG; } } uint32_t CollationRootElements::getFirstSecTerForPrimary(int32_t index) const { uint32_t secTer = elements[index]; if((secTer & SEC_TER_DELTA_FLAG) == 0) { // No sec/ter delta. return Collation::COMMON_SEC_AND_TER_CE; } secTer &= ~SEC_TER_DELTA_FLAG; if(secTer > Collation::COMMON_SEC_AND_TER_CE) { // Implied sec/ter. return Collation::COMMON_SEC_AND_TER_CE; } // Explicit sec/ter below common/common. return secTer; } int32_t CollationRootElements::findPrimary(uint32_t p) const { // Requirement: p must occur as a root primary. U_ASSERT((p & 0xff) == 0); // at most a 3-byte primary int32_t index = findP(p); // If p is in a range, then we just assume that p is an actual primary in this range. // (Too cumbersome/expensive to check.) // Otherwise, it must be an exact match. U_ASSERT(isEndOfPrimaryRange(elements[index + 1]) || p == (elements[index] & 0xffffff00)); return index; } int32_t CollationRootElements::findP(uint32_t p) const { // p need not occur as a root primary. // For example, it might be a reordering group boundary. U_ASSERT((p >> 24) != Collation::UNASSIGNED_IMPLICIT_BYTE); // modified binary search int32_t start = (int32_t)elements[IX_FIRST_PRIMARY_INDEX]; U_ASSERT(p >= elements[start]); int32_t limit = length - 1; U_ASSERT(elements[limit] >= PRIMARY_SENTINEL); U_ASSERT(p < elements[limit]); while((start + 1) < limit) { // Invariant: elements[start] and elements[limit] are primaries, // and elements[start]<=p<=elements[limit]. int32_t i = (start + limit) / 2; uint32_t q = elements[i]; if((q & SEC_TER_DELTA_FLAG) != 0) { // Find the next primary. int32_t j = i + 1; for(;;) { if(j == limit) { break; } q = elements[j]; if((q & SEC_TER_DELTA_FLAG) == 0) { i = j; break; } ++j; } if((q & SEC_TER_DELTA_FLAG) != 0) { // Find the preceding primary. j = i - 1; for(;;) { if(j == start) { break; } q = elements[j]; if((q & SEC_TER_DELTA_FLAG) == 0) { i = j; break; } --j; } if((q & SEC_TER_DELTA_FLAG) != 0) { // No primary between start and limit. break; } } } if(p < (q & 0xffffff00)) { // Reset the "step" bits of a range end primary. limit = i; } else { start = i; } } return start; } U_NAMESPACE_END #endif // !UCONFIG_NO_COLLATION