diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/i18n/cpdtrans.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/i18n/cpdtrans.cpp | 616 |
1 files changed, 0 insertions, 616 deletions
diff --git a/deps/node/deps/icu-small/source/i18n/cpdtrans.cpp b/deps/node/deps/icu-small/source/i18n/cpdtrans.cpp deleted file mode 100644 index a204de5a..00000000 --- a/deps/node/deps/icu-small/source/i18n/cpdtrans.cpp +++ /dev/null @@ -1,616 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 1999-2011, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -* Date Name Description -* 11/17/99 aliu Creation. -********************************************************************** -*/ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_TRANSLITERATION - -#include "unicode/unifilt.h" -#include "unicode/uniset.h" -#include "cpdtrans.h" -#include "uvector.h" -#include "tridpars.h" -#include "cmemory.h" - -// keep in sync with Transliterator -//static const UChar ID_SEP = 0x002D; /*-*/ -static const UChar ID_DELIM = 0x003B; /*;*/ -static const UChar NEWLINE = 10; - -static const UChar COLON_COLON[] = {0x3A, 0x3A, 0}; //"::" - -U_NAMESPACE_BEGIN - -const UChar CompoundTransliterator::PASS_STRING[] = { 0x0025, 0x0050, 0x0061, 0x0073, 0x0073, 0 }; // "%Pass" - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompoundTransliterator) - -/** - * Constructs a new compound transliterator given an array of - * transliterators. The array of transliterators may be of any - * length, including zero or one, however, useful compound - * transliterators have at least two components. - * @param transliterators array of <code>Transliterator</code> - * objects - * @param transliteratorCount The number of - * <code>Transliterator</code> objects in transliterators. - * @param filter the filter. Any character for which - * <tt>filter.contains()</tt> returns <tt>false</tt> will not be - * altered by this transliterator. If <tt>filter</tt> is - * <tt>null</tt> then no filtering is applied. - */ -CompoundTransliterator::CompoundTransliterator( - Transliterator* const transliterators[], - int32_t transliteratorCount, - UnicodeFilter* adoptedFilter) : - Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter), - trans(0), count(0), numAnonymousRBTs(0) { - setTransliterators(transliterators, transliteratorCount); -} - -/** - * Splits an ID of the form "ID;ID;..." into a compound using each - * of the IDs. - * @param id of above form - * @param forward if false, does the list in reverse order, and - * takes the inverse of each ID. - */ -CompoundTransliterator::CompoundTransliterator(const UnicodeString& id, - UTransDirection direction, - UnicodeFilter* adoptedFilter, - UParseError& /*parseError*/, - UErrorCode& status) : - Transliterator(id, adoptedFilter), - trans(0), numAnonymousRBTs(0) { - // TODO add code for parseError...currently unused, but - // later may be used by parsing code... - init(id, direction, TRUE, status); -} - -CompoundTransliterator::CompoundTransliterator(const UnicodeString& id, - UParseError& /*parseError*/, - UErrorCode& status) : - Transliterator(id, 0), // set filter to 0 here! - trans(0), numAnonymousRBTs(0) { - // TODO add code for parseError...currently unused, but - // later may be used by parsing code... - init(id, UTRANS_FORWARD, TRUE, status); -} - - -/** - * Private constructor for use of TransliteratorAlias - */ -CompoundTransliterator::CompoundTransliterator(const UnicodeString& newID, - UVector& list, - UnicodeFilter* adoptedFilter, - int32_t anonymousRBTs, - UParseError& /*parseError*/, - UErrorCode& status) : - Transliterator(newID, adoptedFilter), - trans(0), numAnonymousRBTs(anonymousRBTs) -{ - init(list, UTRANS_FORWARD, FALSE, status); -} - -/** - * Private constructor for Transliterator from a vector of - * transliterators. The caller is responsible for fixing up the - * ID. - */ -CompoundTransliterator::CompoundTransliterator(UVector& list, - UParseError& /*parseError*/, - UErrorCode& status) : - Transliterator(UnicodeString(), NULL), - trans(0), numAnonymousRBTs(0) -{ - // TODO add code for parseError...currently unused, but - // later may be used by parsing code... - init(list, UTRANS_FORWARD, FALSE, status); - // assume caller will fixup ID -} - -CompoundTransliterator::CompoundTransliterator(UVector& list, - int32_t anonymousRBTs, - UParseError& /*parseError*/, - UErrorCode& status) : - Transliterator(UnicodeString(), NULL), - trans(0), numAnonymousRBTs(anonymousRBTs) -{ - init(list, UTRANS_FORWARD, FALSE, status); -} - -/** - * Finish constructing a transliterator: only to be called by - * constructors. Before calling init(), set trans and filter to NULL. - * @param id the id containing ';'-separated entries - * @param direction either FORWARD or REVERSE - * @param idSplitPoint the index into id at which the - * adoptedSplitTransliterator should be inserted, if there is one, or - * -1 if there is none. - * @param adoptedSplitTransliterator a transliterator to be inserted - * before the entry at offset idSplitPoint in the id string. May be - * NULL to insert no entry. - * @param fixReverseID if TRUE, then reconstruct the ID of reverse - * entries by calling getID() of component entries. Some constructors - * do not require this because they apply a facade ID anyway. - * @param status the error code indicating success or failure - */ -void CompoundTransliterator::init(const UnicodeString& id, - UTransDirection direction, - UBool fixReverseID, - UErrorCode& status) { - // assert(trans == 0); - - if (U_FAILURE(status)) { - return; - } - - UVector list(status); - UnicodeSet* compoundFilter = NULL; - UnicodeString regenID; - if (!TransliteratorIDParser::parseCompoundID(id, direction, - regenID, list, compoundFilter)) { - status = U_INVALID_ID; - delete compoundFilter; - return; - } - - TransliteratorIDParser::instantiateList(list, status); - - init(list, direction, fixReverseID, status); - - if (compoundFilter != NULL) { - adoptFilter(compoundFilter); - } -} - -/** - * Finish constructing a transliterator: only to be called by - * constructors. Before calling init(), set trans and filter to NULL. - * @param list a vector of transliterator objects to be adopted. It - * should NOT be empty. The list should be in declared order. That - * is, it should be in the FORWARD order; if direction is REVERSE then - * the list order will be reversed. - * @param direction either FORWARD or REVERSE - * @param fixReverseID if TRUE, then reconstruct the ID of reverse - * entries by calling getID() of component entries. Some constructors - * do not require this because they apply a facade ID anyway. - * @param status the error code indicating success or failure - */ -void CompoundTransliterator::init(UVector& list, - UTransDirection direction, - UBool fixReverseID, - UErrorCode& status) { - // assert(trans == 0); - - // Allocate array - if (U_SUCCESS(status)) { - count = list.size(); - trans = (Transliterator **)uprv_malloc(count * sizeof(Transliterator *)); - /* test for NULL */ - if (trans == 0) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - } - - if (U_FAILURE(status) || trans == 0) { - // assert(trans == 0); - return; - } - - // Move the transliterators from the vector into an array. - // Reverse the order if necessary. - int32_t i; - for (i=0; i<count; ++i) { - int32_t j = (direction == UTRANS_FORWARD) ? i : count - 1 - i; - trans[i] = (Transliterator*) list.elementAt(j); - } - - // If the direction is UTRANS_REVERSE then we may need to fix the - // ID. - if (direction == UTRANS_REVERSE && fixReverseID) { - UnicodeString newID; - for (i=0; i<count; ++i) { - if (i > 0) { - newID.append(ID_DELIM); - } - newID.append(trans[i]->getID()); - } - setID(newID); - } - - computeMaximumContextLength(); -} - -/** - * Return the IDs of the given list of transliterators, concatenated - * with ID_DELIM delimiting them. Equivalent to the perlish expression - * join(ID_DELIM, map($_.getID(), transliterators). - */ -UnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterators[], - int32_t transCount) { - UnicodeString id; - for (int32_t i=0; i<transCount; ++i) { - if (i > 0) { - id.append(ID_DELIM); - } - id.append(transliterators[i]->getID()); - } - return id; // Return temporary -} - -/** - * Copy constructor. - */ -CompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) : - Transliterator(t), trans(0), count(0), numAnonymousRBTs(-1) { - *this = t; -} - -/** - * Destructor - */ -CompoundTransliterator::~CompoundTransliterator() { - freeTransliterators(); -} - -void CompoundTransliterator::freeTransliterators(void) { - if (trans != 0) { - for (int32_t i=0; i<count; ++i) { - delete trans[i]; - } - uprv_free(trans); - } - trans = 0; - count = 0; -} - -/** - * Assignment operator. - */ -CompoundTransliterator& CompoundTransliterator::operator=( - const CompoundTransliterator& t) -{ - Transliterator::operator=(t); - int32_t i = 0; - UBool failed = FALSE; - if (trans != NULL) { - for (i=0; i<count; ++i) { - delete trans[i]; - trans[i] = 0; - } - } - if (t.count > count) { - if (trans != NULL) { - uprv_free(trans); - } - trans = (Transliterator **)uprv_malloc(t.count * sizeof(Transliterator *)); - } - count = t.count; - if (trans != NULL) { - for (i=0; i<count; ++i) { - trans[i] = t.trans[i]->clone(); - if (trans[i] == NULL) { - failed = TRUE; - break; - } - } - } - - // if memory allocation failed delete backwards trans array - if (failed && i > 0) { - int32_t n; - for (n = i-1; n >= 0; n--) { - uprv_free(trans[n]); - trans[n] = NULL; - } - } - numAnonymousRBTs = t.numAnonymousRBTs; - return *this; -} - -/** - * Transliterator API. - */ -Transliterator* CompoundTransliterator::clone(void) const { - return new CompoundTransliterator(*this); -} - -/** - * Returns the number of transliterators in this chain. - * @return number of transliterators in this chain. - */ -int32_t CompoundTransliterator::getCount(void) const { - return count; -} - -/** - * Returns the transliterator at the given index in this chain. - * @param index index into chain, from 0 to <code>getCount() - 1</code> - * @return transliterator at the given index - */ -const Transliterator& CompoundTransliterator::getTransliterator(int32_t index) const { - return *trans[index]; -} - -void CompoundTransliterator::setTransliterators(Transliterator* const transliterators[], - int32_t transCount) { - Transliterator** a = (Transliterator **)uprv_malloc(transCount * sizeof(Transliterator *)); - if (a == NULL) { - return; - } - int32_t i = 0; - UBool failed = FALSE; - for (i=0; i<transCount; ++i) { - a[i] = transliterators[i]->clone(); - if (a[i] == NULL) { - failed = TRUE; - break; - } - } - if (failed && i > 0) { - int32_t n; - for (n = i-1; n >= 0; n--) { - uprv_free(a[n]); - a[n] = NULL; - } - return; - } - adoptTransliterators(a, transCount); -} - -void CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[], - int32_t transCount) { - // First free trans[] and set count to zero. Once this is done, - // orphan the filter. Set up the new trans[]. - freeTransliterators(); - trans = adoptedTransliterators; - count = transCount; - computeMaximumContextLength(); - setID(joinIDs(trans, count)); -} - -/** - * Append c to buf, unless buf is empty or buf already ends in c. - */ -static void _smartAppend(UnicodeString& buf, UChar c) { - if (buf.length() != 0 && - buf.charAt(buf.length() - 1) != c) { - buf.append(c); - } -} - -UnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource, - UBool escapeUnprintable) const { - // We do NOT call toRules() on our component transliterators, in - // general. If we have several rule-based transliterators, this - // yields a concatenation of the rules -- not what we want. We do - // handle compound RBT transliterators specially -- those for which - // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, - // we do call toRules() recursively. - rulesSource.truncate(0); - if (numAnonymousRBTs >= 1 && getFilter() != NULL) { - // If we are a compound RBT and if we have a global - // filter, then emit it at the top. - UnicodeString pat; - rulesSource.append(COLON_COLON, 2).append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM); - } - for (int32_t i=0; i<count; ++i) { - UnicodeString rule; - - // Anonymous RuleBasedTransliterators (inline rules and - // ::BEGIN/::END blocks) are given IDs that begin with - // "%Pass": use toRules() to write all the rules to the output - // (and insert "::Null;" if we have two in a row) - if (trans[i]->getID().startsWith(PASS_STRING, 5)) { - trans[i]->toRules(rule, escapeUnprintable); - if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1]->getID().startsWith(PASS_STRING, 5)) - rule = UNICODE_STRING_SIMPLE("::Null;") + rule; - - // we also use toRules() on CompoundTransliterators (which we - // check for by looking for a semicolon in the ID)-- this gets - // the list of their child transliterators output in the right - // format - } else if (trans[i]->getID().indexOf(ID_DELIM) >= 0) { - trans[i]->toRules(rule, escapeUnprintable); - - // for everything else, use Transliterator::toRules() - } else { - trans[i]->Transliterator::toRules(rule, escapeUnprintable); - } - _smartAppend(rulesSource, NEWLINE); - rulesSource.append(rule); - _smartAppend(rulesSource, ID_DELIM); - } - return rulesSource; -} - -/** - * Implement Transliterator framework - */ -void CompoundTransliterator::handleGetSourceSet(UnicodeSet& result) const { - UnicodeSet set; - result.clear(); - for (int32_t i=0; i<count; ++i) { - result.addAll(trans[i]->getSourceSet(set)); - // Take the example of Hiragana-Latin. This is really - // Hiragana-Katakana; Katakana-Latin. The source set of - // these two is roughly [:Hiragana:] and [:Katakana:]. - // But the source set for the entire transliterator is - // actually [:Hiragana:] ONLY -- that is, the first - // non-empty source set. - - // This is a heuristic, and not 100% reliable. - if (!result.isEmpty()) { - break; - } - } -} - -/** - * Override Transliterator framework - */ -UnicodeSet& CompoundTransliterator::getTargetSet(UnicodeSet& result) const { - UnicodeSet set; - result.clear(); - for (int32_t i=0; i<count; ++i) { - // This is a heuristic, and not 100% reliable. - result.addAll(trans[i]->getTargetSet(set)); - } - return result; -} - -/** - * Implements {@link Transliterator#handleTransliterate}. - */ -void CompoundTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, - UBool incremental) const { - /* Call each transliterator with the same contextStart and - * start, but with the limit as modified - * by preceding transliterators. The start index must be - * reset for each transliterator to give each a chance to - * transliterate the text. The initial contextStart index is known - * to still point to the same place after each transliterator - * is called because each transliterator will not change the - * text between contextStart and the initial start index. - * - * IMPORTANT: After the first transliterator, each subsequent - * transliterator only gets to transliterate text committed by - * preceding transliterators; that is, the start (output - * value) of transliterator i becomes the limit (input value) - * of transliterator i+1. Finally, the overall limit is fixed - * up before we return. - * - * Assumptions we make here: - * (1) contextStart <= start <= limit <= contextLimit <= text.length() - * (2) start <= start' <= limit' ;cursor doesn't move back - * (3) start <= limit' ;text before cursor unchanged - * - start' is the value of start after calling handleKT - * - limit' is the value of limit after calling handleKT - */ - - /** - * Example: 3 transliterators. This example illustrates the - * mechanics we need to implement. C, S, and L are the contextStart, - * start, and limit. gl is the globalLimit. contextLimit is - * equal to limit throughout. - * - * 1. h-u, changes hex to Unicode - * - * 4 7 a d 0 4 7 a - * abc/u0061/u => abca/u - * C S L C S L gl=f->a - * - * 2. upup, changes "x" to "XX" - * - * 4 7 a 4 7 a - * abca/u => abcAA/u - * C SL C S - * L gl=a->b - * 3. u-h, changes Unicode to hex - * - * 4 7 a 4 7 a d 0 3 - * abcAA/u => abc/u0041/u0041/u - * C S L C S - * L gl=b->15 - * 4. return - * - * 4 7 a d 0 3 - * abc/u0041/u0041/u - * C S L - */ - - if (count < 1) { - index.start = index.limit; - return; // Short circuit for empty compound transliterators - } - - // compoundLimit is the limit value for the entire compound - // operation. We overwrite index.limit with the previous - // index.start. After each transliteration, we update - // compoundLimit for insertions or deletions that have happened. - int32_t compoundLimit = index.limit; - - // compoundStart is the start for the entire compound - // operation. - int32_t compoundStart = index.start; - - int32_t delta = 0; // delta in length - - // Give each transliterator a crack at the run of characters. - // See comments at the top of the method for more detail. - for (int32_t i=0; i<count; ++i) { - index.start = compoundStart; // Reset start - int32_t limit = index.limit; - - if (index.start == index.limit) { - // Short circuit for empty range - break; - } - - trans[i]->filteredTransliterate(text, index, incremental); - - // In a properly written transliterator, start == limit after - // handleTransliterate() returns when incremental is false. - // Catch cases where the subclass doesn't do this, and throw - // an exception. (Just pinning start to limit is a bad idea, - // because what's probably happening is that the subclass - // isn't transliterating all the way to the end, and it should - // in non-incremental mode.) - if (!incremental && index.start != index.limit) { - // We can't throw an exception, so just fudge things - index.start = index.limit; - } - - // Cumulative delta for insertions/deletions - delta += index.limit - limit; - - if (incremental) { - // In the incremental case, only allow subsequent - // transliterators to modify what has already been - // completely processed by prior transliterators. In the - // non-incrmental case, allow each transliterator to - // process the entire text. - index.limit = index.start; - } - } - - compoundLimit += delta; - - // Start is good where it is -- where the last transliterator left - // it. Limit needs to be put back where it was, modulo - // adjustments for deletions/insertions. - index.limit = compoundLimit; -} - -/** - * Sets the length of the longest context required by this transliterator. - * This is <em>preceding</em> context. - */ -void CompoundTransliterator::computeMaximumContextLength(void) { - int32_t max = 0; - for (int32_t i=0; i<count; ++i) { - int32_t len = trans[i]->getMaximumContextLength(); - if (len > max) { - max = len; - } - } - setMaximumContextLength(max); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_TRANSLITERATION */ - -/* eof */ |