diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/normlzr.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/normlzr.cpp | 529 |
1 files changed, 0 insertions, 529 deletions
diff --git a/deps/node/deps/icu-small/source/common/normlzr.cpp b/deps/node/deps/icu-small/source/common/normlzr.cpp deleted file mode 100644 index 3911c90b..00000000 --- a/deps/node/deps/icu-small/source/common/normlzr.cpp +++ /dev/null @@ -1,529 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* - ************************************************************************* - * COPYRIGHT: - * Copyright (c) 1996-2012, International Business Machines Corporation and - * others. All Rights Reserved. - ************************************************************************* - */ - -#include "unicode/utypes.h" - -#if !UCONFIG_NO_NORMALIZATION - -#include "unicode/uniset.h" -#include "unicode/unistr.h" -#include "unicode/chariter.h" -#include "unicode/schriter.h" -#include "unicode/uchriter.h" -#include "unicode/normlzr.h" -#include "unicode/utf16.h" -#include "cmemory.h" -#include "normalizer2impl.h" -#include "uprops.h" // for uniset_getUnicode32Instance() - -#if defined(_ARM64_) && defined(move32) - // System can define move32 intrinsics, but the char iters define move32 method - // using same undef trick in headers, so undef here to re-enable the method. -#undef move32 -#endif - -U_NAMESPACE_BEGIN - -UOBJECT_DEFINE_RTTI_IMPLEMENTATION(Normalizer) - -//------------------------------------------------------------------------- -// Constructors and other boilerplate -//------------------------------------------------------------------------- - -Normalizer::Normalizer(const UnicodeString& str, UNormalizationMode mode) : - UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), - text(new StringCharacterIterator(str)), - currentIndex(0), nextIndex(0), - buffer(), bufferPos(0) -{ - init(); -} - -Normalizer::Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode) : - UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), - text(new UCharCharacterIterator(str, length)), - currentIndex(0), nextIndex(0), - buffer(), bufferPos(0) -{ - init(); -} - -Normalizer::Normalizer(const CharacterIterator& iter, UNormalizationMode mode) : - UObject(), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(mode), fOptions(0), - text(iter.clone()), - currentIndex(0), nextIndex(0), - buffer(), bufferPos(0) -{ - init(); -} - -Normalizer::Normalizer(const Normalizer ©) : - UObject(copy), fFilteredNorm2(NULL), fNorm2(NULL), fUMode(copy.fUMode), fOptions(copy.fOptions), - text(copy.text->clone()), - currentIndex(copy.currentIndex), nextIndex(copy.nextIndex), - buffer(copy.buffer), bufferPos(copy.bufferPos) -{ - init(); -} - -void -Normalizer::init() { - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2=Normalizer2Factory::getInstance(fUMode, errorCode); - if(fOptions&UNORM_UNICODE_3_2) { - delete fFilteredNorm2; - fNorm2=fFilteredNorm2= - new FilteredNormalizer2(*fNorm2, *uniset_getUnicode32Instance(errorCode)); - } - if(U_FAILURE(errorCode)) { - errorCode=U_ZERO_ERROR; - fNorm2=Normalizer2Factory::getNoopInstance(errorCode); - } -} - -Normalizer::~Normalizer() -{ - delete fFilteredNorm2; - delete text; -} - -Normalizer* -Normalizer::clone() const -{ - return new Normalizer(*this); -} - -/** - * Generates a hash code for this iterator. - */ -int32_t Normalizer::hashCode() const -{ - return text->hashCode() + fUMode + fOptions + buffer.hashCode() + bufferPos + currentIndex + nextIndex; -} - -UBool Normalizer::operator==(const Normalizer& that) const -{ - return - this==&that || - (fUMode==that.fUMode && - fOptions==that.fOptions && - *text==*that.text && - buffer==that.buffer && - bufferPos==that.bufferPos && - nextIndex==that.nextIndex); -} - -//------------------------------------------------------------------------- -// Static utility methods -//------------------------------------------------------------------------- - -void U_EXPORT2 -Normalizer::normalize(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UnicodeString& result, - UErrorCode &status) { - if(source.isBogus() || U_FAILURE(status)) { - result.setToBogus(); - if(U_SUCCESS(status)) { - status=U_ILLEGAL_ARGUMENT_ERROR; - } - } else { - UnicodeString localDest; - UnicodeString *dest; - - if(&source!=&result) { - dest=&result; - } else { - // the source and result strings are the same object, use a temporary one - dest=&localDest; - } - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - normalize(source, *dest, status); - } else { - n2->normalize(source, *dest, status); - } - } - if(dest==&localDest && U_SUCCESS(status)) { - result=*dest; - } - } -} - -void U_EXPORT2 -Normalizer::compose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status) { - normalize(source, compat ? UNORM_NFKC : UNORM_NFC, options, result, status); -} - -void U_EXPORT2 -Normalizer::decompose(const UnicodeString& source, - UBool compat, int32_t options, - UnicodeString& result, - UErrorCode &status) { - normalize(source, compat ? UNORM_NFKD : UNORM_NFD, options, result, status); -} - -UNormalizationCheckResult -Normalizer::quickCheck(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - quickCheck(source, status); - } else { - return n2->quickCheck(source, status); - } - } else { - return UNORM_MAYBE; - } -} - -UBool -Normalizer::isNormalized(const UnicodeString& source, - UNormalizationMode mode, int32_t options, - UErrorCode &status) { - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, status); - if(U_SUCCESS(status)) { - if(options&UNORM_UNICODE_3_2) { - return FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(status)). - isNormalized(source, status); - } else { - return n2->isNormalized(source, status); - } - } else { - return FALSE; - } -} - -UnicodeString & U_EXPORT2 -Normalizer::concatenate(const UnicodeString &left, const UnicodeString &right, - UnicodeString &result, - UNormalizationMode mode, int32_t options, - UErrorCode &errorCode) { - if(left.isBogus() || right.isBogus() || U_FAILURE(errorCode)) { - result.setToBogus(); - if(U_SUCCESS(errorCode)) { - errorCode=U_ILLEGAL_ARGUMENT_ERROR; - } - } else { - UnicodeString localDest; - UnicodeString *dest; - - if(&right!=&result) { - dest=&result; - } else { - // the right and result strings are the same object, use a temporary one - dest=&localDest; - } - *dest=left; - const Normalizer2 *n2=Normalizer2Factory::getInstance(mode, errorCode); - if(U_SUCCESS(errorCode)) { - if(options&UNORM_UNICODE_3_2) { - FilteredNormalizer2(*n2, *uniset_getUnicode32Instance(errorCode)). - append(*dest, right, errorCode); - } else { - n2->append(*dest, right, errorCode); - } - } - if(dest==&localDest && U_SUCCESS(errorCode)) { - result=*dest; - } - } - return result; -} - -//------------------------------------------------------------------------- -// Iteration API -//------------------------------------------------------------------------- - -/** - * Return the current character in the normalized text. - */ -UChar32 Normalizer::current() { - if(bufferPos<buffer.length() || nextNormalize()) { - return buffer.char32At(bufferPos); - } else { - return DONE; - } -} - -/** - * Return the next character in the normalized text and advance - * the iteration position by one. If the end - * of the text has already been reached, {@link #DONE} is returned. - */ -UChar32 Normalizer::next() { - if(bufferPos<buffer.length() || nextNormalize()) { - UChar32 c=buffer.char32At(bufferPos); - bufferPos+=U16_LENGTH(c); - return c; - } else { - return DONE; - } -} - -/** - * Return the previous character in the normalized text and decrement - * the iteration position by one. If the beginning - * of the text has already been reached, {@link #DONE} is returned. - */ -UChar32 Normalizer::previous() { - if(bufferPos>0 || previousNormalize()) { - UChar32 c=buffer.char32At(bufferPos-1); - bufferPos-=U16_LENGTH(c); - return c; - } else { - return DONE; - } -} - -void Normalizer::reset() { - currentIndex=nextIndex=text->setToStart(); - clearBuffer(); -} - -void -Normalizer::setIndexOnly(int32_t index) { - text->setIndex(index); // pins index - currentIndex=nextIndex=text->getIndex(); - clearBuffer(); -} - -/** - * Return the first character in the normalized text. This resets - * the <tt>Normalizer's</tt> position to the beginning of the text. - */ -UChar32 Normalizer::first() { - reset(); - return next(); -} - -/** - * Return the last character in the normalized text. This resets - * the <tt>Normalizer's</tt> position to be just before the - * the input text corresponding to that normalized character. - */ -UChar32 Normalizer::last() { - currentIndex=nextIndex=text->setToEnd(); - clearBuffer(); - return previous(); -} - -/** - * Retrieve the current iteration position in the input text that is - * being normalized. This method is useful in applications such as - * searching, where you need to be able to determine the position in - * the input text that corresponds to a given normalized output character. - * <p> - * <b>Note:</b> This method sets the position in the <em>input</em>, while - * {@link #next} and {@link #previous} iterate through characters in the - * <em>output</em>. This means that there is not necessarily a one-to-one - * correspondence between characters returned by <tt>next</tt> and - * <tt>previous</tt> and the indices passed to and returned from - * <tt>setIndex</tt> and {@link #getIndex}. - * - */ -int32_t Normalizer::getIndex() const { - if(bufferPos<buffer.length()) { - return currentIndex; - } else { - return nextIndex; - } -} - -/** - * Retrieve the index of the start of the input text. This is the begin index - * of the <tt>CharacterIterator</tt> or the start (i.e. 0) of the <tt>String</tt> - * over which this <tt>Normalizer</tt> is iterating - */ -int32_t Normalizer::startIndex() const { - return text->startIndex(); -} - -/** - * Retrieve the index of the end of the input text. This is the end index - * of the <tt>CharacterIterator</tt> or the length of the <tt>String</tt> - * over which this <tt>Normalizer</tt> is iterating - */ -int32_t Normalizer::endIndex() const { - return text->endIndex(); -} - -//------------------------------------------------------------------------- -// Property access methods -//------------------------------------------------------------------------- - -void -Normalizer::setMode(UNormalizationMode newMode) -{ - fUMode = newMode; - init(); -} - -UNormalizationMode -Normalizer::getUMode() const -{ - return fUMode; -} - -void -Normalizer::setOption(int32_t option, - UBool value) -{ - if (value) { - fOptions |= option; - } else { - fOptions &= (~option); - } - init(); -} - -UBool -Normalizer::getOption(int32_t option) const -{ - return (fOptions & option) != 0; -} - -/** - * Set the input text over which this <tt>Normalizer</tt> will iterate. - * The iteration position is set to the beginning of the input text. - */ -void -Normalizer::setText(const UnicodeString& newText, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = new StringCharacterIterator(newText); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -/** - * Set the input text over which this <tt>Normalizer</tt> will iterate. - * The iteration position is set to the beginning of the string. - */ -void -Normalizer::setText(const CharacterIterator& newText, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = newText.clone(); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -void -Normalizer::setText(ConstChar16Ptr newText, - int32_t length, - UErrorCode &status) -{ - if (U_FAILURE(status)) { - return; - } - CharacterIterator *newIter = new UCharCharacterIterator(newText, length); - if (newIter == NULL) { - status = U_MEMORY_ALLOCATION_ERROR; - return; - } - delete text; - text = newIter; - reset(); -} - -/** - * Copies the text under iteration into the UnicodeString referred to by "result". - * @param result Receives a copy of the text under iteration. - */ -void -Normalizer::getText(UnicodeString& result) -{ - text->getText(result); -} - -//------------------------------------------------------------------------- -// Private utility methods -//------------------------------------------------------------------------- - -void Normalizer::clearBuffer() { - buffer.remove(); - bufferPos=0; -} - -UBool -Normalizer::nextNormalize() { - clearBuffer(); - currentIndex=nextIndex; - text->setIndex(nextIndex); - if(!text->hasNext()) { - return FALSE; - } - // Skip at least one character so we make progress. - UnicodeString segment(text->next32PostInc()); - while(text->hasNext()) { - UChar32 c; - if(fNorm2->hasBoundaryBefore(c=text->next32PostInc())) { - text->move32(-1, CharacterIterator::kCurrent); - break; - } - segment.append(c); - } - nextIndex=text->getIndex(); - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2->normalize(segment, buffer, errorCode); - return U_SUCCESS(errorCode) && !buffer.isEmpty(); -} - -UBool -Normalizer::previousNormalize() { - clearBuffer(); - nextIndex=currentIndex; - text->setIndex(currentIndex); - if(!text->hasPrevious()) { - return FALSE; - } - UnicodeString segment; - while(text->hasPrevious()) { - UChar32 c=text->previous32(); - segment.insert(0, c); - if(fNorm2->hasBoundaryBefore(c)) { - break; - } - } - currentIndex=text->getIndex(); - UErrorCode errorCode=U_ZERO_ERROR; - fNorm2->normalize(segment, buffer, errorCode); - bufferPos=buffer.length(); - return U_SUCCESS(errorCode) && !buffer.isEmpty(); -} - -U_NAMESPACE_END - -#endif /* #if !UCONFIG_NO_NORMALIZATION */ |