summaryrefslogtreecommitdiff
path: root/deps/node/deps/icu-small/source/i18n/csdetect.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/node/deps/icu-small/source/i18n/csdetect.cpp')
-rw-r--r--deps/node/deps/icu-small/source/i18n/csdetect.cpp487
1 files changed, 0 insertions, 487 deletions
diff --git a/deps/node/deps/icu-small/source/i18n/csdetect.cpp b/deps/node/deps/icu-small/source/i18n/csdetect.cpp
deleted file mode 100644
index 0afecb28..00000000
--- a/deps/node/deps/icu-small/source/i18n/csdetect.cpp
+++ /dev/null
@@ -1,487 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
- **********************************************************************
- * Copyright (C) 2005-2016, International Business Machines
- * Corporation and others. All Rights Reserved.
- **********************************************************************
- */
-
-#include "unicode/utypes.h"
-
-#if !UCONFIG_NO_CONVERSION
-
-#include "unicode/ucsdet.h"
-
-#include "csdetect.h"
-#include "csmatch.h"
-#include "uenumimp.h"
-
-#include "cmemory.h"
-#include "cstring.h"
-#include "umutex.h"
-#include "ucln_in.h"
-#include "uarrsort.h"
-#include "inputext.h"
-#include "csrsbcs.h"
-#include "csrmbcs.h"
-#include "csrutf8.h"
-#include "csrucode.h"
-#include "csr2022.h"
-
-#define NEW_ARRAY(type,count) (type *) uprv_malloc((count) * sizeof(type))
-#define DELETE_ARRAY(array) uprv_free((void *) (array))
-
-U_NAMESPACE_BEGIN
-
-struct CSRecognizerInfo : public UMemory {
- CSRecognizerInfo(CharsetRecognizer *recognizer, UBool isDefaultEnabled)
- : recognizer(recognizer), isDefaultEnabled(isDefaultEnabled) {};
-
- ~CSRecognizerInfo() {delete recognizer;};
-
- CharsetRecognizer *recognizer;
- UBool isDefaultEnabled;
-};
-
-U_NAMESPACE_END
-
-static icu::CSRecognizerInfo **fCSRecognizers = NULL;
-static icu::UInitOnce gCSRecognizersInitOnce;
-static int32_t fCSRecognizers_size = 0;
-
-U_CDECL_BEGIN
-static UBool U_CALLCONV csdet_cleanup(void)
-{
- U_NAMESPACE_USE
- if (fCSRecognizers != NULL) {
- for(int32_t r = 0; r < fCSRecognizers_size; r += 1) {
- delete fCSRecognizers[r];
- fCSRecognizers[r] = NULL;
- }
-
- DELETE_ARRAY(fCSRecognizers);
- fCSRecognizers = NULL;
- fCSRecognizers_size = 0;
- }
- gCSRecognizersInitOnce.reset();
-
- return TRUE;
-}
-
-static int32_t U_CALLCONV
-charsetMatchComparator(const void * /*context*/, const void *left, const void *right)
-{
- U_NAMESPACE_USE
-
- const CharsetMatch **csm_l = (const CharsetMatch **) left;
- const CharsetMatch **csm_r = (const CharsetMatch **) right;
-
- // NOTE: compare is backwards to sort from highest to lowest.
- return (*csm_r)->getConfidence() - (*csm_l)->getConfidence();
-}
-
-static void U_CALLCONV initRecognizers(UErrorCode &status) {
- U_NAMESPACE_USE
- ucln_i18n_registerCleanup(UCLN_I18N_CSDET, csdet_cleanup);
- CSRecognizerInfo *tempArray[] = {
- new CSRecognizerInfo(new CharsetRecog_UTF8(), TRUE),
-
- new CSRecognizerInfo(new CharsetRecog_UTF_16_BE(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_UTF_16_LE(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_UTF_32_BE(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_UTF_32_LE(), TRUE),
-
- new CSRecognizerInfo(new CharsetRecog_8859_1(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_2(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_5_ru(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_6_ar(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_7_el(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_8_I_he(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_8_he(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_windows_1251(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_windows_1256(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_KOI8_R(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_8859_9_tr(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_sjis(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_gb_18030(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_euc_jp(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_euc_kr(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_big5(), TRUE),
-
- new CSRecognizerInfo(new CharsetRecog_2022JP(), TRUE),
-#if !UCONFIG_ONLY_HTML_CONVERSION
- new CSRecognizerInfo(new CharsetRecog_2022KR(), TRUE),
- new CSRecognizerInfo(new CharsetRecog_2022CN(), TRUE),
-
- new CSRecognizerInfo(new CharsetRecog_IBM424_he_rtl(), FALSE),
- new CSRecognizerInfo(new CharsetRecog_IBM424_he_ltr(), FALSE),
- new CSRecognizerInfo(new CharsetRecog_IBM420_ar_rtl(), FALSE),
- new CSRecognizerInfo(new CharsetRecog_IBM420_ar_ltr(), FALSE)
-#endif
- };
- int32_t rCount = UPRV_LENGTHOF(tempArray);
-
- fCSRecognizers = NEW_ARRAY(CSRecognizerInfo *, rCount);
-
- if (fCSRecognizers == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- else {
- fCSRecognizers_size = rCount;
- for (int32_t r = 0; r < rCount; r += 1) {
- fCSRecognizers[r] = tempArray[r];
- if (fCSRecognizers[r] == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- }
- }
- }
-}
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-void CharsetDetector::setRecognizers(UErrorCode &status)
-{
- umtx_initOnce(gCSRecognizersInitOnce, &initRecognizers, status);
-}
-
-CharsetDetector::CharsetDetector(UErrorCode &status)
- : textIn(new InputText(status)), resultArray(NULL),
- resultCount(0), fStripTags(FALSE), fFreshTextSet(FALSE),
- fEnabledRecognizers(NULL)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- setRecognizers(status);
-
- if (U_FAILURE(status)) {
- return;
- }
-
- resultArray = (CharsetMatch **)uprv_malloc(sizeof(CharsetMatch *)*fCSRecognizers_size);
-
- if (resultArray == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
-
- for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
- resultArray[i] = new CharsetMatch();
-
- if (resultArray[i] == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- }
-}
-
-CharsetDetector::~CharsetDetector()
-{
- delete textIn;
-
- for(int32_t i = 0; i < fCSRecognizers_size; i += 1) {
- delete resultArray[i];
- }
-
- uprv_free(resultArray);
-
- if (fEnabledRecognizers) {
- uprv_free(fEnabledRecognizers);
- }
-}
-
-void CharsetDetector::setText(const char *in, int32_t len)
-{
- textIn->setText(in, len);
- fFreshTextSet = TRUE;
-}
-
-UBool CharsetDetector::setStripTagsFlag(UBool flag)
-{
- UBool temp = fStripTags;
- fStripTags = flag;
- fFreshTextSet = TRUE;
- return temp;
-}
-
-UBool CharsetDetector::getStripTagsFlag() const
-{
- return fStripTags;
-}
-
-void CharsetDetector::setDeclaredEncoding(const char *encoding, int32_t len) const
-{
- textIn->setDeclaredEncoding(encoding,len);
-}
-
-int32_t CharsetDetector::getDetectableCount()
-{
- UErrorCode status = U_ZERO_ERROR;
-
- setRecognizers(status);
-
- return fCSRecognizers_size;
-}
-
-const CharsetMatch *CharsetDetector::detect(UErrorCode &status)
-{
- int32_t maxMatchesFound = 0;
-
- detectAll(maxMatchesFound, status);
-
- if(maxMatchesFound > 0) {
- return resultArray[0];
- } else {
- return NULL;
- }
-}
-
-const CharsetMatch * const *CharsetDetector::detectAll(int32_t &maxMatchesFound, UErrorCode &status)
-{
- if(!textIn->isSet()) {
- status = U_MISSING_RESOURCE_ERROR;// TODO: Need to set proper status code for input text not set
-
- return NULL;
- } else if (fFreshTextSet) {
- CharsetRecognizer *csr;
- int32_t i;
-
- textIn->MungeInput(fStripTags);
-
- // Iterate over all possible charsets, remember all that
- // give a match quality > 0.
- resultCount = 0;
- for (i = 0; i < fCSRecognizers_size; i += 1) {
- csr = fCSRecognizers[i]->recognizer;
- if (csr->match(textIn, resultArray[resultCount])) {
- resultCount++;
- }
- }
-
- if (resultCount > 1) {
- uprv_sortArray(resultArray, resultCount, sizeof resultArray[0], charsetMatchComparator, NULL, TRUE, &status);
- }
- fFreshTextSet = FALSE;
- }
-
- maxMatchesFound = resultCount;
-
- return resultArray;
-}
-
-void CharsetDetector::setDetectableCharset(const char *encoding, UBool enabled, UErrorCode &status)
-{
- if (U_FAILURE(status)) {
- return;
- }
-
- int32_t modIdx = -1;
- UBool isDefaultVal = FALSE;
- for (int32_t i = 0; i < fCSRecognizers_size; i++) {
- CSRecognizerInfo *csrinfo = fCSRecognizers[i];
- if (uprv_strcmp(csrinfo->recognizer->getName(), encoding) == 0) {
- modIdx = i;
- isDefaultVal = (csrinfo->isDefaultEnabled == enabled);
- break;
- }
- }
- if (modIdx < 0) {
- // No matching encoding found
- status = U_ILLEGAL_ARGUMENT_ERROR;
- return;
- }
-
- if (fEnabledRecognizers == NULL && !isDefaultVal) {
- // Create an array storing the non default setting
- fEnabledRecognizers = NEW_ARRAY(UBool, fCSRecognizers_size);
- if (fEnabledRecognizers == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return;
- }
- // Initialize the array with default info
- for (int32_t i = 0; i < fCSRecognizers_size; i++) {
- fEnabledRecognizers[i] = fCSRecognizers[i]->isDefaultEnabled;
- }
- }
-
- if (fEnabledRecognizers != NULL) {
- fEnabledRecognizers[modIdx] = enabled;
- }
-}
-
-/*const char *CharsetDetector::getCharsetName(int32_t index, UErrorCode &status) const
-{
- if( index > fCSRecognizers_size-1 || index < 0) {
- status = U_INDEX_OUTOFBOUNDS_ERROR;
-
- return 0;
- } else {
- return fCSRecognizers[index]->getName();
- }
-}*/
-
-U_NAMESPACE_END
-
-U_CDECL_BEGIN
-typedef struct {
- int32_t currIndex;
- UBool all;
- UBool *enabledRecognizers;
-} Context;
-
-
-
-static void U_CALLCONV
-enumClose(UEnumeration *en) {
- if(en->context != NULL) {
- DELETE_ARRAY(en->context);
- }
-
- DELETE_ARRAY(en);
-}
-
-static int32_t U_CALLCONV
-enumCount(UEnumeration *en, UErrorCode *) {
- if (((Context *)en->context)->all) {
- // ucsdet_getAllDetectableCharsets, all charset detector names
- return fCSRecognizers_size;
- }
-
- // Otherwise, ucsdet_getDetectableCharsets - only enabled ones
- int32_t count = 0;
- UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
- if (enabledArray != NULL) {
- // custom set
- for (int32_t i = 0; i < fCSRecognizers_size; i++) {
- if (enabledArray[i]) {
- count++;
- }
- }
- } else {
- // default set
- for (int32_t i = 0; i < fCSRecognizers_size; i++) {
- if (fCSRecognizers[i]->isDefaultEnabled) {
- count++;
- }
- }
- }
- return count;
-}
-
-static const char* U_CALLCONV
-enumNext(UEnumeration *en, int32_t *resultLength, UErrorCode * /*status*/) {
- const char *currName = NULL;
-
- if (((Context *)en->context)->currIndex < fCSRecognizers_size) {
- if (((Context *)en->context)->all) {
- // ucsdet_getAllDetectableCharsets, all charset detector names
- currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
- ((Context *)en->context)->currIndex++;
- } else {
- // ucsdet_getDetectableCharsets
- UBool *enabledArray = ((Context *)en->context)->enabledRecognizers;
- if (enabledArray != NULL) {
- // custome set
- while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
- if (enabledArray[((Context *)en->context)->currIndex]) {
- currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
- }
- ((Context *)en->context)->currIndex++;
- }
- } else {
- // default set
- while (currName == NULL && ((Context *)en->context)->currIndex < fCSRecognizers_size) {
- if (fCSRecognizers[((Context *)en->context)->currIndex]->isDefaultEnabled) {
- currName = fCSRecognizers[((Context *)en->context)->currIndex]->recognizer->getName();
- }
- ((Context *)en->context)->currIndex++;
- }
- }
- }
- }
-
- if(resultLength != NULL) {
- *resultLength = currName == NULL ? 0 : (int32_t)uprv_strlen(currName);
- }
-
- return currName;
-}
-
-
-static void U_CALLCONV
-enumReset(UEnumeration *en, UErrorCode *) {
- ((Context *)en->context)->currIndex = 0;
-}
-
-static const UEnumeration gCSDetEnumeration = {
- NULL,
- NULL,
- enumClose,
- enumCount,
- uenum_unextDefault,
- enumNext,
- enumReset
-};
-
-U_CDECL_END
-
-U_NAMESPACE_BEGIN
-
-UEnumeration * CharsetDetector::getAllDetectableCharsets(UErrorCode &status)
-{
-
- /* Initialize recognized charsets. */
- setRecognizers(status);
-
- if(U_FAILURE(status)) {
- return 0;
- }
-
- UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
- if (en == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
- en->context = (void*)NEW_ARRAY(Context, 1);
- if (en->context == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- DELETE_ARRAY(en);
- return 0;
- }
- uprv_memset(en->context, 0, sizeof(Context));
- ((Context*)en->context)->all = TRUE;
- return en;
-}
-
-UEnumeration * CharsetDetector::getDetectableCharsets(UErrorCode &status) const
-{
- if(U_FAILURE(status)) {
- return 0;
- }
-
- UEnumeration *en = NEW_ARRAY(UEnumeration, 1);
- if (en == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
- memcpy(en, &gCSDetEnumeration, sizeof(UEnumeration));
- en->context = (void*)NEW_ARRAY(Context, 1);
- if (en->context == NULL) {
- status = U_MEMORY_ALLOCATION_ERROR;
- DELETE_ARRAY(en);
- return 0;
- }
- uprv_memset(en->context, 0, sizeof(Context));
- ((Context*)en->context)->all = FALSE;
- ((Context*)en->context)->enabledRecognizers = fEnabledRecognizers;
- return en;
-}
-
-U_NAMESPACE_END
-
-#endif