diff options
Diffstat (limited to 'deps/node/deps/icu-small/source/common/uloc_tag.cpp')
-rw-r--r-- | deps/node/deps/icu-small/source/common/uloc_tag.cpp | 2884 |
1 files changed, 0 insertions, 2884 deletions
diff --git a/deps/node/deps/icu-small/source/common/uloc_tag.cpp b/deps/node/deps/icu-small/source/common/uloc_tag.cpp deleted file mode 100644 index 8120331c..00000000 --- a/deps/node/deps/icu-small/source/common/uloc_tag.cpp +++ /dev/null @@ -1,2884 +0,0 @@ -// © 2016 and later: Unicode, Inc. and others. -// License & terms of use: http://www.unicode.org/copyright.html -/* -********************************************************************** -* Copyright (C) 2009-2015, International Business Machines -* Corporation and others. All Rights Reserved. -********************************************************************** -*/ - -#include "unicode/utypes.h" -#include "unicode/ures.h" -#include "unicode/putil.h" -#include "unicode/uloc.h" -#include "ustr_imp.h" -#include "charstr.h" -#include "cmemory.h" -#include "cstring.h" -#include "putilimp.h" -#include "uinvchar.h" -#include "ulocimp.h" -#include "uvector.h" -#include "uassert.h" - - -/* struct holding a single variant */ -typedef struct VariantListEntry { - const char *variant; - struct VariantListEntry *next; -} VariantListEntry; - -/* struct holding a single attribute value */ -typedef struct AttributeListEntry { - const char *attribute; - struct AttributeListEntry *next; -} AttributeListEntry; - -/* struct holding a single extension */ -typedef struct ExtensionListEntry { - const char *key; - const char *value; - struct ExtensionListEntry *next; -} ExtensionListEntry; - -#define MAXEXTLANG 3 -typedef struct ULanguageTag { - char *buf; /* holding parsed subtags */ - const char *language; - const char *extlang[MAXEXTLANG]; - const char *script; - const char *region; - VariantListEntry *variants; - ExtensionListEntry *extensions; - const char *privateuse; - const char *grandfathered; -} ULanguageTag; - -#define MINLEN 2 -#define SEP '-' -#define PRIVATEUSE 'x' -#define LDMLEXT 'u' - -#define LOCALE_SEP '_' -#define LOCALE_EXT_SEP '@' -#define LOCALE_KEYWORD_SEP ';' -#define LOCALE_KEY_TYPE_SEP '=' - -#define ISALPHA(c) uprv_isASCIILetter(c) -#define ISNUMERIC(c) ((c)>='0' && (c)<='9') - -static const char EMPTY[] = ""; -static const char LANG_UND[] = "und"; -static const char PRIVATEUSE_KEY[] = "x"; -static const char _POSIX[] = "_POSIX"; -static const char POSIX_KEY[] = "va"; -static const char POSIX_VALUE[] = "posix"; -static const char LOCALE_ATTRIBUTE_KEY[] = "attribute"; -static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant"; -static const char LOCALE_TYPE_YES[] = "yes"; - -#define LANG_UND_LEN 3 - -/* - Updated on 2018-09-12 from - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - This table has 2 parts. The parts for Grandfathered tags is generated by the - following scripts from the IANA language tag registry. - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ - egrep -A 7 'Type: grandfathered' | \ - egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \ - awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\ - tr 'A-Z' 'a-z' - - - The 2nd part is made of five ICU-specific entries. They're kept for - the backward compatibility for now, even though there are no preferred - values. They may have to be removed for the strict BCP 47 compliance. - -*/ -static const char* const GRANDFATHERED[] = { -/* grandfathered preferred */ - "art-lojban", "jbo", - "en-gb-oed", "en-gb-oxendict", - "i-ami", "ami", - "i-bnn", "bnn", - "i-hak", "hak", - "i-klingon", "tlh", - "i-lux", "lb", - "i-navajo", "nv", - "i-pwn", "pwn", - "i-tao", "tao", - "i-tay", "tay", - "i-tsu", "tsu", - "no-bok", "nb", - "no-nyn", "nn", - "sgn-be-fr", "sfb", - "sgn-be-nl", "vgt", - "sgn-ch-de", "sgg", - "zh-guoyu", "cmn", - "zh-hakka", "hak", - "zh-min-nan", "nan", - "zh-xiang", "hsn", - - // Grandfathered tags with no preferred value in the IANA - // registry. Kept for now for the backward compatibility - // because ICU has mapped them this way. - "cel-gaulish", "xtg-x-cel-gaulish", - "i-default", "en-x-i-default", - "i-enochian", "und-x-i-enochian", - "i-mingo", "see-x-i-mingo", - "zh-min", "nan-x-zh-min", -}; - -/* - Updated on 2018-09-12 from - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - The table lists redundant tags with preferred value in the IANA languate tag registry. - It's generated with the following command: - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\ - grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \ - awk -n '/Tag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \ - tr 'A-Z' 'a-z' - - In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because - a variant tag 'hepburn-heploc' has the preferred subtag, 'alaic97'. -*/ - -static const char* const REDUNDANT[] = { -// redundant preferred - "sgn-br", "bzs", - "sgn-co", "csn", - "sgn-de", "gsg", - "sgn-dk", "dsl", - "sgn-es", "ssp", - "sgn-fr", "fsl", - "sgn-gb", "bfi", - "sgn-gr", "gss", - "sgn-ie", "isg", - "sgn-it", "ise", - "sgn-jp", "jsl", - "sgn-mx", "mfs", - "sgn-ni", "ncs", - "sgn-nl", "dse", - "sgn-no", "nsl", - "sgn-pt", "psr", - "sgn-se", "swl", - "sgn-us", "ase", - "sgn-za", "sfs", - "zh-cmn", "cmn", - "zh-cmn-hans", "cmn-hans", - "zh-cmn-hant", "cmn-hant", - "zh-gan", "gan", - "zh-wuu", "wuu", - "zh-yue", "yue", - - // variant tag with preferred value - "ja-latn-hepburn-heploc", "ja-latn-alalc97", -}; - -/* - Updated on 2018-09-12 from - https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry . - - grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \ - grep -B1 'Preferred' | grep -v '^--' | \ - awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' - - Make sure that 2-letter language subtags come before 3-letter subtags. -*/ -static const char DEPRECATEDLANGS[][4] = { -/* deprecated new */ - "in", "id", - "iw", "he", - "ji", "yi", - "jw", "jv", - "mo", "ro", - "aam", "aas", - "adp", "dz", - "aue", "ktz", - "ayx", "nun", - "bgm", "bcg", - "bjd", "drl", - "ccq", "rki", - "cjr", "mom", - "cka", "cmr", - "cmk", "xch", - "coy", "pij", - "cqu", "quh", - "drh", "khk", - "drw", "prs", - "gav", "dev", - "gfx", "vaj", - "ggn", "gvr", - "gti", "nyc", - "guv", "duz", - "hrr", "jal", - "ibi", "opa", - "ilw", "gal", - "jeg", "oyb", - "kgc", "tdf", - "kgh", "kml", - "koj", "kwv", - "krm", "bmf", - "ktr", "dtp", - "kvs", "gdj", - "kwq", "yam", - "kxe", "tvd", - "kzj", "dtp", - "kzt", "dtp", - "lii", "raq", - "lmm", "rmx", - "meg", "cir", - "mst", "mry", - "mwj", "vaj", - "myt", "mry", - "nad", "xny", - "ncp", "kdz", - "nnx", "ngv", - "nts", "pij", - "oun", "vaj", - "pcr", "adx", - "pmc", "huw", - "pmu", "phr", - "ppa", "bfy", - "ppr", "lcq", - "pry", "prt", - "puz", "pub", - "sca", "hle", - "skk", "oyb", - "tdu", "dtp", - "thc", "tpo", - "thx", "oyb", - "tie", "ras", - "tkk", "twm", - "tlw", "weo", - "tmp", "tyj", - "tne", "kak", - "tnf", "prs", - "tsf", "taj", - "uok", "ema", - "xba", "cax", - "xia", "acn", - "xkh", "waw", - "xsj", "suj", - "ybd", "rki", - "yma", "lrr", - "ymt", "mtm", - "yos", "zom", - "yuu", "yug", -}; - -/* - Updated on 2018-04-24 from - - curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \ - grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \ - grep -B1 'Preferred' | \ - awk -n '/Subtag/ {printf(" \"%s\", ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' -*/ -static const char DEPRECATEDREGIONS[][3] = { -/* deprecated new */ - "BU", "MM", - "DD", "DE", - "FX", "FR", - "TP", "TL", - "YD", "YE", - "ZR", "CD", -}; - -/* -* ------------------------------------------------- -* -* These ultag_ functions may be exposed as APIs later -* -* ------------------------------------------------- -*/ - -static ULanguageTag* -ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status); - -static void -ultag_close(ULanguageTag* langtag); - -static const char* -ultag_getLanguage(const ULanguageTag* langtag); - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag); -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag); - -static const char* -ultag_getScript(const ULanguageTag* langtag); - -static const char* -ultag_getRegion(const ULanguageTag* langtag); - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getVariantsSize(const ULanguageTag* langtag); - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx); - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx); - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag); - -static const char* -ultag_getPrivateUse(const ULanguageTag* langtag); - -#if 0 -static const char* -ultag_getGrandfathered(const ULanguageTag* langtag); -#endif - -namespace { - -// Helper class to memory manage CharString objects. -// Only ever stack-allocated, does not need to inherit UMemory. -class CharStringPool { -public: - CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {} - ~CharStringPool() = default; - - CharStringPool(const CharStringPool&) = delete; - CharStringPool& operator=(const CharStringPool&) = delete; - - icu::CharString* create() { - if (U_FAILURE(status)) { - return nullptr; - } - icu::CharString* const obj = new icu::CharString; - if (obj == nullptr) { - status = U_MEMORY_ALLOCATION_ERROR; - return nullptr; - } - pool.addElement(obj, status); - if (U_FAILURE(status)) { - delete obj; - return nullptr; - } - return obj; - } - -private: - static void U_CALLCONV deleter(void* obj) { - delete static_cast<icu::CharString*>(obj); - } - - UErrorCode status; - icu::UVector pool; -}; - -} // namespace - -/* -* ------------------------------------------------- -* -* Language subtag syntax validation functions -* -* ------------------------------------------------- -*/ - -static UBool -_isAlphaString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isAlphaNumericString(const char* s, int32_t len) { - int32_t i; - for (i = 0; i < len; i++) { - if (!ISALPHA(*(s + i)) && !ISNUMERIC(*(s + i))) { - return FALSE; - } - } - return TRUE; -} - -static UBool -_isLanguageSubtag(const char* s, int32_t len) { - /* - * language = 2*3ALPHA ; shortest ISO 639 code - * ["-" extlang] ; sometimes followed by - * ; extended language subtags - * / 4ALPHA ; or reserved for future use - * / 5*8ALPHA ; or registered language subtag - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtlangSubtag(const char* s, int32_t len) { - /* - * extlang = 3ALPHA ; selected ISO 639 codes - * *2("-" 3ALPHA) ; permanently reserved - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 3 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isScriptSubtag(const char* s, int32_t len) { - /* - * script = 4ALPHA ; ISO 15924 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 4 && _isAlphaString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isRegionSubtag(const char* s, int32_t len) { - /* - * region = 2ALPHA ; ISO 3166-1 code - * / 3DIGIT ; UN M.49 code - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && _isAlphaString(s, len)) { - return TRUE; - } - if (len == 3 && _isNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isVariantSubtag(const char* s, int32_t len) { - /* - * variant = 5*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 5 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - if (len == 4 && ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseVariantSubtag(const char* s, int32_t len) { - /* - * variant = 1*8alphanum ; registered variants - * / (DIGIT 3alphanum) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSingleton(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 1 && ISALPHA(*s) && (uprv_tolower(*s) != PRIVATEUSE)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtag(const char* s, int32_t len) { - /* - * extension = singleton 1*("-" (2*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 2 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isExtensionSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isExtensionSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -static UBool -_isPrivateuseValueSubtag(const char* s, int32_t len) { - /* - * privateuse = "x" 1*("-" (1*8alphanum)) - */ - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len >= 1 && len <= 8 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -static UBool -_isPrivateuseValueSubtags(const char* s, int32_t len) { - const char *p = s; - const char *pSubtag = NULL; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - while ((p - s) < len) { - if (*p == SEP) { - if (pSubtag == NULL) { - return FALSE; - } - if (!_isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag))) { - return FALSE; - } - pSubtag = NULL; - } else if (pSubtag == NULL) { - pSubtag = p; - } - p++; - } - if (pSubtag == NULL) { - return FALSE; - } - return _isPrivateuseValueSubtag(pSubtag, (int32_t)(p - pSubtag)); -} - -U_CFUNC UBool -ultag_isUnicodeLocaleKey(const char* s, int32_t len) { - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - if (len == 2 && _isAlphaNumericString(s, len)) { - return TRUE; - } - return FALSE; -} - -U_CFUNC UBool -ultag_isUnicodeLocaleType(const char*s, int32_t len) { - const char* p; - int32_t subtagLen = 0; - - if (len < 0) { - len = (int32_t)uprv_strlen(s); - } - - for (p = s; len > 0; p++, len--) { - if (*p == SEP) { - if (subtagLen < 3) { - return FALSE; - } - subtagLen = 0; - } else if (ISALPHA(*p) || ISNUMERIC(*p)) { - subtagLen++; - if (subtagLen > 8) { - return FALSE; - } - } else { - return FALSE; - } - } - - return (subtagLen >= 3); -} -/* -* ------------------------------------------------- -* -* Helper functions -* -* ------------------------------------------------- -*/ - -static UBool -_addVariantToList(VariantListEntry **first, VariantListEntry *var) { - UBool bAdded = TRUE; - - if (*first == NULL) { - var->next = NULL; - *first = var; - } else { - VariantListEntry *prev, *cur; - int32_t cmp; - - /* variants order should be preserved */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = var; - var->next = NULL; - break; - } - - /* Checking for duplicate variant */ - cmp = uprv_compareInvCharsAsAscii(var->variant, cur->variant); - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static UBool -_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) { - UBool bAdded = TRUE; - - if (*first == NULL) { - attr->next = NULL; - *first = attr; - } else { - AttributeListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = attr; - attr->next = NULL; - break; - } - cmp = uprv_compareInvCharsAsAscii(attr->attribute, cur->attribute); - if (cmp < 0) { - if (prev == NULL) { - *first = attr; - } else { - prev->next = attr; - } - attr->next = cur; - break; - } - if (cmp == 0) { - /* duplicated variant */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - - -static UBool -_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) { - UBool bAdded = TRUE; - - if (*first == NULL) { - ext->next = NULL; - *first = ext; - } else { - ExtensionListEntry *prev, *cur; - int32_t cmp; - - /* reorder variants in alphabetical order */ - prev = NULL; - cur = *first; - while (TRUE) { - if (cur == NULL) { - prev->next = ext; - ext->next = NULL; - break; - } - if (localeToBCP) { - /* special handling for locale to bcp conversion */ - int32_t len, curlen; - - len = (int32_t)uprv_strlen(ext->key); - curlen = (int32_t)uprv_strlen(cur->key); - - if (len == 1 && curlen == 1) { - if (*(ext->key) == *(cur->key)) { - cmp = 0; - } else if (*(ext->key) == PRIVATEUSE) { - cmp = 1; - } else if (*(cur->key) == PRIVATEUSE) { - cmp = -1; - } else { - cmp = *(ext->key) - *(cur->key); - } - } else if (len == 1) { - cmp = *(ext->key) - LDMLEXT; - } else if (curlen == 1) { - cmp = LDMLEXT - *(cur->key); - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); - /* Both are u extension keys - we need special handling for 'attribute' */ - if (cmp != 0) { - if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = 1; - } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - cmp = -1; - } - } - } - } else { - cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key); - } - if (cmp < 0) { - if (prev == NULL) { - *first = ext; - } else { - prev->next = ext; - } - ext->next = cur; - break; - } - if (cmp == 0) { - /* duplicated extension key */ - bAdded = FALSE; - break; - } - prev = cur; - cur = cur->next; - } - } - - return bAdded; -} - -static void -_initializeULanguageTag(ULanguageTag* langtag) { - int32_t i; - - langtag->buf = NULL; - - langtag->language = EMPTY; - for (i = 0; i < MAXEXTLANG; i++) { - langtag->extlang[i] = NULL; - } - - langtag->script = EMPTY; - langtag->region = EMPTY; - - langtag->variants = NULL; - langtag->extensions = NULL; - - langtag->grandfathered = EMPTY; - langtag->privateuse = EMPTY; -} - -static int32_t -_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_LANG_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getLanguage(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - len = 0; - } - - /* Note: returned language code is in lower case letters */ - - if (len == 0) { - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); - } - reslen += LANG_UND_LEN; - } else if (!_isLanguageSubtag(buf, len)) { - /* invalid language code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, LANG_UND, uprv_min(LANG_UND_LEN, capacity - reslen)); - } - reslen += LANG_UND_LEN; - } else { - /* resolve deprecated */ - for (i = 0; i < UPRV_LENGTHOF(DEPRECATEDLANGS); i += 2) { - // 2-letter deprecated subtags are listede before 3-letter - // ones in DEPRECATEDLANGS[]. Get out of loop on coming - // across the 1st 3-letter subtag, if the input is a 2-letter code. - // to avoid continuing to try when there's no match. - if (uprv_strlen(buf) < uprv_strlen(DEPRECATEDLANGS[i])) break; - if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDLANGS[i]) == 0) { - uprv_strcpy(buf, DEPRECATEDLANGS[i + 1]); - len = (int32_t)uprv_strlen(buf); - break; - } - } - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_SCRIPT_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getScript(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if (!_isScriptSubtag(buf, len)) { - /* invalid script code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) { - char buf[ULOC_COUNTRY_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getCountry(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - if (!_isRegionSubtag(buf, len)) { - /* invalid region code */ - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - /* resolve deprecated */ - for (int i = 0; i < UPRV_LENGTHOF(DEPRECATEDREGIONS); i += 2) { - if (uprv_compareInvCharsAsAscii(buf, DEPRECATEDREGIONS[i]) == 0) { - uprv_strcpy(buf, DEPRECATEDREGIONS[i + 1]); - len = (int32_t)uprv_strlen(buf); - break; - } - } - - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, buf, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) { - char buf[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pVar; - UBool bNext = TRUE; - VariantListEntry *var; - VariantListEntry *varFirst = NULL; - - pVar = NULL; - p = buf; - while (bNext) { - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pVar == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore empty variant */ - } else { - /* ICU uses upper case letters for variants, but - the canonical format is lowercase in BCP47 */ - for (i = 0; *(pVar + i) != 0; i++) { - *(pVar + i) = uprv_tolower(*(pVar + i)); - } - - /* validate */ - if (_isVariantSubtag(pVar, -1)) { - if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) { - /* emit the variant to the list */ - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - var->variant = pVar; - if (!_addVariantToList(&varFirst, var)) { - /* duplicated variant */ - uprv_free(var); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } else { - /* Special handling for POSIX variant, need to remember that we had it and then */ - /* treat it like an extension later. */ - *hadPosix = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else if (_isPrivateuseValueSubtag(pVar, -1)) { - /* Handle private use subtags separately */ - break; - } - } - /* reset variant starting position */ - pVar = NULL; - } else if (pVar == NULL) { - pVar = p; - } - p++; - } - - if (U_SUCCESS(*status)) { - if (varFirst != NULL) { - int32_t varLen; - - /* write out validated/normalized variants to the target */ - var = varFirst; - while (var != NULL) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - varLen = (int32_t)uprv_strlen(var->variant); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen)); - } - reslen += varLen; - var = var->next; - } - } - } - - /* clean up */ - var = varFirst; - while (var != NULL) { - VariantListEntry *tmpVar = var->next; - uprv_free(var); - var = tmpVar; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - return reslen; -} - -static int32_t -_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 }; - int32_t attrBufLength = 0; - UEnumeration *keywordEnum = NULL; - int32_t reslen = 0; - - keywordEnum = uloc_openKeywords(localeID, status); - if (U_FAILURE(*status) && !hadPosix) { - uenum_close(keywordEnum); - return 0; - } - if (keywordEnum != NULL || hadPosix) { - /* reorder extensions */ - int32_t len; - const char *key; - ExtensionListEntry *firstExt = NULL; - ExtensionListEntry *ext; - AttributeListEntry *firstAttr = NULL; - AttributeListEntry *attr; - char *attrValue; - CharStringPool extBufPool; - const char *bcpKey=nullptr, *bcpValue=nullptr; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t keylen; - UBool isBcpUExt; - - while (TRUE) { - icu::CharString buf; - key = uenum_next(keywordEnum, NULL, status); - if (key == NULL) { - break; - } - char* buffer; - int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY; - - for (;;) { - buffer = buf.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - break; - } - - len = uloc_getKeywordValue( - localeID, key, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = len; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - /* ignore this keyword */ - tmpStatus = U_ZERO_ERROR; - continue; - } - - buf.append(buffer, len, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. - } - - keylen = (int32_t)uprv_strlen(key); - isBcpUExt = (keylen > 1); - - /* special keyword used for representing Unicode locale attributes */ - if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) { - if (len > 0) { - int32_t i = 0; - while (TRUE) { - attrBufLength = 0; - for (; i < len; i++) { - if (buf[i] != '-') { - attrBuf[attrBufLength++] = buf[i]; - } else { - i++; - break; - } - } - if (attrBufLength > 0) { - attrBuf[attrBufLength] = 0; - - } else if (i >= len){ - break; - } - - /* create AttributeListEntry */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - attrValue = (char*)uprv_malloc(attrBufLength + 1); - if (attrValue == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - uprv_strcpy(attrValue, attrBuf); - attr->attribute = attrValue; - - if (!_addAttributeToList(&firstAttr, attr)) { - uprv_free(attr); - uprv_free(attrValue); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - /* for a place holder ExtensionListEntry */ - bcpKey = LOCALE_ATTRIBUTE_KEY; - bcpValue = NULL; - } - } else if (isBcpUExt) { - bcpKey = uloc_toUnicodeLocaleKey(key); - if (bcpKey == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - - /* we've checked buf is null-terminated above */ - bcpValue = uloc_toUnicodeLocaleType(key, buf.data()); - if (bcpValue == NULL) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - if (bcpValue == buf.data()) { - /* - When uloc_toUnicodeLocaleType(key, buf) returns the - input value as is, the value is well-formed, but has - no known mapping. This implementation normalizes the - value to lower case - */ - icu::CharString* extBuf = extBufPool.create(); - if (extBuf == nullptr) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue)); - int32_t resultCapacity; - char* pExtBuf = extBuf->getAppendBuffer( - /*minCapacity=*/bcpValueLen, - /*desiredCapacityHint=*/bcpValueLen, - resultCapacity, - tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - - uprv_strcpy(pExtBuf, bcpValue); - T_CString_toLowerCase(pExtBuf); - - extBuf->append(pExtBuf, bcpValueLen, tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - - bcpValue = extBuf->data(); - } - } else { - if (*key == PRIVATEUSE) { - if (!_isPrivateuseValueSubtags(buf.data(), len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } else { - if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - continue; - } - } - bcpKey = key; - icu::CharString* extBuf = extBufPool.create(); - if (extBuf == nullptr) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - extBuf->append(buf.data(), len, tmpStatus); - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - break; - } - bcpValue = extBuf->data(); - } - - /* create ExtensionListEntry */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - ext->key = bcpKey; - ext->value = bcpValue; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - /* Special handling for POSIX variant - add the keywords for POSIX */ - if (hadPosix) { - /* create ExtensionListEntry for POSIX */ - ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (ext == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - ext->key = POSIX_KEY; - ext->value = POSIX_VALUE; - - if (!_addExtensionToList(&firstExt, ext, TRUE)) { - uprv_free(ext); - } - } - - if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) { - UBool startLDMLExtension = FALSE; - for (ext = firstExt; ext; ext = ext->next) { - if (!startLDMLExtension && uprv_strlen(ext->key) > 1) { - /* first LDML u singlton extension */ - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - if (reslen < capacity) { - *(appendAt + reslen) = LDMLEXT; - } - reslen++; - - startLDMLExtension = TRUE; - } - - /* write out the sorted BCP47 attributes, extensions and private use */ - if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) { - /* write the value for the attributes */ - for (attr = firstAttr; attr; attr = attr->next) { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(attr->attribute); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } else { - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - if (reslen < capacity) { - *(appendAt + reslen) = SEP; - } - reslen++; - len = (int32_t)uprv_strlen(ext->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - } -cleanup: - /* clean up */ - ext = firstExt; - while (ext != NULL) { - ExtensionListEntry *tmpExt = ext->next; - uprv_free(ext); - ext = tmpExt; - } - - attr = firstAttr; - while (attr != NULL) { - AttributeListEntry *tmpAttr = attr->next; - char *pValue = (char *)attr->attribute; - uprv_free(pValue); - uprv_free(attr); - attr = tmpAttr; - } - - uenum_close(keywordEnum); - - if (U_FAILURE(*status)) { - return 0; - } - } - - return u_terminateChars(appendAt, capacity, reslen, status); -} - -/** - * Append keywords parsed from LDML extension value - * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional} - * Note: char* buf is used for storing keywords - */ -static void -_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) { - const char *pTag; /* beginning of current subtag */ - const char *pKwds; /* beginning of key-type pairs */ - UBool variantExists = *posixVariant; - - ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */ - ExtensionListEntry *kwd, *nextKwd; - - AttributeListEntry *attrFirst = NULL; /* first attribute */ - AttributeListEntry *attr, *nextAttr; - - int32_t len; - int32_t bufIdx = 0; - - char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - int32_t attrBufIdx = 0; - - /* Reset the posixVariant value */ - *posixVariant = FALSE; - - pTag = ldmlext; - pKwds = NULL; - - /* Iterate through u extension attributes */ - while (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - pKwds = pTag; - break; - } - - /* add this attribute to the list */ - attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry)); - if (attr == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) { - uprv_memcpy(&attrBuf[attrBufIdx], pTag, len); - attrBuf[attrBufIdx + len] = 0; - attr->attribute = &attrBuf[attrBufIdx]; - attrBufIdx += (len + 1); - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(attr); - goto cleanup; - } - - if (!_addAttributeToList(&attrFirst, attr)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(attr); - goto cleanup; - } - - /* next tag */ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } - - if (attrFirst) { - /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */ - - if (attrBufIdx > bufSize) { - /* attrBufIdx == <total length of attribute subtag> + 1 */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = LOCALE_ATTRIBUTE_KEY; - kwd->value = buf; - - /* attribute subtags sorted in alphabetical order as type */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - - /* buffer size check is done above */ - if (attr != attrFirst) { - *(buf + bufIdx) = SEP; - bufIdx++; - } - - len = static_cast<int32_t>(uprv_strlen(attr->attribute)); - uprv_memcpy(buf + bufIdx, attr->attribute, len); - bufIdx += len; - - attr = nextAttr; - } - *(buf + bufIdx) = 0; - bufIdx++; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - uprv_free(kwd); - goto cleanup; - } - - /* once keyword entry is created, delete the attribute list */ - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - attrFirst = NULL; - } - - if (pKwds) { - const char *pBcpKey = NULL; /* u extenstion key subtag */ - const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */ - int32_t bcpKeyLen = 0; - int32_t bcpTypeLen = 0; - UBool isDone = FALSE; - - pTag = pKwds; - /* BCP47 representation of LDML key/type pairs */ - while (!isDone) { - const char *pNextBcpKey = NULL; - int32_t nextBcpKeyLen = 0; - UBool emitKeyword = FALSE; - - if (*pTag) { - /* locate next separator char */ - for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++); - - if (ultag_isUnicodeLocaleKey(pTag, len)) { - if (pBcpKey) { - emitKeyword = TRUE; - pNextBcpKey = pTag; - nextBcpKeyLen = len; - } else { - pBcpKey = pTag; - bcpKeyLen = len; - } - } else { - U_ASSERT(pBcpKey != NULL); - /* within LDML type subtags */ - if (pBcpType) { - bcpTypeLen += (len + 1); - } else { - pBcpType = pTag; - bcpTypeLen = len; - } - } - - /* next tag */ - pTag += len; - if (*pTag) { - /* next to the separator */ - pTag++; - } - } else { - /* processing last one */ - emitKeyword = TRUE; - isDone = TRUE; - } - - if (emitKeyword) { - const char *pKey = NULL; /* LDML key */ - const char *pType = NULL; /* LDML type */ - - char bcpKeyBuf[9]; /* BCP key length is always 2 for now */ - - U_ASSERT(pBcpKey != NULL); - - if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) { - /* the BCP key is invalid */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen); - bcpKeyBuf[bcpKeyLen] = 0; - - /* u extension key to LDML key */ - pKey = uloc_toLegacyKey(bcpKeyBuf); - if (pKey == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pKey == bcpKeyBuf) { - /* - The key returned by toLegacyKey points to the input buffer. - We normalize the result key to lower case. - */ - T_CString_toLowerCase(bcpKeyBuf); - if (bufSize - bufIdx - 1 >= bcpKeyLen) { - uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen); - pKey = buf + bufIdx; - bufIdx += bcpKeyLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - - if (pBcpType) { - char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */ - if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) { - /* the BCP type is too long */ - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - - uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen); - bcpTypeBuf[bcpTypeLen] = 0; - - /* BCP type to locale type */ - pType = uloc_toLegacyType(pKey, bcpTypeBuf); - if (pType == NULL) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - goto cleanup; - } - if (pType == bcpTypeBuf) { - /* - The type returned by toLegacyType points to the input buffer. - We normalize the result type to lower case. - */ - /* normalize to lower case */ - T_CString_toLowerCase(bcpTypeBuf); - if (bufSize - bufIdx - 1 >= bcpTypeLen) { - uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen); - pType = buf + bufIdx; - bufIdx += bcpTypeLen; - *(buf + bufIdx) = 0; - bufIdx++; - } else { - *status = U_BUFFER_OVERFLOW_ERROR; - goto cleanup; - } - } - } else { - /* typeless - default type value is "yes" */ - pType = LOCALE_TYPE_YES; - } - - /* Special handling for u-va-posix, since we want to treat this as a variant, - not as a keyword */ - if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) { - *posixVariant = TRUE; - } else { - /* create an ExtensionListEntry for this keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto cleanup; - } - - kwd->key = pKey; - kwd->value = pType; - - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - // duplicate keyword is allowed, Only the first - // is honored. - uprv_free(kwd); - } - } - - pBcpKey = pNextBcpKey; - bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0; - pBcpType = NULL; - bcpTypeLen = 0; - } - } - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - _addExtensionToList(appendTo, kwd, FALSE); - kwd = nextKwd; - } - - return; - -cleanup: - attr = attrFirst; - while (attr != NULL) { - nextAttr = attr->next; - uprv_free(attr); - attr = nextAttr; - } - - kwd = kwdFirst; - while (kwd != NULL) { - nextKwd = kwd->next; - uprv_free(kwd); - kwd = nextKwd; - } -} - - -static int32_t -_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) { - int32_t reslen = 0; - int32_t i, n; - int32_t len; - ExtensionListEntry *kwdFirst = NULL; - ExtensionListEntry *kwd; - const char *key, *type; - char *kwdBuf = NULL; - int32_t kwdBufLength = capacity; - UBool posixVariant = FALSE; - - if (U_FAILURE(*status)) { - return 0; - } - - kwdBuf = (char*)uprv_malloc(kwdBufLength); - if (kwdBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return 0; - } - - /* Determine if variants already exists */ - if (ultag_getVariantsSize(langtag)) { - posixVariant = TRUE; - } - - n = ultag_getExtensionsSize(langtag); - - /* resolve locale keywords and reordering keys */ - for (i = 0; i < n; i++) { - key = ultag_getExtensionKey(langtag, i); - type = ultag_getExtensionValue(langtag, i); - if (*key == LDMLEXT) { - _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status); - if (U_FAILURE(*status)) { - break; - } - } else { - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - break; - } - kwd->key = key; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } - } - } - - if (U_SUCCESS(*status)) { - type = ultag_getPrivateUse(langtag); - if ((int32_t)uprv_strlen(type) > 0) { - /* add private use as a keyword */ - kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (kwd == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - } else { - kwd->key = PRIVATEUSE_KEY; - kwd->value = type; - if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) { - uprv_free(kwd); - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - } - } - } - - /* If a POSIX variant was in the extensions, write it out before writing the keywords. */ - - if (U_SUCCESS(*status) && posixVariant) { - len = (int32_t) uprv_strlen(_POSIX); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - - if (U_SUCCESS(*status) && kwdFirst != NULL) { - /* write out the sorted keywords */ - UBool firstValue = TRUE; - kwd = kwdFirst; - do { - if (reslen < capacity) { - if (firstValue) { - /* '@' */ - *(appendAt + reslen) = LOCALE_EXT_SEP; - firstValue = FALSE; - } else { - /* ';' */ - *(appendAt + reslen) = LOCALE_KEYWORD_SEP; - } - } - reslen++; - - /* key */ - len = (int32_t)uprv_strlen(kwd->key); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen)); - } - reslen += len; - - /* '=' */ - if (reslen < capacity) { - *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP; - } - reslen++; - - /* type */ - len = (int32_t)uprv_strlen(kwd->value); - if (reslen < capacity) { - uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen)); - } - reslen += len; - - kwd = kwd->next; - } while (kwd); - } - - /* clean up */ - kwd = kwdFirst; - while (kwd != NULL) { - ExtensionListEntry *tmpKwd = kwd->next; - uprv_free(kwd); - kwd = tmpKwd; - } - - uprv_free(kwdBuf); - - if (U_FAILURE(*status)) { - return 0; - } - - return u_terminateChars(appendAt, capacity, reslen, status); -} - -static int32_t -_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) { - (void)hadPosix; - char buf[ULOC_FULLNAME_CAPACITY]; - char tmpAppend[ULOC_FULLNAME_CAPACITY]; - UErrorCode tmpStatus = U_ZERO_ERROR; - int32_t len, i; - int32_t reslen = 0; - - if (U_FAILURE(*status)) { - return 0; - } - - len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus); - if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - } - return 0; - } - - if (len > 0) { - char *p, *pPriv; - UBool bNext = TRUE; - UBool firstValue = TRUE; - UBool writeValue; - - pPriv = NULL; - p = buf; - while (bNext) { - writeValue = FALSE; - if (*p == SEP || *p == LOCALE_SEP || *p == 0) { - if (*p == 0) { - bNext = FALSE; - } else { - *p = 0; /* terminate */ - } - if (pPriv != NULL) { - /* Private use in the canonical format is lowercase in BCP47 */ - for (i = 0; *(pPriv + i) != 0; i++) { - *(pPriv + i) = uprv_tolower(*(pPriv + i)); - } - - /* validate */ - if (_isPrivateuseValueSubtag(pPriv, -1)) { - if (firstValue) { - if (!_isVariantSubtag(pPriv, -1)) { - writeValue = TRUE; - } - } else { - writeValue = TRUE; - } - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - break; - } else { - break; - } - - if (writeValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - if (firstValue) { - if (reslen < capacity) { - tmpAppend[reslen++] = *PRIVATEUSE_KEY; - } - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen)); - } - reslen += len; - - if (reslen < capacity) { - tmpAppend[reslen++] = SEP; - } - - firstValue = FALSE; - } - - len = (int32_t)uprv_strlen(pPriv); - if (reslen < capacity) { - uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen)); - } - reslen += len; - } - } - /* reset private use starting position */ - pPriv = NULL; - } else if (pPriv == NULL) { - pPriv = p; - } - p++; - } - - if (U_FAILURE(*status)) { - return 0; - } - } - - if (U_SUCCESS(*status)) { - len = reslen; - if (reslen < capacity) { - uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen)); - } - } - - u_terminateChars(appendAt, capacity, reslen, status); - - return reslen; -} - -/* -* ------------------------------------------------- -* -* ultag_ functions -* -* ------------------------------------------------- -*/ - -/* Bit flags used by the parser */ -#define LANG 0x0001 -#define EXTL 0x0002 -#define SCRT 0x0004 -#define REGN 0x0008 -#define VART 0x0010 -#define EXTS 0x0020 -#define EXTV 0x0040 -#define PRIV 0x0080 - -/** - * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing - * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ ) - * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above. - */ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", off ) -#endif - -static ULanguageTag* -ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) { - ULanguageTag *t; - char *tagBuf; - int16_t next; - char *pSubtag, *pNext, *pLastGoodPosition; - int32_t subtagLen; - int32_t extlangIdx; - ExtensionListEntry *pExtension; - char *pExtValueSubtag, *pExtValueSubtagEnd; - int32_t i; - UBool privateuseVar = FALSE; - int32_t grandfatheredLen = 0; - - if (parsedLen != NULL) { - *parsedLen = 0; - } - - if (U_FAILURE(*status)) { - return NULL; - } - - if (tagLen < 0) { - tagLen = (int32_t)uprv_strlen(tag); - } - - /* copy the entire string */ - tagBuf = (char*)uprv_malloc(tagLen + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - uprv_memcpy(tagBuf, tag, tagLen); - *(tagBuf + tagLen) = 0; - - /* create a ULanguageTag */ - t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag)); - if (t == NULL) { - uprv_free(tagBuf); - *status = U_MEMORY_ALLOCATION_ERROR; - return NULL; - } - _initializeULanguageTag(t); - t->buf = tagBuf; - - if (tagLen < MINLEN) { - /* the input tag is too short - return empty ULanguageTag */ - return t; - } - - /* check if the tag is grandfathered */ - for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) { - if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) { - int32_t newTagLength; - - grandfatheredLen = tagLen; /* back up for output parsedLen */ - newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1])); - if (tagLen < newTagLength) { - uprv_free(tagBuf); - tagBuf = (char*)uprv_malloc(newTagLength + 1); - if (tagBuf == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - ultag_close(t); - return NULL; - } - t->buf = tagBuf; - tagLen = newTagLength; - } - uprv_strcpy(t->buf, GRANDFATHERED[i + 1]); - break; - } - } - - size_t parsedLenDelta = 0; - if (grandfatheredLen == 0) { - for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) { - const char* redundantTag = REDUNDANT[i]; - size_t redundantTagLen = uprv_strlen(redundantTag); - // The preferred tag for a redundant tag is always shorter than redundant - // tag. A redundant tag may or may not be followed by other subtags. - // (i.e. "zh-yue" or "zh-yue-u-co-pinyin"). - if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) { - const char* redundantTagEnd = tagBuf + redundantTagLen; - if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) { - const char* preferredTag = REDUNDANT[i + 1]; - size_t preferredTagLen = uprv_strlen(preferredTag); - uprv_strncpy(t->buf, preferredTag, preferredTagLen); - if (*redundantTagEnd == SEP) { - uprv_memmove(tagBuf + preferredTagLen, - redundantTagEnd, - tagLen - redundantTagLen + 1); - } else { - tagBuf[preferredTagLen] = '\0'; - } - // parsedLen should be the length of the input - // before redundantTag is replaced by preferredTag. - // Save the delta to add it back later. - parsedLenDelta = redundantTagLen - preferredTagLen; - break; - } - } - } - } - - /* - * langtag = language - * ["-" script] - * ["-" region] - * *("-" variant) - * *("-" extension) - * ["-" privateuse] - */ - - next = LANG | PRIV; - pNext = pLastGoodPosition = tagBuf; - extlangIdx = 0; - pExtension = NULL; - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - while (pNext) { - char *pSep; - - pSubtag = pNext; - - /* locate next separator char */ - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (next & LANG) { - if (_isLanguageSubtag(pSubtag, subtagLen)) { - *pSep = 0; /* terminate */ - // TODO: move deprecated language code handling here. - t->language = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - next = SCRT | REGN | VART | EXTS | PRIV; - if (subtagLen <= 3) - next |= EXTL; - continue; - } - } - if (next & EXTL) { - if (_isExtlangSubtag(pSubtag, subtagLen)) { - *pSep = 0; - t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag); - - pLastGoodPosition = pSep; - if (extlangIdx < 3) { - next = EXTL | SCRT | REGN | VART | EXTS | PRIV; - } else { - next = SCRT | REGN | VART | EXTS | PRIV; - } - continue; - } - } - if (next & SCRT) { - if (_isScriptSubtag(pSubtag, subtagLen)) { - char *p = pSubtag; - - *pSep = 0; - - /* to title case */ - *p = uprv_toupper(*p); - p++; - for (; *p; p++) { - *p = uprv_tolower(*p); - } - - t->script = pSubtag; - - pLastGoodPosition = pSep; - next = REGN | VART | EXTS | PRIV; - continue; - } - } - if (next & REGN) { - if (_isRegionSubtag(pSubtag, subtagLen)) { - *pSep = 0; - // TODO: move deprecated region code handling here. - t->region = T_CString_toUpperCase(pSubtag); - - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & VART) { - if (_isVariantSubtag(pSubtag, subtagLen) || - (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) { - VariantListEntry *var; - UBool isAdded; - - var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry)); - if (var == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - var->variant = T_CString_toUpperCase(pSubtag); - isAdded = _addVariantToList(&(t->variants), var); - if (!isAdded) { - /* duplicated variant entry */ - uprv_free(var); - break; - } - pLastGoodPosition = pSep; - next = VART | EXTS | PRIV; - continue; - } - } - if (next & EXTS) { - if (_isExtensionSingleton(pSubtag, subtagLen)) { - if (pExtension != NULL) { - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - - /* create a new extension */ - pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry)); - if (pExtension == NULL) { - *status = U_MEMORY_ALLOCATION_ERROR; - goto error; - } - *pSep = 0; - pExtension->key = T_CString_toLowerCase(pSubtag); - pExtension->value = NULL; /* will be set later */ - - /* - * reset the start and the end location of extension value - * subtags for this extension - */ - pExtValueSubtag = NULL; - pExtValueSubtagEnd = NULL; - - next = EXTV; - continue; - } - } - if (next & EXTV) { - if (_isExtensionSubtag(pSubtag, subtagLen)) { - if (pExtValueSubtag == NULL) { - /* if the start postion of this extension's value is not yet, - this one is the first value subtag */ - pExtValueSubtag = pSubtag; - } - - /* Mark the end of this subtag */ - pExtValueSubtagEnd = pSep; - next = EXTS | EXTV | PRIV; - - continue; - } - } - if (next & PRIV) { - if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) { - char *pPrivuseVal; - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - pExtension = NULL; - break; - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - pExtension = NULL; - } else { - /* stop parsing here */ - uprv_free(pExtension); - pExtension = NULL; - break; - } - } - } - - /* The rest of part will be private use value subtags */ - if (pNext == NULL) { - /* empty private use subtag */ - break; - } - /* back up the private use value start position */ - pPrivuseVal = pNext; - - /* validate private use value subtags */ - while (pNext) { - pSubtag = pNext; - pSep = pSubtag; - while (*pSep) { - if (*pSep == SEP) { - break; - } - pSep++; - } - if (*pSep == 0) { - /* last subtag */ - pNext = NULL; - } else { - pNext = pSep + 1; - } - subtagLen = (int32_t)(pSep - pSubtag); - - if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) { - *pSep = 0; - next = VART; - privateuseVar = TRUE; - break; - } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) { - pLastGoodPosition = pSep; - } else { - break; - } - } - - if (next == VART) { - continue; - } - - if (pLastGoodPosition - pPrivuseVal > 0) { - *pLastGoodPosition = 0; - t->privateuse = T_CString_toLowerCase(pPrivuseVal); - } - /* No more subtags, exiting the parse loop */ - break; - } - break; - } - - /* If we fell through here, it means this subtag is illegal - quit parsing */ - break; - } - - if (pExtension != NULL) { - /* Process the last extension */ - if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) { - /* the previous extension is incomplete */ - uprv_free(pExtension); - } else { - /* terminate the previous extension value */ - *pExtValueSubtagEnd = 0; - pExtension->value = T_CString_toLowerCase(pExtValueSubtag); - /* insert the extension to the list */ - if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) { - pLastGoodPosition = pExtValueSubtagEnd; - } else { - uprv_free(pExtension); - } - } - } - - if (parsedLen != NULL) { - *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen : - (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta); - } - - return t; - -error: - ultag_close(t); - return NULL; -} - -/** -* Ticket #12705 - Turn optimization back on. -*/ -#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210)) -#pragma optimize( "", on ) -#endif - -static void -ultag_close(ULanguageTag* langtag) { - - if (langtag == NULL) { - return; - } - - uprv_free(langtag->buf); - - if (langtag->variants) { - VariantListEntry *curVar = langtag->variants; - while (curVar) { - VariantListEntry *nextVar = curVar->next; - uprv_free(curVar); - curVar = nextVar; - } - } - - if (langtag->extensions) { - ExtensionListEntry *curExt = langtag->extensions; - while (curExt) { - ExtensionListEntry *nextExt = curExt->next; - uprv_free(curExt); - curExt = nextExt; - } - } - - uprv_free(langtag); -} - -static const char* -ultag_getLanguage(const ULanguageTag* langtag) { - return langtag->language; -} - -#if 0 -static const char* -ultag_getJDKLanguage(const ULanguageTag* langtag) { - int32_t i; - for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) { - if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) { - return DEPRECATEDLANGS[i + 1]; - } - } - return langtag->language; -} -#endif - -static const char* -ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) { - if (idx >= 0 && idx < MAXEXTLANG) { - return langtag->extlang[idx]; - } - return NULL; -} - -static int32_t -ultag_getExtlangSize(const ULanguageTag* langtag) { - int32_t size = 0; - int32_t i; - for (i = 0; i < MAXEXTLANG; i++) { - if (langtag->extlang[i]) { - size++; - } - } - return size; -} - -static const char* -ultag_getScript(const ULanguageTag* langtag) { - return langtag->script; -} - -static const char* -ultag_getRegion(const ULanguageTag* langtag) { - return langtag->region; -} - -static const char* -ultag_getVariant(const ULanguageTag* langtag, int32_t idx) { - const char *var = NULL; - VariantListEntry *cur = langtag->variants; - int32_t i = 0; - while (cur) { - if (i == idx) { - var = cur->variant; - break; - } - cur = cur->next; - i++; - } - return var; -} - -static int32_t -ultag_getVariantsSize(const ULanguageTag* langtag) { - int32_t size = 0; - VariantListEntry *cur = langtag->variants; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* -ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) { - const char *key = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - key = cur->key; - break; - } - cur = cur->next; - i++; - } - return key; -} - -static const char* -ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) { - const char *val = NULL; - ExtensionListEntry *cur = langtag->extensions; - int32_t i = 0; - while (cur) { - if (i == idx) { - val = cur->value; - break; - } - cur = cur->next; - i++; - } - return val; -} - -static int32_t -ultag_getExtensionsSize(const ULanguageTag* langtag) { - int32_t size = 0; - ExtensionListEntry *cur = langtag->extensions; - while (TRUE) { - if (cur == NULL) { - break; - } - size++; - cur = cur->next; - } - return size; -} - -static const char* -ultag_getPrivateUse(const ULanguageTag* langtag) { - return langtag->privateuse; -} - -#if 0 -static const char* -ultag_getGrandfathered(const ULanguageTag* langtag) { - return langtag->grandfathered; -} -#endif - - -/* -* ------------------------------------------------- -* -* Locale/BCP47 conversion APIs, exposed as uloc_* -* -* ------------------------------------------------- -*/ -U_CAPI int32_t U_EXPORT2 -uloc_toLanguageTag(const char* localeID, - char* langtag, - int32_t langtagCapacity, - UBool strict, - UErrorCode* status) { - icu::CharString canonical; - int32_t reslen; - UErrorCode tmpStatus = U_ZERO_ERROR; - UBool hadPosix = FALSE; - const char* pKeywordStart; - - /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */ - int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID)); - if (resultCapacity > 0) { - char* buffer; - - for (;;) { - buffer = canonical.getAppendBuffer( - /*minCapacity=*/resultCapacity, - /*desiredCapacityHint=*/resultCapacity, - resultCapacity, - tmpStatus); - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return 0; - } - - reslen = - uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus); - - if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) { - break; - } - - resultCapacity = reslen; - tmpStatus = U_ZERO_ERROR; - } - - if (U_FAILURE(tmpStatus)) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - return 0; - } - - canonical.append(buffer, reslen, tmpStatus); - if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) { - tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString. - } - - if (U_FAILURE(tmpStatus)) { - *status = tmpStatus; - return 0; - } - } - - reslen = 0; - - /* For handling special case - private use only tag */ - pKeywordStart = locale_getKeywordsStart(canonical.data()); - if (pKeywordStart == canonical.data()) { - UEnumeration *kwdEnum; - int kwdCnt = 0; - UBool done = FALSE; - - kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus); - if (kwdEnum != NULL) { - kwdCnt = uenum_count(kwdEnum, &tmpStatus); - if (kwdCnt == 1) { - const char *key; - int32_t len = 0; - - key = uenum_next(kwdEnum, &len, &tmpStatus); - if (len == 1 && *key == PRIVATEUSE) { - char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY]; - buf[0] = PRIVATEUSE; - buf[1] = SEP; - len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus); - if (U_SUCCESS(tmpStatus)) { - if (_isPrivateuseValueSubtags(&buf[2], len)) { - /* return private use only tag */ - reslen = len + 2; - uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity)); - u_terminateChars(langtag, langtagCapacity, reslen, status); - done = TRUE; - } else if (strict) { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - /* if not strict mode, then "und" will be returned */ - } else { - *status = U_ILLEGAL_ARGUMENT_ERROR; - done = TRUE; - } - } - } - uenum_close(kwdEnum); - if (done) { - return reslen; - } - } - } - - reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status); - reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status); - reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status); - reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status); - - return reslen; -} - - -U_CAPI int32_t U_EXPORT2 -uloc_forLanguageTag(const char* langtag, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* status) { - return ulocimp_forLanguageTag( - langtag, - -1, - localeID, - localeIDCapacity, - parsedLength, - status); -} - - -U_CAPI int32_t U_EXPORT2 -ulocimp_forLanguageTag(const char* langtag, - int32_t tagLen, - char* localeID, - int32_t localeIDCapacity, - int32_t* parsedLength, - UErrorCode* status) { - ULanguageTag *lt; - int32_t reslen = 0; - const char *subtag, *p; - int32_t len; - int32_t i, n; - UBool noRegion = TRUE; - - lt = ultag_parse(langtag, tagLen, parsedLength, status); - if (U_FAILURE(*status)) { - return 0; - } - - /* language */ - subtag = ultag_getExtlangSize(lt) > 0 ? ultag_getExtlang(lt, 0) : ultag_getLanguage(lt); - if (uprv_compareInvCharsAsAscii(subtag, LANG_UND) != 0) { - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID, subtag, uprv_min(len, localeIDCapacity - reslen)); - } - reslen += len; - } - } - - /* script */ - subtag = ultag_getScript(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - - /* write out the script in title case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - if (p == subtag) { - *(localeID + reslen) = uprv_toupper(*p); - } else { - *(localeID + reslen) = *p; - } - } - reslen++; - p++; - } - } - - /* region */ - subtag = ultag_getRegion(lt); - len = (int32_t)uprv_strlen(subtag); - if (len > 0) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the retion in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - noRegion = FALSE; - } - - /* variants */ - n = ultag_getVariantsSize(lt); - if (n > 0) { - if (noRegion) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - } - - for (i = 0; i < n; i++) { - subtag = ultag_getVariant(lt, i); - if (reslen < localeIDCapacity) { - *(localeID + reslen) = LOCALE_SEP; - } - reslen++; - /* write out the variant in upper case */ - p = subtag; - while (*p) { - if (reslen < localeIDCapacity) { - *(localeID + reslen) = uprv_toupper(*p); - } - reslen++; - p++; - } - } - } - - /* keywords */ - n = ultag_getExtensionsSize(lt); - subtag = ultag_getPrivateUse(lt); - if (n > 0 || uprv_strlen(subtag) > 0) { - if (reslen == 0 && n > 0) { - /* need a language */ - if (reslen < localeIDCapacity) { - uprv_memcpy(localeID + reslen, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - reslen)); - } - reslen += LANG_UND_LEN; - } - len = _appendKeywords(lt, localeID + reslen, localeIDCapacity - reslen, status); - reslen += len; - } - - ultag_close(lt); - return u_terminateChars(localeID, localeIDCapacity, reslen, status); -} |