summaryrefslogtreecommitdiff
path: root/deps/node/deps/icu-small/source/common/uloc_tag.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'deps/node/deps/icu-small/source/common/uloc_tag.cpp')
-rw-r--r--deps/node/deps/icu-small/source/common/uloc_tag.cpp2884
1 files changed, 0 insertions, 2884 deletions
diff --git a/deps/node/deps/icu-small/source/common/uloc_tag.cpp b/deps/node/deps/icu-small/source/common/uloc_tag.cpp
deleted file mode 100644
index 8120331c..00000000
--- a/deps/node/deps/icu-small/source/common/uloc_tag.cpp
+++ /dev/null
@@ -1,2884 +0,0 @@
-// © 2016 and later: Unicode, Inc. and others.
-// License & terms of use: http://www.unicode.org/copyright.html
-/*
-**********************************************************************
-* Copyright (C) 2009-2015, International Business Machines
-* Corporation and others. All Rights Reserved.
-**********************************************************************
-*/
-
-#include "unicode/utypes.h"
-#include "unicode/ures.h"
-#include "unicode/putil.h"
-#include "unicode/uloc.h"
-#include "ustr_imp.h"
-#include "charstr.h"
-#include "cmemory.h"
-#include "cstring.h"
-#include "putilimp.h"
-#include "uinvchar.h"
-#include "ulocimp.h"
-#include "uvector.h"
-#include "uassert.h"
-
-
-/* struct holding a single variant: one node of a singly linked list */
-typedef struct VariantListEntry {
-    const char *variant;            /* variant subtag text */
-    struct VariantListEntry *next;  /* next node, or NULL at the end of the list */
-} VariantListEntry;
-
-/* struct holding a single attribute value: one node of a singly linked list */
-typedef struct AttributeListEntry {
-    const char *attribute;            /* attribute text */
-    struct AttributeListEntry *next;  /* next node, or NULL at the end of the list */
-} AttributeListEntry;
-
-/* struct holding a single extension (key/value pair): one node of a singly linked list */
-typedef struct ExtensionListEntry {
-    const char *key;                  /* extension key (a one-letter singleton or a longer keyword key) */
-    const char *value;                /* extension value; may be NULL (the "attribute" placeholder entry uses NULL) */
-    struct ExtensionListEntry *next;  /* next node, or NULL at the end of the list */
-} ExtensionListEntry;
-
-/* Maximum number of extlang subtags stored in a ULanguageTag. */
-#define MAXEXTLANG 3
-/*
- * Parsed representation of a language tag.
- * _initializeULanguageTag() sets every string member to EMPTY, every
- * extlang slot to NULL and both lists to NULL.
- */
-typedef struct ULanguageTag {
-    char *buf; /* holding parsed subtags */
-    const char *language;
-    const char *extlang[MAXEXTLANG];
-    const char *script;
-    const char *region;
-    VariantListEntry *variants;      /* head of the variant list, or NULL */
-    ExtensionListEntry *extensions;  /* head of the extension list, or NULL */
-    const char *privateuse;
-    const char *grandfathered;
-} ULanguageTag;
-
-#define MINLEN 2
-/* Characters with a syntactic role in a language tag. */
-#define SEP '-'         /* subtag separator */
-#define PRIVATEUSE 'x'  /* private use singleton */
-#define LDMLEXT 'u'     /* singleton introducing the LDML (Unicode locale) extension */
-
-/* Separators of the ICU locale ID syntax (NOTE(review): only LOCALE_SEP is used in the part of the file reviewed here). */
-#define LOCALE_SEP '_'
-#define LOCALE_EXT_SEP '@'
-#define LOCALE_KEYWORD_SEP ';'
-#define LOCALE_KEY_TYPE_SEP '='
-
-/* Character classification helpers. ISNUMERIC evaluates its argument twice,
-   so do not pass an expression with side effects. */
-#define ISALPHA(c) uprv_isASCIILetter(c)
-#define ISNUMERIC(c) ((c)>='0' && (c)<='9')
-
-static const char EMPTY[] = "";
-static const char LANG_UND[] = "und";             /* language subtag emitted when the locale has no usable language */
-static const char PRIVATEUSE_KEY[] = "x";
-static const char _POSIX[] = "_POSIX";
-static const char POSIX_KEY[] = "va";              /* extension key emitted for the POSIX variant */
-static const char POSIX_VALUE[] = "posix";         /* extension value emitted for the POSIX variant */
-static const char LOCALE_ATTRIBUTE_KEY[] = "attribute";  /* keyword carrying Unicode locale attributes */
-static const char PRIVUSE_VARIANT_PREFIX[] = "lvariant";
-static const char LOCALE_TYPE_YES[] = "yes";
-
-/* Length of LANG_UND, not counting the terminating NUL. */
-#define LANG_UND_LEN 3
-
-/*
-  Updated on 2018-09-12 from
-  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
-  This table has 2 parts. The part for grandfathered tags is generated by the
-  following script from the IANA language tag registry.
-
-    curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
-    egrep -A 7 'Type: grandfathered' | \
-    egrep 'Tag|Prefe' | grep -B1 'Preferred' | grep -v '^--' | \
-    awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' |\
-    tr 'A-Z' 'a-z'
-
-
-  The 2nd part is made of five ICU-specific entries. They are kept for
-  backward compatibility for now, even though they have no preferred
-  values in the registry. They may have to be removed for strict BCP 47
-  compliance.
-
-  Layout: a flat array of pairs; element 2*k is the grandfathered tag and
-  element 2*k+1 is its replacement.
-*/
-static const char* const GRANDFATHERED[] = {
-/*  grandfathered   preferred */
-    "art-lojban",   "jbo",
-    "en-gb-oed",    "en-gb-oxendict",
-    "i-ami",        "ami",
-    "i-bnn",        "bnn",
-    "i-hak",        "hak",
-    "i-klingon",    "tlh",
-    "i-lux",        "lb",
-    "i-navajo",     "nv",
-    "i-pwn",        "pwn",
-    "i-tao",        "tao",
-    "i-tay",        "tay",
-    "i-tsu",        "tsu",
-    "no-bok",       "nb",
-    "no-nyn",       "nn",
-    "sgn-be-fr",    "sfb",
-    "sgn-be-nl",    "vgt",
-    "sgn-ch-de",    "sgg",
-    "zh-guoyu",     "cmn",
-    "zh-hakka",     "hak",
-    "zh-min-nan",   "nan",
-    "zh-xiang",     "hsn",
-
-    // Grandfathered tags with no preferred value in the IANA
-    // registry. Kept for now for the backward compatibility
-    // because ICU has mapped them this way.
-    "cel-gaulish",  "xtg-x-cel-gaulish",
-    "i-default",    "en-x-i-default",
-    "i-enochian",   "und-x-i-enochian",
-    "i-mingo",      "see-x-i-mingo",
-    "zh-min",       "nan-x-zh-min",
-};
-
-/*
-  Updated on 2018-09-12 from
-  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
-  The table lists redundant tags with a preferred value in the IANA language tag registry.
-  It is generated with the following command:
-
-    curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry |\
-    grep 'Type: redundant' -A 5 | egrep '^(Tag:|Prefer)' | grep -B1 'Preferred' | \
-    awk -n '/Tag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}' | \
-    tr 'A-Z' 'a-z'
-
-  In addition, ja-latn-hepburn-heploc is mapped to ja-latn-alalc97 because
-  the variant tag 'hepburn-heploc' has the preferred subtag 'alalc97'.
-
-  Layout: a flat array of pairs; element 2*k is the redundant tag and
-  element 2*k+1 is its preferred replacement.
-*/
-
-static const char* const REDUNDANT[] = {
-//  redundant       preferred
-    "sgn-br",       "bzs",
-    "sgn-co",       "csn",
-    "sgn-de",       "gsg",
-    "sgn-dk",       "dsl",
-    "sgn-es",       "ssp",
-    "sgn-fr",       "fsl",
-    "sgn-gb",       "bfi",
-    "sgn-gr",       "gss",
-    "sgn-ie",       "isg",
-    "sgn-it",       "ise",
-    "sgn-jp",       "jsl",
-    "sgn-mx",       "mfs",
-    "sgn-ni",       "ncs",
-    "sgn-nl",       "dse",
-    "sgn-no",       "nsl",
-    "sgn-pt",       "psr",
-    "sgn-se",       "swl",
-    "sgn-us",       "ase",
-    "sgn-za",       "sfs",
-    "zh-cmn",       "cmn",
-    "zh-cmn-hans",  "cmn-hans",
-    "zh-cmn-hant",  "cmn-hant",
-    "zh-gan",       "gan",
-    "zh-wuu",       "wuu",
-    "zh-yue",       "yue",
-
-    // variant tag with preferred value
-    "ja-latn-hepburn-heploc", "ja-latn-alalc97",
-};
-
-/*
-  Updated on 2018-09-12 from
-  https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry .
-
-    grep 'Type: language' -A 7 language-subtag-registry | egrep 'Subtag|Prefe' | \
-    grep -B1 'Preferred' | grep -v '^--' | \
-    awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
-
-  Make sure that 2-letter language subtags come before 3-letter subtags:
-  the lookup in _appendLanguageToLanguageTag() stops scanning as soon as it
-  reaches an entry longer than the code being looked up.
-
-  Layout: a flat array of pairs; element 2*k is the deprecated code and
-  element 2*k+1 is its replacement. Each element holds at most 3 characters
-  plus the terminating NUL.
-*/
-static const char DEPRECATEDLANGS[][4] = {
-/*  deprecated  new */
-    "in",       "id",
-    "iw",       "he",
-    "ji",       "yi",
-    "jw",       "jv",
-    "mo",       "ro",
-    "aam",      "aas",
-    "adp",      "dz",
-    "aue",      "ktz",
-    "ayx",      "nun",
-    "bgm",      "bcg",
-    "bjd",      "drl",
-    "ccq",      "rki",
-    "cjr",      "mom",
-    "cka",      "cmr",
-    "cmk",      "xch",
-    "coy",      "pij",
-    "cqu",      "quh",
-    "drh",      "khk",
-    "drw",      "prs",
-    "gav",      "dev",
-    "gfx",      "vaj",
-    "ggn",      "gvr",
-    "gti",      "nyc",
-    "guv",      "duz",
-    "hrr",      "jal",
-    "ibi",      "opa",
-    "ilw",      "gal",
-    "jeg",      "oyb",
-    "kgc",      "tdf",
-    "kgh",      "kml",
-    "koj",      "kwv",
-    "krm",      "bmf",
-    "ktr",      "dtp",
-    "kvs",      "gdj",
-    "kwq",      "yam",
-    "kxe",      "tvd",
-    "kzj",      "dtp",
-    "kzt",      "dtp",
-    "lii",      "raq",
-    "lmm",      "rmx",
-    "meg",      "cir",
-    "mst",      "mry",
-    "mwj",      "vaj",
-    "myt",      "mry",
-    "nad",      "xny",
-    "ncp",      "kdz",
-    "nnx",      "ngv",
-    "nts",      "pij",
-    "oun",      "vaj",
-    "pcr",      "adx",
-    "pmc",      "huw",
-    "pmu",      "phr",
-    "ppa",      "bfy",
-    "ppr",      "lcq",
-    "pry",      "prt",
-    "puz",      "pub",
-    "sca",      "hle",
-    "skk",      "oyb",
-    "tdu",      "dtp",
-    "thc",      "tpo",
-    "thx",      "oyb",
-    "tie",      "ras",
-    "tkk",      "twm",
-    "tlw",      "weo",
-    "tmp",      "tyj",
-    "tne",      "kak",
-    "tnf",      "prs",
-    "tsf",      "taj",
-    "uok",      "ema",
-    "xba",      "cax",
-    "xia",      "acn",
-    "xkh",      "waw",
-    "xsj",      "suj",
-    "ybd",      "rki",
-    "yma",      "lrr",
-    "ymt",      "mtm",
-    "yos",      "zom",
-    "yuu",      "yug",
-};
-
-/*
-  Updated on 2018-04-24 from
-
-    curl https://www.iana.org/assignments/language-subtag-registry/language-subtag-registry | \
-    grep 'Type: region' -A 7 | egrep 'Subtag|Prefe' | \
-    grep -B1 'Preferred' | \
-    awk -n '/Subtag/ {printf("    \"%s\",       ", $2);} /Preferred/ {printf("\"%s\",\n", $2);}'
-
-  Layout: a flat array of pairs; element 2*k is the deprecated region code
-  and element 2*k+1 is its replacement. Each element holds 2 characters
-  plus the terminating NUL.
-*/
-static const char DEPRECATEDREGIONS[][3] = {
-/*  deprecated  new */
-    "BU",       "MM",
-    "DD",       "DE",
-    "FX",       "FR",
-    "TP",       "TL",
-    "YD",       "YE",
-    "ZR",       "CD",
-};
-
-/*
-* -------------------------------------------------
-*
-* These ultag_ functions may be exposed as APIs later
-*
-* -------------------------------------------------
-*/
-
-/* Forward declarations of the file-local ultag_ functions. */
-
-/* Parser / destructor pair for ULanguageTag objects. */
-static ULanguageTag*
-ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status);
-
-static void
-ultag_close(ULanguageTag* langtag);
-
-/* Accessors for the individual parts of a parsed tag. */
-static const char*
-ultag_getLanguage(const ULanguageTag* langtag);
-
-#if 0
-static const char*
-ultag_getJDKLanguage(const ULanguageTag* langtag);
-#endif
-
-static const char*
-ultag_getExtlang(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getExtlangSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getScript(const ULanguageTag* langtag);
-
-static const char*
-ultag_getRegion(const ULanguageTag* langtag);
-
-static const char*
-ultag_getVariant(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getVariantsSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx);
-
-static const char*
-ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx);
-
-static int32_t
-ultag_getExtensionsSize(const ULanguageTag* langtag);
-
-static const char*
-ultag_getPrivateUse(const ULanguageTag* langtag);
-
-/* Compiled out, like ultag_getJDKLanguage above. */
-#if 0
-static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag);
-#endif
-
-namespace {
-
-// Owns a growing set of heap-allocated CharString objects and releases all
-// of them when the pool itself is destroyed (via the UVector element deleter).
-// Only ever stack-allocated, does not need to inherit UMemory.
-class CharStringPool {
-public:
-    CharStringPool() : status(U_ZERO_ERROR), pool(&deleter, nullptr, status) {}
-    ~CharStringPool() = default;
-
-    CharStringPool(const CharStringPool&) = delete;
-    CharStringPool& operator=(const CharStringPool&) = delete;
-
-    // Returns a new, empty CharString owned by the pool, or nullptr when the
-    // pool is already in a failed state, the allocation fails, or the object
-    // cannot be registered with the pool.
-    icu::CharString* create() {
-        icu::CharString* fresh = nullptr;
-        if (U_SUCCESS(status)) {
-            fresh = new icu::CharString;
-            if (fresh == nullptr) {
-                status = U_MEMORY_ALLOCATION_ERROR;
-            } else {
-                pool.addElement(fresh, status);
-                if (U_FAILURE(status)) {
-                    // Not registered with the pool, so release it here.
-                    delete fresh;
-                    fresh = nullptr;
-                }
-            }
-        }
-        return fresh;
-    }
-
-private:
-    // Element deleter handed to the UVector.
-    static void U_CALLCONV deleter(void* obj) {
-        delete static_cast<icu::CharString*>(obj);
-    }
-
-    // Sticky error state; declared before pool because pool's constructor uses it.
-    UErrorCode status;
-    icu::UVector pool;
-};
-
-}  // namespace
-
-/*
-* -------------------------------------------------
-*
-* Language subtag syntax validation functions
-*
-* -------------------------------------------------
-*/
-
-/* Returns TRUE when each of the first len characters of s passes ISALPHA
-   (vacuously TRUE when len <= 0). */
-static UBool
-_isAlphaString(const char* s, int32_t len) {
-    const char* cursor = s;
-    int32_t remaining = len;
-
-    while (remaining > 0) {
-        if (!ISALPHA(*cursor)) {
-            return FALSE;
-        }
-        cursor++;
-        remaining--;
-    }
-    return TRUE;
-}
-
-/* Returns TRUE when each of the first len characters of s is a digit '0'..'9'
-   (vacuously TRUE when len <= 0). */
-static UBool
-_isNumericString(const char* s, int32_t len) {
-    const char* cursor = s;
-    int32_t remaining = len;
-
-    while (remaining > 0) {
-        if (!ISNUMERIC(*cursor)) {
-            return FALSE;
-        }
-        cursor++;
-        remaining--;
-    }
-    return TRUE;
-}
-
-/* Returns TRUE when each of the first len characters of s passes ISALPHA or
-   ISNUMERIC (vacuously TRUE when len <= 0). */
-static UBool
-_isAlphaNumericString(const char* s, int32_t len) {
-    const char* cursor = s;
-    int32_t remaining = len;
-
-    for (; remaining > 0; cursor++, remaining--) {
-        if (ISALPHA(*cursor) || ISNUMERIC(*cursor)) {
-            continue;
-        }
-        return FALSE;
-    }
-    return TRUE;
-}
-
-/*
- * Checks the syntax of a language subtag:
- *
- * language      = 2*3ALPHA            ; shortest ISO 639 code
- *                 ["-" extlang]       ; sometimes followed by
- *                                     ;   extended language subtags
- *               / 4ALPHA              ; or reserved for future use
- *               / 5*8ALPHA            ; or registered language subtag
- *
- * i.e. 2 to 8 letters. A negative len means "s is NUL-terminated".
- */
-static UBool
-_isLanguageSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n < 2 || n > 8) {
-        return FALSE;
-    }
-    return _isAlphaString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks the syntax of a single extlang subtag:
- *
- * extlang       = 3ALPHA              ; selected ISO 639 codes
- *                 *2("-" 3ALPHA)      ; permanently reserved
- *
- * i.e. exactly 3 letters. A negative len means "s is NUL-terminated".
- */
-static UBool
-_isExtlangSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n != 3) {
-        return FALSE;
-    }
-    return _isAlphaString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks the syntax of a script subtag:
- *
- * script        = 4ALPHA              ; ISO 15924 code
- *
- * A negative len means "s is NUL-terminated".
- */
-static UBool
-_isScriptSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n != 4) {
-        return FALSE;
-    }
-    return _isAlphaString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks the syntax of a region subtag:
- *
- * region        = 2ALPHA              ; ISO 3166-1 code
- *               / 3DIGIT              ; UN M.49 code
- *
- * A negative len means "s is NUL-terminated".
- */
-static UBool
-_isRegionSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    switch (n) {
-    case 2:
-        return _isAlphaString(s, n) ? TRUE : FALSE;
-    case 3:
-        return _isNumericString(s, n) ? TRUE : FALSE;
-    default:
-        return FALSE;
-    }
-}
-
-/*
- * Checks the syntax of a variant subtag:
- *
- * variant       = 5*8alphanum         ; registered variants
- *               / (DIGIT 3alphanum)
- *
- * A negative len means "s is NUL-terminated".
- */
-static UBool
-_isVariantSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n == 4) {
-        /* short form: a digit followed by three alphanumerics */
-        return (ISNUMERIC(*s) && _isAlphaNumericString(s + 1, 3)) ? TRUE : FALSE;
-    }
-    if (n >= 5 && n <= 8) {
-        return _isAlphaNumericString(s, n) ? TRUE : FALSE;
-    }
-    return FALSE;
-}
-
-/*
- * Checks the syntax of a variant subtag that appears in the private use
- * section. The rule enforced here is simply
- *
- *     1*8alphanum
- *
- * (1 to 8 alphanumeric characters). A negative len means
- * "s is NUL-terminated".
- */
-static UBool
-_isPrivateuseVariantSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n < 1 || n > 8) {
-        return FALSE;
-    }
-    return _isAlphaNumericString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks whether s is an extension singleton:
- *
- * extension     = singleton 1*("-" (2*8alphanum))
- *
- * i.e. a single letter other than the private use singleton 'x'
- * (compared case-insensitively). A negative len means "s is NUL-terminated".
- */
-static UBool
-_isExtensionSingleton(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n != 1) {
-        return FALSE;
-    }
-    if (!ISALPHA(*s)) {
-        return FALSE;
-    }
-    return (uprv_tolower(*s) != PRIVATEUSE) ? TRUE : FALSE;
-}
-
-/*
- * Checks the syntax of one subtag of an extension value:
- *
- * extension     = singleton 1*("-" (2*8alphanum))
- *
- * i.e. 2 to 8 alphanumeric characters. A negative len means
- * "s is NUL-terminated".
- */
-static UBool
-_isExtensionSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n < 2 || n > 8) {
-        return FALSE;
-    }
-    return _isAlphaNumericString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks a SEP-separated sequence of extension subtags. Every piece must
- * satisfy _isExtensionSubtag(); an empty input, or an empty piece (leading,
- * trailing or doubled separator), makes the whole sequence invalid.
- * A negative len means "s is NUL-terminated".
- */
-static UBool
-_isExtensionSubtags(const char* s, int32_t len) {
-    int32_t start = -1;   /* index of the first char of the current piece, -1 if none */
-    int32_t i;
-
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-
-    for (i = 0; i < len; i++) {
-        if (s[i] != SEP) {
-            if (start < 0) {
-                start = i;
-            }
-            continue;
-        }
-        /* separator: the piece before it must exist and be valid */
-        if (start < 0) {
-            return FALSE;
-        }
-        if (!_isExtensionSubtag(s + start, i - start)) {
-            return FALSE;
-        }
-        start = -1;
-    }
-
-    if (start < 0) {
-        return FALSE;
-    }
-    return _isExtensionSubtag(s + start, len - start);
-}
-
-/*
- * Checks the syntax of one subtag of a private use value:
- *
- * privateuse    = "x" 1*("-" (1*8alphanum))
- *
- * i.e. 1 to 8 alphanumeric characters. A negative len means
- * "s is NUL-terminated".
- */
-static UBool
-_isPrivateuseValueSubtag(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n < 1 || n > 8) {
-        return FALSE;
-    }
-    return _isAlphaNumericString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Checks a SEP-separated sequence of private use subtags. Every piece must
- * satisfy _isPrivateuseValueSubtag(); an empty input, or an empty piece
- * (leading, trailing or doubled separator), makes the whole sequence invalid.
- * A negative len means "s is NUL-terminated".
- */
-static UBool
-_isPrivateuseValueSubtags(const char* s, int32_t len) {
-    int32_t start = -1;   /* index of the first char of the current piece, -1 if none */
-    int32_t i;
-
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-
-    for (i = 0; i < len; i++) {
-        if (s[i] != SEP) {
-            if (start < 0) {
-                start = i;
-            }
-            continue;
-        }
-        /* separator: the piece before it must exist and be valid */
-        if (start < 0) {
-            return FALSE;
-        }
-        if (!_isPrivateuseValueSubtag(s + start, i - start)) {
-            return FALSE;
-        }
-        start = -1;
-    }
-
-    if (start < 0) {
-        return FALSE;
-    }
-    return _isPrivateuseValueSubtag(s + start, len - start);
-}
-
-/*
- * Returns TRUE when s is exactly two alphanumeric characters.
- * A negative len means "s is NUL-terminated".
- */
-U_CFUNC UBool
-ultag_isUnicodeLocaleKey(const char* s, int32_t len) {
-    const int32_t n = (len < 0) ? (int32_t)uprv_strlen(s) : len;
-
-    if (n != 2) {
-        return FALSE;
-    }
-    return _isAlphaNumericString(s, n) ? TRUE : FALSE;
-}
-
-/*
- * Returns TRUE when s is one or more SEP-separated subtags, each made of
- * 3 to 8 alphanumeric characters. Any other character, an empty input, or
- * a subtag shorter than 3 / longer than 8 characters yields FALSE.
- * A negative len means "s is NUL-terminated".
- */
-U_CFUNC UBool
-ultag_isUnicodeLocaleType(const char*s, int32_t len) {
-    int32_t run = 0;   /* length of the subtag currently being scanned */
-    int32_t i;
-
-    if (len < 0) {
-        len = (int32_t)uprv_strlen(s);
-    }
-
-    for (i = 0; i < len; i++) {
-        const char c = s[i];
-
-        if (c == SEP) {
-            if (run < 3) {
-                return FALSE;
-            }
-            run = 0;
-            continue;
-        }
-        if (!(ISALPHA(c) || ISNUMERIC(c))) {
-            return FALSE;
-        }
-        run++;
-        if (run > 8) {
-            return FALSE;
-        }
-    }
-
-    return (run >= 3);
-}
-/*
-* -------------------------------------------------
-*
-* Helper functions
-*
-* -------------------------------------------------
-*/
-
-/*
- * Appends var to the end of the list headed by *first, preserving the
- * order in which variants were added.
- *
- * Returns TRUE when var was linked into the list. Returns FALSE, leaving
- * the list and var untouched, when an entry with an equal variant string
- * is already present; the caller keeps ownership of var in that case.
- */
-static UBool
-_addVariantToList(VariantListEntry **first, VariantListEntry *var) {
-    /* link always points at the pointer that would have to be updated
-       to insert at the current position */
-    VariantListEntry **link = first;
-
-    while (*link != NULL) {
-        if (uprv_compareInvCharsAsAscii(var->variant, (*link)->variant) == 0) {
-            /* duplicated variant */
-            return FALSE;
-        }
-        link = &(*link)->next;
-    }
-
-    var->next = NULL;
-    *link = var;
-    return TRUE;
-}
-
-/*
- * Inserts attr into the list headed by *first, keeping the list sorted in
- * ascending order of the attribute strings (as compared by
- * uprv_compareInvCharsAsAscii).
- *
- * Returns TRUE when attr was linked into the list. Returns FALSE, leaving
- * the list and attr untouched, when an equal attribute is already present;
- * the caller keeps ownership of attr in that case.
- */
-static UBool
-_addAttributeToList(AttributeListEntry **first, AttributeListEntry *attr) {
-    /* link always points at the pointer that would have to be updated
-       to insert at the current position */
-    AttributeListEntry **link = first;
-
-    while (*link != NULL) {
-        const int32_t cmp = uprv_compareInvCharsAsAscii(attr->attribute, (*link)->attribute);
-        if (cmp == 0) {
-            /* duplicated attribute */
-            return FALSE;
-        }
-        if (cmp < 0) {
-            /* attr sorts before the current entry: insert here */
-            break;
-        }
-        link = &(*link)->next;
-    }
-
-    attr->next = *link;   /* NULL when appending at the tail */
-    *link = attr;
-    return TRUE;
-}
-
-
-/*
- * Inserts ext into the list headed by *first, keeping the list sorted by key.
- *
- * When localeToBCP is FALSE, keys are ordered by uprv_compareInvCharsAsAscii.
- * When localeToBCP is TRUE, the ordering is:
- *   - two one-letter keys: by character, except that the private use key 'x'
- *     sorts after every other one-letter key;
- *   - a one-letter key against a longer key: the one-letter key is compared
- *     with 'u' (LDMLEXT), so longer keys sit where the 'u' singleton would;
- *   - two longer keys: by uprv_compareInvCharsAsAscii, except that the
- *     "attribute" key sorts before every other longer key.
- *
- * Returns TRUE when ext was linked into the list. Returns FALSE, leaving the
- * list and ext untouched, when an entry comparing equal is already present;
- * the caller keeps ownership of ext in that case.
- */
-static UBool
-_addExtensionToList(ExtensionListEntry **first, ExtensionListEntry *ext, UBool localeToBCP) {
-    /* link always points at the pointer that would have to be updated
-       to insert at the current position */
-    ExtensionListEntry **link = first;
-
-    while (*link != NULL) {
-        const ExtensionListEntry *cur = *link;
-        int32_t cmp;
-
-        if (!localeToBCP) {
-            cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
-        } else {
-            /* special handling for locale to bcp conversion */
-            const int32_t newLen = (int32_t)uprv_strlen(ext->key);
-            const int32_t curLen = (int32_t)uprv_strlen(cur->key);
-
-            if (newLen == 1 && curLen == 1) {
-                if (*(ext->key) == *(cur->key)) {
-                    cmp = 0;
-                } else if (*(ext->key) == PRIVATEUSE) {
-                    cmp = 1;
-                } else if (*(cur->key) == PRIVATEUSE) {
-                    cmp = -1;
-                } else {
-                    cmp = *(ext->key) - *(cur->key);
-                }
-            } else if (newLen == 1) {
-                cmp = *(ext->key) - LDMLEXT;
-            } else if (curLen == 1) {
-                cmp = LDMLEXT - *(cur->key);
-            } else {
-                cmp = uprv_compareInvCharsAsAscii(ext->key, cur->key);
-                /* Both are u extension keys - 'attribute' always goes first */
-                if (cmp != 0) {
-                    if (uprv_strcmp(cur->key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                        cmp = 1;
-                    } else if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                        cmp = -1;
-                    }
-                }
-            }
-        }
-
-        if (cmp == 0) {
-            /* duplicated extension key */
-            return FALSE;
-        }
-        if (cmp < 0) {
-            /* ext sorts before the current entry: insert here */
-            break;
-        }
-        link = &(*link)->next;
-    }
-
-    ext->next = *link;   /* NULL when appending at the tail */
-    *link = ext;
-    return TRUE;
-}
-
-/*
- * Puts langtag into its empty state: no buffer, no extlangs, no variant or
- * extension list, and every string member pointing at EMPTY.
- */
-static void
-_initializeULanguageTag(ULanguageTag* langtag) {
-    int32_t slot;
-
-    langtag->buf = NULL;
-
-    /* string members default to the shared empty string, never NULL */
-    langtag->language = EMPTY;
-    langtag->script = EMPTY;
-    langtag->region = EMPTY;
-    langtag->grandfathered = EMPTY;
-    langtag->privateuse = EMPTY;
-
-    for (slot = 0; slot < MAXEXTLANG; slot++) {
-        langtag->extlang[slot] = NULL;
-    }
-
-    langtag->variants = NULL;
-    langtag->extensions = NULL;
-}
-
-/*
- * Writes the language subtag for localeID to appendAt.
- *
- * The language is taken from uloc_getLanguage(). A missing language is
- * written as "und". A language that cannot be extracted, or that is not a
- * well-formed language subtag, is an error (U_ILLEGAL_ARGUMENT_ERROR, return
- * 0) in strict mode and is written as "und" otherwise. Deprecated codes
- * listed in DEPRECATEDLANGS are replaced by their current equivalents.
- *
- * At most capacity bytes are written; the result is terminated through
- * u_terminateChars(). Returns the full length of the subtag, which may
- * exceed capacity. Does nothing and returns 0 when *status is already a
- * failure.
- */
-static int32_t
-_appendLanguageToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
-    char lang[ULOC_LANG_CAPACITY];
-    UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t langLen;
-    int32_t written = 0;
-    UBool useUndetermined = FALSE;
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    langLen = uloc_getLanguage(localeID, lang, sizeof(lang), &localStatus);
-    if (U_FAILURE(localStatus) || localStatus == U_STRING_NOT_TERMINATED_WARNING) {
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-            return 0;
-        }
-        langLen = 0;
-    }
-
-    /* Note: returned language code is in lower case letters */
-
-    if (langLen == 0) {
-        useUndetermined = TRUE;
-    } else if (!_isLanguageSubtag(lang, langLen)) {
-        /* invalid language code */
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-            return 0;
-        }
-        useUndetermined = TRUE;
-    }
-
-    if (useUndetermined) {
-        if (written < capacity) {
-            uprv_memcpy(appendAt + written, LANG_UND, uprv_min(LANG_UND_LEN, capacity - written));
-        }
-        written += LANG_UND_LEN;
-    } else {
-        /* resolve deprecated */
-        /* lang is only modified on a match, right before leaving the loop,
-           so its length can be measured once up front. */
-        const size_t inputLen = uprv_strlen(lang);
-        int32_t idx;
-
-        for (idx = 0; idx < UPRV_LENGTHOF(DEPRECATEDLANGS); idx += 2) {
-            // 2-letter deprecated subtags are listed before 3-letter
-            // ones in DEPRECATEDLANGS[]. Stop at the first entry that is
-            // longer than the input: no later entry can match either.
-            if (inputLen < uprv_strlen(DEPRECATEDLANGS[idx])) {
-                break;
-            }
-            if (uprv_compareInvCharsAsAscii(lang, DEPRECATEDLANGS[idx]) == 0) {
-                uprv_strcpy(lang, DEPRECATEDLANGS[idx + 1]);
-                langLen = (int32_t)uprv_strlen(lang);
-                break;
-            }
-        }
-        if (written < capacity) {
-            uprv_memcpy(appendAt + written, lang, uprv_min(langLen, capacity - written));
-        }
-        written += langLen;
-    }
-
-    u_terminateChars(appendAt, capacity, written, status);
-    return written;
-}
-
-/*
- * Writes "-" followed by the script subtag of localeID to appendAt, or
- * nothing when the locale has no script.
- *
- * A script that cannot be extracted, or that is not a well-formed script
- * subtag, is skipped (return 0, nothing terminated); in strict mode *status
- * is additionally set to U_ILLEGAL_ARGUMENT_ERROR.
- *
- * At most capacity bytes are written; the result is terminated through
- * u_terminateChars(). Returns the full length of the appended text, which
- * may exceed capacity. Does nothing and returns 0 when *status is already
- * a failure.
- */
-static int32_t
-_appendScriptToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
-    char script[ULOC_SCRIPT_CAPACITY];
-    UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t scriptLen;
-    int32_t written = 0;
-    UBool rejected;
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    scriptLen = uloc_getScript(localeID, script, sizeof(script), &localStatus);
-
-    if (U_FAILURE(localStatus) || localStatus == U_STRING_NOT_TERMINATED_WARNING) {
-        rejected = TRUE;
-    } else if (scriptLen > 0 && !_isScriptSubtag(script, scriptLen)) {
-        /* invalid script code */
-        rejected = TRUE;
-    } else {
-        rejected = FALSE;
-    }
-    if (rejected) {
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-        return 0;
-    }
-
-    if (scriptLen > 0) {
-        if (written < capacity) {
-            *(appendAt + written) = SEP;
-        }
-        written++;
-        if (written < capacity) {
-            uprv_memcpy(appendAt + written, script, uprv_min(scriptLen, capacity - written));
-        }
-        written += scriptLen;
-    }
-
-    u_terminateChars(appendAt, capacity, written, status);
-    return written;
-}
-
-/*
- * Writes "-" followed by the region subtag of localeID to appendAt, or
- * nothing when the locale has no region. Deprecated region codes listed in
- * DEPRECATEDREGIONS are replaced by their current equivalents.
- *
- * A region that cannot be extracted, or that is not a well-formed region
- * subtag, is skipped (return 0, nothing terminated); in strict mode *status
- * is additionally set to U_ILLEGAL_ARGUMENT_ERROR.
- *
- * At most capacity bytes are written; the result is terminated through
- * u_terminateChars(). Returns the full length of the appended text, which
- * may exceed capacity. Does nothing and returns 0 when *status is already
- * a failure.
- */
-static int32_t
-_appendRegionToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UErrorCode* status) {
-    char region[ULOC_COUNTRY_CAPACITY];
-    UErrorCode localStatus = U_ZERO_ERROR;
-    int32_t regionLen;
-    int32_t written = 0;
-    UBool rejected;
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    regionLen = uloc_getCountry(localeID, region, sizeof(region), &localStatus);
-
-    if (U_FAILURE(localStatus) || localStatus == U_STRING_NOT_TERMINATED_WARNING) {
-        rejected = TRUE;
-    } else if (regionLen > 0 && !_isRegionSubtag(region, regionLen)) {
-        /* invalid region code */
-        rejected = TRUE;
-    } else {
-        rejected = FALSE;
-    }
-    if (rejected) {
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-        return 0;
-    }
-
-    if (regionLen > 0) {
-        int idx;
-
-        if (written < capacity) {
-            *(appendAt + written) = SEP;
-        }
-        written++;
-
-        /* resolve deprecated */
-        for (idx = 0; idx < UPRV_LENGTHOF(DEPRECATEDREGIONS); idx += 2) {
-            if (uprv_compareInvCharsAsAscii(region, DEPRECATEDREGIONS[idx]) != 0) {
-                continue;
-            }
-            uprv_strcpy(region, DEPRECATEDREGIONS[idx + 1]);
-            regionLen = (int32_t)uprv_strlen(region);
-            break;
-        }
-
-        if (written < capacity) {
-            uprv_memcpy(appendAt + written, region, uprv_min(regionLen, capacity - written));
-        }
-        written += regionLen;
-    }
-
-    u_terminateChars(appendAt, capacity, written, status);
-    return written;
-}
-
-/*
- * Writes the variant subtags of localeID to appendAt, each preceded by "-".
- *
- * The variant string from uloc_getVariant() is split in place at '-' and '_',
- * each piece is lower-cased and validated with _isVariantSubtag(), and the
- * valid pieces are written in their original order with duplicates dropped.
- * When the whole variant is exactly "posix" it is not written; *hadPosix is
- * set to TRUE instead so that the caller can emit it as an extension.
- *
- * In strict mode an empty piece, an ill-formed piece or a duplicate sets
- * *status to U_ILLEGAL_ARGUMENT_ERROR and 0 is returned. In non-strict mode
- * empty and duplicate pieces are skipped, and scanning stops at the first
- * ill-formed piece that is a valid private use subtag.
- *
- * At most capacity bytes are written; the result is terminated through
- * u_terminateChars(). Returns the full length of the appended text, which
- * may exceed capacity. Does nothing and returns 0 when *status is already
- * a failure. *hadPosix is never reset to FALSE here.
- */
-static int32_t
-_appendVariantsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool *hadPosix, UErrorCode* status) {
-    char buf[ULOC_FULLNAME_CAPACITY];
-    UErrorCode tmpStatus = U_ZERO_ERROR;
-    int32_t len, i;
-    int32_t reslen = 0;
-
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
-    if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
-        if (strict) {
-            *status = U_ILLEGAL_ARGUMENT_ERROR;
-        }
-        return 0;
-    }
-
-    if (len > 0) {
-        char *p, *pVar;
-        UBool bNext = TRUE;
-        VariantListEntry *var;
-        VariantListEntry *varFirst = NULL;
-
-        /* pVar marks the start of the piece being scanned (NULL between
-           pieces); p is the scan position. List entries point into buf. */
-        pVar = NULL;
-        p = buf;
-        while (bNext) {
-            if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
-                if (*p == 0) {
-                    bNext = FALSE;
-                } else {
-                    *p = 0; /* terminate */
-                }
-                if (pVar == NULL) {
-                    if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    }
-                    /* ignore empty variant */
-                } else {
-                    /* ICU uses upper case letters for variants, but
-                       the canonical format is lowercase in BCP47 */
-                    for (i = 0; *(pVar + i) != 0; i++) {
-                        *(pVar + i) = uprv_tolower(*(pVar + i));
-                    }
-
-                    /* validate */
-                    if (_isVariantSubtag(pVar, -1)) {
-                        /* true unless this piece is "posix" AND it is the entire variant string */
-                        if (uprv_strcmp(pVar,POSIX_VALUE) || len != (int32_t)uprv_strlen(POSIX_VALUE)) {
-                            /* emit the variant to the list */
-                            var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
-                            if (var == NULL) {
-                                *status = U_MEMORY_ALLOCATION_ERROR;
-                                break;
-                            }
-                            var->variant = pVar;
-                            if (!_addVariantToList(&varFirst, var)) {
-                                /* duplicated variant */
-                                uprv_free(var);
-                                if (strict) {
-                                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                                    break;
-                                }
-                            }
-                        } else {
-                            /* Special handling for POSIX variant, need to remember that we had it and then */
-                            /* treat it like an extension later. */
-                            *hadPosix = TRUE;
-                        }
-                    } else if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    } else if (_isPrivateuseValueSubtag(pVar, -1)) {
-                        /* Handle private use subtags separately */
-                        break;
-                    }
-                }
-                /* reset variant starting position */
-                pVar = NULL;
-            } else if (pVar == NULL) {
-                pVar = p;
-            }
-            p++;
-        }
-
-        if (U_SUCCESS(*status)) {
-            if (varFirst != NULL) {
-                int32_t varLen;
-
-                /* write out validated/normalized variants to the target */
-                var = varFirst;
-                while (var != NULL) {
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = SEP;
-                    }
-                    reslen++;
-                    varLen = (int32_t)uprv_strlen(var->variant);
-                    if (reslen < capacity) {
-                        uprv_memcpy(appendAt + reslen, var->variant, uprv_min(varLen, capacity - reslen));
-                    }
-                    reslen += varLen;
-                    var = var->next;
-                }
-            }
-        }
-
-        /* clean up (only the nodes: the strings live in buf) */
-        var = varFirst;
-        while (var != NULL) {
-            VariantListEntry *tmpVar = var->next;
-            uprv_free(var);
-            var = tmpVar;
-        }
-
-        if (U_FAILURE(*status)) {
-            return 0;
-        }
-    }
-
-    u_terminateChars(appendAt, capacity, reslen, status);
-    return reslen;
-}
-
-/*
- * Writes the keywords of localeID to appendAt as language tag extensions.
- *
- * Every keyword returned by uloc_openKeywords() is converted as follows:
- *   - "attribute": its value is split at '-' and the pieces are collected
- *     in a sorted attribute list;
- *   - any other key longer than one character: key and value are mapped
- *     with uloc_toUnicodeLocaleKey() / uloc_toUnicodeLocaleType(); a value
- *     that is returned unmapped is lower-cased;
- *   - one-character keys: 'x' values must be valid private use subtags,
- *     other keys must be extension singletons with valid extension subtags.
- * When hadPosix is TRUE an extra "va"/"posix" extension is added.
- * The collected extensions are then written in sorted order; the first
- * multi-character key is preceded by the "-u" singleton.
- *
- * A keyword that cannot be converted is an error
- * (U_ILLEGAL_ARGUMENT_ERROR) in strict mode and is skipped otherwise.
- * Allocation failures set U_MEMORY_ALLOCATION_ERROR. An attribute piece
- * that does not fit into the internal buffer sets
- * U_ILLEGAL_ARGUMENT_ERROR regardless of strict.
- *
- * At most capacity bytes are written. Returns 0 on failure, otherwise the
- * value returned by u_terminateChars() for the appended length.
- */
-static int32_t
-_appendKeywordsToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
-    char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY] = { 0 };
-    int32_t attrBufLength = 0;
-    UEnumeration *keywordEnum = NULL;
-    int32_t reslen = 0;
-
-    keywordEnum = uloc_openKeywords(localeID, status);
-    if (U_FAILURE(*status) && !hadPosix) {
-        uenum_close(keywordEnum);
-        return 0;
-    }
-    if (keywordEnum != NULL || hadPosix) {
-        /* reorder extensions */
-        int32_t len;
-        const char *key;
-        ExtensionListEntry *firstExt = NULL;
-        ExtensionListEntry *ext;
-        AttributeListEntry *firstAttr = NULL;
-        AttributeListEntry *attr;
-        char *attrValue;
-        CharStringPool extBufPool;
-        const char *bcpKey=nullptr, *bcpValue=nullptr;
-        UErrorCode tmpStatus = U_ZERO_ERROR;
-        int32_t keylen;
-        UBool isBcpUExt;
-
-        while (TRUE) {
-            icu::CharString buf;
-            key = uenum_next(keywordEnum, NULL, status);
-            if (key == NULL) {
-                break;
-            }
-            char* buffer;
-            int32_t resultCapacity = ULOC_KEYWORD_AND_VALUES_CAPACITY;
-
-            /* fetch the keyword value, growing the buffer until it fits */
-            for (;;) {
-                buffer = buf.getAppendBuffer(
-                        /*minCapacity=*/resultCapacity,
-                        /*desiredCapacityHint=*/resultCapacity,
-                        resultCapacity,
-                        tmpStatus);
-
-                if (U_FAILURE(tmpStatus)) {
-                    break;
-                }
-
-                len = uloc_getKeywordValue(
-                        localeID, key, buffer, resultCapacity, &tmpStatus);
-
-                if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
-                    break;
-                }
-
-                resultCapacity = len;
-                tmpStatus = U_ZERO_ERROR;
-            }
-
-            if (U_FAILURE(tmpStatus)) {
-                if (tmpStatus == U_MEMORY_ALLOCATION_ERROR) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    break;
-                }
-                if (strict) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                    break;
-                }
-                /* ignore this keyword */
-                tmpStatus = U_ZERO_ERROR;
-                continue;
-            }
-
-            buf.append(buffer, len, tmpStatus);
-            if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
-                tmpStatus = U_ZERO_ERROR;  // Terminators provided by CharString.
-            }
-
-            keylen = (int32_t)uprv_strlen(key);
-            isBcpUExt = (keylen > 1);
-
-            /* special keyword used for representing Unicode locale attributes */
-            if (uprv_strcmp(key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                if (len > 0) {
-                    int32_t i = 0;
-                    while (TRUE) {
-                        UBool attrTooLong = FALSE;
-
-                        attrBufLength = 0;
-                        for (; i < len; i++) {
-                            if (buf[i] != '-') {
-                                /* The keyword value may be longer than attrBuf
-                                   (the value buffer above grows on demand), so
-                                   the copy has to be bounded; one byte is kept
-                                   for the terminator. */
-                                if (attrBufLength >= (int32_t)sizeof(attrBuf) - 1) {
-                                    attrTooLong = TRUE;
-                                    break;
-                                }
-                                attrBuf[attrBufLength++] = buf[i];
-                            } else {
-                                i++;
-                                break;
-                            }
-                        }
-                        if (attrTooLong) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        if (attrBufLength > 0) {
-                            attrBuf[attrBufLength] = 0;
-
-                        } else if (i >= len){
-                            break;
-                        }
-
-                        /* create AttributeListEntry */
-                        attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
-                        if (attr == NULL) {
-                            *status = U_MEMORY_ALLOCATION_ERROR;
-                            break;
-                        }
-                        attrValue = (char*)uprv_malloc(attrBufLength + 1);
-                        if (attrValue == NULL) {
-                            /* attr is not linked into any list yet: release it here */
-                            uprv_free(attr);
-                            *status = U_MEMORY_ALLOCATION_ERROR;
-                            break;
-                        }
-                        /* Copy exactly attrBufLength bytes. For an empty piece
-                           attrBuf still holds the previous piece, so a plain
-                           string copy would write past the 1-byte allocation. */
-                        uprv_memcpy(attrValue, attrBuf, attrBufLength);
-                        attrValue[attrBufLength] = 0;
-                        attr->attribute = attrValue;
-
-                        if (!_addAttributeToList(&firstAttr, attr)) {
-                            uprv_free(attr);
-                            uprv_free(attrValue);
-                            if (strict) {
-                                *status = U_ILLEGAL_ARGUMENT_ERROR;
-                                break;
-                            }
-                        }
-                    }
-                    /* for a place holder ExtensionListEntry */
-                    bcpKey = LOCALE_ATTRIBUTE_KEY;
-                    bcpValue = NULL;
-                }
-            } else if (isBcpUExt) {
-                bcpKey = uloc_toUnicodeLocaleKey(key);
-                if (bcpKey == NULL) {
-                    if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    }
-                    continue;
-                }
-
-                /* we've checked buf is null-terminated above */
-                bcpValue = uloc_toUnicodeLocaleType(key, buf.data());
-                if (bcpValue == NULL) {
-                    if (strict) {
-                        *status = U_ILLEGAL_ARGUMENT_ERROR;
-                        break;
-                    }
-                    continue;
-                }
-                if (bcpValue == buf.data()) {
-                    /*
-                    When uloc_toUnicodeLocaleType(key, buf) returns the
-                    input value as is, the value is well-formed, but has
-                    no known mapping. This implementation normalizes the
-                    value to lower case
-                    */
-                    icu::CharString* extBuf = extBufPool.create();
-                    if (extBuf == nullptr) {
-                        *status = U_MEMORY_ALLOCATION_ERROR;
-                        break;
-                    }
-                    int32_t bcpValueLen = static_cast<int32_t>(uprv_strlen(bcpValue));
-                    int32_t resultCapacity;
-                    char* pExtBuf = extBuf->getAppendBuffer(
-                            /*minCapacity=*/bcpValueLen,
-                            /*desiredCapacityHint=*/bcpValueLen,
-                            resultCapacity,
-                            tmpStatus);
-                    if (U_FAILURE(tmpStatus)) {
-                        *status = tmpStatus;
-                        break;
-                    }
-
-                    uprv_strcpy(pExtBuf, bcpValue);
-                    T_CString_toLowerCase(pExtBuf);
-
-                    extBuf->append(pExtBuf, bcpValueLen, tmpStatus);
-                    if (U_FAILURE(tmpStatus)) {
-                        *status = tmpStatus;
-                        break;
-                    }
-
-                    /* buf goes out of scope at the end of this iteration;
-                       the pooled copy stays valid until the pool is destroyed */
-                    bcpValue = extBuf->data();
-                }
-            } else {
-                if (*key == PRIVATEUSE) {
-                    if (!_isPrivateuseValueSubtags(buf.data(), len)) {
-                        if (strict) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        continue;
-                    }
-                } else {
-                    if (!_isExtensionSingleton(key, keylen) || !_isExtensionSubtags(buf.data(), len)) {
-                        if (strict) {
-                            *status = U_ILLEGAL_ARGUMENT_ERROR;
-                            break;
-                        }
-                        continue;
-                    }
-                }
-                bcpKey = key;
-                icu::CharString* extBuf = extBufPool.create();
-                if (extBuf == nullptr) {
-                    *status = U_MEMORY_ALLOCATION_ERROR;
-                    break;
-                }
-                extBuf->append(buf.data(), len, tmpStatus);
-                if (U_FAILURE(tmpStatus)) {
-                    *status = tmpStatus;
-                    break;
-                }
-                bcpValue = extBuf->data();
-            }
-
-            /* create ExtensionListEntry */
-            ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
-            if (ext == NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                break;
-            }
-            ext->key = bcpKey;
-            ext->value = bcpValue;
-
-            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
-                uprv_free(ext);
-                if (strict) {
-                    *status = U_ILLEGAL_ARGUMENT_ERROR;
-                    break;
-                }
-            }
-        }
-
-        /* Special handling for POSIX variant - add the keywords for POSIX */
-        if (hadPosix) {
-            /* create ExtensionListEntry for POSIX */
-            ext = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
-            if (ext == NULL) {
-                *status = U_MEMORY_ALLOCATION_ERROR;
-                goto cleanup;
-            }
-            ext->key = POSIX_KEY;
-            ext->value = POSIX_VALUE;
-
-            if (!_addExtensionToList(&firstExt, ext, TRUE)) {
-                uprv_free(ext);
-            }
-        }
-
-        if (U_SUCCESS(*status) && (firstExt != NULL || firstAttr != NULL)) {
-            UBool startLDMLExtension = FALSE;
-            for (ext = firstExt; ext; ext = ext->next) {
-                if (!startLDMLExtension && uprv_strlen(ext->key) > 1) {
-                    /* first LDML u singleton extension */
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = SEP;
-                    }
-                    reslen++;
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = LDMLEXT;
-                    }
-                    reslen++;
-
-                    startLDMLExtension = TRUE;
-                }
-
-                /* write out the sorted BCP47 attributes, extensions and private use */
-                if (uprv_strcmp(ext->key, LOCALE_ATTRIBUTE_KEY) == 0) {
-                    /* write the value for the attributes */
-                    for (attr = firstAttr; attr; attr = attr->next) {
-                        if (reslen < capacity) {
-                            *(appendAt + reslen) = SEP;
-                        }
-                        reslen++;
-                        len = (int32_t)uprv_strlen(attr->attribute);
-                        if (reslen < capacity) {
-                            uprv_memcpy(appendAt + reslen, attr->attribute, uprv_min(len, capacity - reslen));
-                        }
-                        reslen += len;
-                    }
-                } else {
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = SEP;
-                    }
-                    reslen++;
-                    len = (int32_t)uprv_strlen(ext->key);
-                    if (reslen < capacity) {
-                        uprv_memcpy(appendAt + reslen, ext->key, uprv_min(len, capacity - reslen));
-                    }
-                    reslen += len;
-                    if (reslen < capacity) {
-                        *(appendAt + reslen) = SEP;
-                    }
-                    reslen++;
-                    len = (int32_t)uprv_strlen(ext->value);
-                    if (reslen < capacity) {
-                        uprv_memcpy(appendAt + reslen, ext->value, uprv_min(len, capacity - reslen));
-                    }
-                    reslen += len;
-                }
-            }
-        }
-cleanup:
-        /* clean up */
-        ext = firstExt;
-        while (ext != NULL) {
-            ExtensionListEntry *tmpExt = ext->next;
-            uprv_free(ext);
-            ext = tmpExt;
-        }
-
-        /* attribute nodes own their strings, so both are released */
-        attr = firstAttr;
-        while (attr != NULL) {
-            AttributeListEntry *tmpAttr = attr->next;
-            char *pValue = (char *)attr->attribute;
-            uprv_free(pValue);
-            uprv_free(attr);
-            attr = tmpAttr;
-        }
-
-        uenum_close(keywordEnum);
-
-        if (U_FAILURE(*status)) {
-            return 0;
-        }
-    }
-
-    return u_terminateChars(appendAt, capacity, reslen, status);
-}
-
-/**
- * Append keywords parsed from LDML extension value
- * e.g. "u-ca-gregory-co-trad" -> {calendar = gregorian} {collation = traditional}
- * Note: char* buf is used for storing keywords
- */
-static void
-_appendLDMLExtensionAsKeywords(const char* ldmlext, ExtensionListEntry** appendTo, char* buf, int32_t bufSize, UBool *posixVariant, UErrorCode *status) {
- const char *pTag; /* beginning of current subtag */
- const char *pKwds; /* beginning of key-type pairs */
- UBool variantExists = *posixVariant;
-
- ExtensionListEntry *kwdFirst = NULL; /* first LDML keyword */
- ExtensionListEntry *kwd, *nextKwd;
-
- AttributeListEntry *attrFirst = NULL; /* first attribute */
- AttributeListEntry *attr, *nextAttr;
-
- int32_t len;
- int32_t bufIdx = 0;
-
- char attrBuf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
- int32_t attrBufIdx = 0;
-
- /* Reset the posixVariant value */
- *posixVariant = FALSE;
-
- pTag = ldmlext;
- pKwds = NULL;
-
- /* Iterate through u extension attributes */
- while (*pTag) {
- /* locate next separator char */
- for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
-
- if (ultag_isUnicodeLocaleKey(pTag, len)) {
- pKwds = pTag;
- break;
- }
-
- /* add this attribute to the list */
- attr = (AttributeListEntry*)uprv_malloc(sizeof(AttributeListEntry));
- if (attr == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto cleanup;
- }
-
- if (len < (int32_t)sizeof(attrBuf) - attrBufIdx) {
- uprv_memcpy(&attrBuf[attrBufIdx], pTag, len);
- attrBuf[attrBufIdx + len] = 0;
- attr->attribute = &attrBuf[attrBufIdx];
- attrBufIdx += (len + 1);
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- uprv_free(attr);
- goto cleanup;
- }
-
- if (!_addAttributeToList(&attrFirst, attr)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- uprv_free(attr);
- goto cleanup;
- }
-
- /* next tag */
- pTag += len;
- if (*pTag) {
- /* next to the separator */
- pTag++;
- }
- }
-
- if (attrFirst) {
- /* emit attributes as an LDML keyword, e.g. attribute=attr1-attr2 */
-
- if (attrBufIdx > bufSize) {
- /* attrBufIdx == <total length of attribute subtag> + 1 */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- goto cleanup;
- }
-
- kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto cleanup;
- }
-
- kwd->key = LOCALE_ATTRIBUTE_KEY;
- kwd->value = buf;
-
- /* attribute subtags sorted in alphabetical order as type */
- attr = attrFirst;
- while (attr != NULL) {
- nextAttr = attr->next;
-
- /* buffer size check is done above */
- if (attr != attrFirst) {
- *(buf + bufIdx) = SEP;
- bufIdx++;
- }
-
- len = static_cast<int32_t>(uprv_strlen(attr->attribute));
- uprv_memcpy(buf + bufIdx, attr->attribute, len);
- bufIdx += len;
-
- attr = nextAttr;
- }
- *(buf + bufIdx) = 0;
- bufIdx++;
-
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- uprv_free(kwd);
- goto cleanup;
- }
-
- /* once keyword entry is created, delete the attribute list */
- attr = attrFirst;
- while (attr != NULL) {
- nextAttr = attr->next;
- uprv_free(attr);
- attr = nextAttr;
- }
- attrFirst = NULL;
- }
-
- if (pKwds) {
- const char *pBcpKey = NULL; /* u extenstion key subtag */
- const char *pBcpType = NULL; /* beginning of u extension type subtag(s) */
- int32_t bcpKeyLen = 0;
- int32_t bcpTypeLen = 0;
- UBool isDone = FALSE;
-
- pTag = pKwds;
- /* BCP47 representation of LDML key/type pairs */
- while (!isDone) {
- const char *pNextBcpKey = NULL;
- int32_t nextBcpKeyLen = 0;
- UBool emitKeyword = FALSE;
-
- if (*pTag) {
- /* locate next separator char */
- for (len = 0; *(pTag + len) && *(pTag + len) != SEP; len++);
-
- if (ultag_isUnicodeLocaleKey(pTag, len)) {
- if (pBcpKey) {
- emitKeyword = TRUE;
- pNextBcpKey = pTag;
- nextBcpKeyLen = len;
- } else {
- pBcpKey = pTag;
- bcpKeyLen = len;
- }
- } else {
- U_ASSERT(pBcpKey != NULL);
- /* within LDML type subtags */
- if (pBcpType) {
- bcpTypeLen += (len + 1);
- } else {
- pBcpType = pTag;
- bcpTypeLen = len;
- }
- }
-
- /* next tag */
- pTag += len;
- if (*pTag) {
- /* next to the separator */
- pTag++;
- }
- } else {
- /* processing last one */
- emitKeyword = TRUE;
- isDone = TRUE;
- }
-
- if (emitKeyword) {
- const char *pKey = NULL; /* LDML key */
- const char *pType = NULL; /* LDML type */
-
- char bcpKeyBuf[9]; /* BCP key length is always 2 for now */
-
- U_ASSERT(pBcpKey != NULL);
-
- if (bcpKeyLen >= (int32_t)sizeof(bcpKeyBuf)) {
- /* the BCP key is invalid */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- goto cleanup;
- }
-
- uprv_strncpy(bcpKeyBuf, pBcpKey, bcpKeyLen);
- bcpKeyBuf[bcpKeyLen] = 0;
-
- /* u extension key to LDML key */
- pKey = uloc_toLegacyKey(bcpKeyBuf);
- if (pKey == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- goto cleanup;
- }
- if (pKey == bcpKeyBuf) {
- /*
- The key returned by toLegacyKey points to the input buffer.
- We normalize the result key to lower case.
- */
- T_CString_toLowerCase(bcpKeyBuf);
- if (bufSize - bufIdx - 1 >= bcpKeyLen) {
- uprv_memcpy(buf + bufIdx, bcpKeyBuf, bcpKeyLen);
- pKey = buf + bufIdx;
- bufIdx += bcpKeyLen;
- *(buf + bufIdx) = 0;
- bufIdx++;
- } else {
- *status = U_BUFFER_OVERFLOW_ERROR;
- goto cleanup;
- }
- }
-
- if (pBcpType) {
- char bcpTypeBuf[128]; /* practically long enough even considering multiple subtag type */
- if (bcpTypeLen >= (int32_t)sizeof(bcpTypeBuf)) {
- /* the BCP type is too long */
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- goto cleanup;
- }
-
- uprv_strncpy(bcpTypeBuf, pBcpType, bcpTypeLen);
- bcpTypeBuf[bcpTypeLen] = 0;
-
- /* BCP type to locale type */
- pType = uloc_toLegacyType(pKey, bcpTypeBuf);
- if (pType == NULL) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- goto cleanup;
- }
- if (pType == bcpTypeBuf) {
- /*
- The type returned by toLegacyType points to the input buffer.
- We normalize the result type to lower case.
- */
- /* normalize to lower case */
- T_CString_toLowerCase(bcpTypeBuf);
- if (bufSize - bufIdx - 1 >= bcpTypeLen) {
- uprv_memcpy(buf + bufIdx, bcpTypeBuf, bcpTypeLen);
- pType = buf + bufIdx;
- bufIdx += bcpTypeLen;
- *(buf + bufIdx) = 0;
- bufIdx++;
- } else {
- *status = U_BUFFER_OVERFLOW_ERROR;
- goto cleanup;
- }
- }
- } else {
- /* typeless - default type value is "yes" */
- pType = LOCALE_TYPE_YES;
- }
-
- /* Special handling for u-va-posix, since we want to treat this as a variant,
- not as a keyword */
- if (!variantExists && !uprv_strcmp(pKey, POSIX_KEY) && !uprv_strcmp(pType, POSIX_VALUE) ) {
- *posixVariant = TRUE;
- } else {
- /* create an ExtensionListEntry for this keyword */
- kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto cleanup;
- }
-
- kwd->key = pKey;
- kwd->value = pType;
-
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- // duplicate keyword is allowed, Only the first
- // is honored.
- uprv_free(kwd);
- }
- }
-
- pBcpKey = pNextBcpKey;
- bcpKeyLen = pNextBcpKey != NULL ? nextBcpKeyLen : 0;
- pBcpType = NULL;
- bcpTypeLen = 0;
- }
- }
- }
-
- kwd = kwdFirst;
- while (kwd != NULL) {
- nextKwd = kwd->next;
- _addExtensionToList(appendTo, kwd, FALSE);
- kwd = nextKwd;
- }
-
- return;
-
-cleanup:
- attr = attrFirst;
- while (attr != NULL) {
- nextAttr = attr->next;
- uprv_free(attr);
- attr = nextAttr;
- }
-
- kwd = kwdFirst;
- while (kwd != NULL) {
- nextKwd = kwd->next;
- uprv_free(kwd);
- kwd = nextKwd;
- }
-}
-
-
-static int32_t
-_appendKeywords(ULanguageTag* langtag, char* appendAt, int32_t capacity, UErrorCode* status) {
- int32_t reslen = 0;
- int32_t i, n;
- int32_t len;
- ExtensionListEntry *kwdFirst = NULL;
- ExtensionListEntry *kwd;
- const char *key, *type;
- char *kwdBuf = NULL;
- int32_t kwdBufLength = capacity;
- UBool posixVariant = FALSE;
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- kwdBuf = (char*)uprv_malloc(kwdBufLength);
- if (kwdBuf == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return 0;
- }
-
- /* Determine if variants already exists */
- if (ultag_getVariantsSize(langtag)) {
- posixVariant = TRUE;
- }
-
- n = ultag_getExtensionsSize(langtag);
-
- /* resolve locale keywords and reordering keys */
- for (i = 0; i < n; i++) {
- key = ultag_getExtensionKey(langtag, i);
- type = ultag_getExtensionValue(langtag, i);
- if (*key == LDMLEXT) {
- _appendLDMLExtensionAsKeywords(type, &kwdFirst, kwdBuf, kwdBufLength, &posixVariant, status);
- if (U_FAILURE(*status)) {
- break;
- }
- } else {
- kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- break;
- }
- kwd->key = key;
- kwd->value = type;
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- uprv_free(kwd);
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- }
- }
- }
-
- if (U_SUCCESS(*status)) {
- type = ultag_getPrivateUse(langtag);
- if ((int32_t)uprv_strlen(type) > 0) {
- /* add private use as a keyword */
- kwd = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
- if (kwd == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- } else {
- kwd->key = PRIVATEUSE_KEY;
- kwd->value = type;
- if (!_addExtensionToList(&kwdFirst, kwd, FALSE)) {
- uprv_free(kwd);
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- }
- }
- }
-
- /* If a POSIX variant was in the extensions, write it out before writing the keywords. */
-
- if (U_SUCCESS(*status) && posixVariant) {
- len = (int32_t) uprv_strlen(_POSIX);
- if (reslen < capacity) {
- uprv_memcpy(appendAt + reslen, _POSIX, uprv_min(len, capacity - reslen));
- }
- reslen += len;
- }
-
- if (U_SUCCESS(*status) && kwdFirst != NULL) {
- /* write out the sorted keywords */
- UBool firstValue = TRUE;
- kwd = kwdFirst;
- do {
- if (reslen < capacity) {
- if (firstValue) {
- /* '@' */
- *(appendAt + reslen) = LOCALE_EXT_SEP;
- firstValue = FALSE;
- } else {
- /* ';' */
- *(appendAt + reslen) = LOCALE_KEYWORD_SEP;
- }
- }
- reslen++;
-
- /* key */
- len = (int32_t)uprv_strlen(kwd->key);
- if (reslen < capacity) {
- uprv_memcpy(appendAt + reslen, kwd->key, uprv_min(len, capacity - reslen));
- }
- reslen += len;
-
- /* '=' */
- if (reslen < capacity) {
- *(appendAt + reslen) = LOCALE_KEY_TYPE_SEP;
- }
- reslen++;
-
- /* type */
- len = (int32_t)uprv_strlen(kwd->value);
- if (reslen < capacity) {
- uprv_memcpy(appendAt + reslen, kwd->value, uprv_min(len, capacity - reslen));
- }
- reslen += len;
-
- kwd = kwd->next;
- } while (kwd);
- }
-
- /* clean up */
- kwd = kwdFirst;
- while (kwd != NULL) {
- ExtensionListEntry *tmpKwd = kwd->next;
- uprv_free(kwd);
- kwd = tmpKwd;
- }
-
- uprv_free(kwdBuf);
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- return u_terminateChars(appendAt, capacity, reslen, status);
-}
-
-static int32_t
-_appendPrivateuseToLanguageTag(const char* localeID, char* appendAt, int32_t capacity, UBool strict, UBool hadPosix, UErrorCode* status) {
- (void)hadPosix;
- char buf[ULOC_FULLNAME_CAPACITY];
- char tmpAppend[ULOC_FULLNAME_CAPACITY];
- UErrorCode tmpStatus = U_ZERO_ERROR;
- int32_t len, i;
- int32_t reslen = 0;
-
- if (U_FAILURE(*status)) {
- return 0;
- }
-
- len = uloc_getVariant(localeID, buf, sizeof(buf), &tmpStatus);
- if (U_FAILURE(tmpStatus) || tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- }
- return 0;
- }
-
- if (len > 0) {
- char *p, *pPriv;
- UBool bNext = TRUE;
- UBool firstValue = TRUE;
- UBool writeValue;
-
- pPriv = NULL;
- p = buf;
- while (bNext) {
- writeValue = FALSE;
- if (*p == SEP || *p == LOCALE_SEP || *p == 0) {
- if (*p == 0) {
- bNext = FALSE;
- } else {
- *p = 0; /* terminate */
- }
- if (pPriv != NULL) {
- /* Private use in the canonical format is lowercase in BCP47 */
- for (i = 0; *(pPriv + i) != 0; i++) {
- *(pPriv + i) = uprv_tolower(*(pPriv + i));
- }
-
- /* validate */
- if (_isPrivateuseValueSubtag(pPriv, -1)) {
- if (firstValue) {
- if (!_isVariantSubtag(pPriv, -1)) {
- writeValue = TRUE;
- }
- } else {
- writeValue = TRUE;
- }
- } else if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- break;
- } else {
- break;
- }
-
- if (writeValue) {
- if (reslen < capacity) {
- tmpAppend[reslen++] = SEP;
- }
-
- if (firstValue) {
- if (reslen < capacity) {
- tmpAppend[reslen++] = *PRIVATEUSE_KEY;
- }
-
- if (reslen < capacity) {
- tmpAppend[reslen++] = SEP;
- }
-
- len = (int32_t)uprv_strlen(PRIVUSE_VARIANT_PREFIX);
- if (reslen < capacity) {
- uprv_memcpy(tmpAppend + reslen, PRIVUSE_VARIANT_PREFIX, uprv_min(len, capacity - reslen));
- }
- reslen += len;
-
- if (reslen < capacity) {
- tmpAppend[reslen++] = SEP;
- }
-
- firstValue = FALSE;
- }
-
- len = (int32_t)uprv_strlen(pPriv);
- if (reslen < capacity) {
- uprv_memcpy(tmpAppend + reslen, pPriv, uprv_min(len, capacity - reslen));
- }
- reslen += len;
- }
- }
- /* reset private use starting position */
- pPriv = NULL;
- } else if (pPriv == NULL) {
- pPriv = p;
- }
- p++;
- }
-
- if (U_FAILURE(*status)) {
- return 0;
- }
- }
-
- if (U_SUCCESS(*status)) {
- len = reslen;
- if (reslen < capacity) {
- uprv_memcpy(appendAt, tmpAppend, uprv_min(len, capacity - reslen));
- }
- }
-
- u_terminateChars(appendAt, capacity, reslen, status);
-
- return reslen;
-}
-
-/*
-* -------------------------------------------------
-*
-* ultag_ functions
-*
-* -------------------------------------------------
-*/
-
-/* Bit flags used by the parser */
-#define LANG 0x0001
-#define EXTL 0x0002
-#define SCRT 0x0004
-#define REGN 0x0008
-#define VART 0x0010
-#define EXTS 0x0020
-#define EXTV 0x0040
-#define PRIV 0x0080
-
-/**
- * Ticket #12705 - Visual Studio 2015 Update 3 contains a new code optimizer which has problems optimizing
- * this function. (See https://blogs.msdn.microsoft.com/vcblog/2016/05/04/new-code-optimizer/ )
- * As a workaround, we will turn off optimization just for this function on VS2015 Update 3 and above.
- */
-#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
-#pragma optimize( "", off )
-#endif
-
-static ULanguageTag*
-ultag_parse(const char* tag, int32_t tagLen, int32_t* parsedLen, UErrorCode* status) {
- ULanguageTag *t;
- char *tagBuf;
- int16_t next;
- char *pSubtag, *pNext, *pLastGoodPosition;
- int32_t subtagLen;
- int32_t extlangIdx;
- ExtensionListEntry *pExtension;
- char *pExtValueSubtag, *pExtValueSubtagEnd;
- int32_t i;
- UBool privateuseVar = FALSE;
- int32_t grandfatheredLen = 0;
-
- if (parsedLen != NULL) {
- *parsedLen = 0;
- }
-
- if (U_FAILURE(*status)) {
- return NULL;
- }
-
- if (tagLen < 0) {
- tagLen = (int32_t)uprv_strlen(tag);
- }
-
- /* copy the entire string */
- tagBuf = (char*)uprv_malloc(tagLen + 1);
- if (tagBuf == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- uprv_memcpy(tagBuf, tag, tagLen);
- *(tagBuf + tagLen) = 0;
-
- /* create a ULanguageTag */
- t = (ULanguageTag*)uprv_malloc(sizeof(ULanguageTag));
- if (t == NULL) {
- uprv_free(tagBuf);
- *status = U_MEMORY_ALLOCATION_ERROR;
- return NULL;
- }
- _initializeULanguageTag(t);
- t->buf = tagBuf;
-
- if (tagLen < MINLEN) {
- /* the input tag is too short - return empty ULanguageTag */
- return t;
- }
-
- /* check if the tag is grandfathered */
- for (i = 0; i < UPRV_LENGTHOF(GRANDFATHERED); i += 2) {
- if (uprv_stricmp(GRANDFATHERED[i], tagBuf) == 0) {
- int32_t newTagLength;
-
- grandfatheredLen = tagLen; /* back up for output parsedLen */
- newTagLength = static_cast<int32_t>(uprv_strlen(GRANDFATHERED[i+1]));
- if (tagLen < newTagLength) {
- uprv_free(tagBuf);
- tagBuf = (char*)uprv_malloc(newTagLength + 1);
- if (tagBuf == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- ultag_close(t);
- return NULL;
- }
- t->buf = tagBuf;
- tagLen = newTagLength;
- }
- uprv_strcpy(t->buf, GRANDFATHERED[i + 1]);
- break;
- }
- }
-
- size_t parsedLenDelta = 0;
- if (grandfatheredLen == 0) {
- for (i = 0; i < UPRV_LENGTHOF(REDUNDANT); i += 2) {
- const char* redundantTag = REDUNDANT[i];
- size_t redundantTagLen = uprv_strlen(redundantTag);
- // The preferred tag for a redundant tag is always shorter than redundant
- // tag. A redundant tag may or may not be followed by other subtags.
- // (i.e. "zh-yue" or "zh-yue-u-co-pinyin").
- if (uprv_strnicmp(redundantTag, tagBuf, static_cast<uint32_t>(redundantTagLen)) == 0) {
- const char* redundantTagEnd = tagBuf + redundantTagLen;
- if (*redundantTagEnd == '\0' || *redundantTagEnd == SEP) {
- const char* preferredTag = REDUNDANT[i + 1];
- size_t preferredTagLen = uprv_strlen(preferredTag);
- uprv_strncpy(t->buf, preferredTag, preferredTagLen);
- if (*redundantTagEnd == SEP) {
- uprv_memmove(tagBuf + preferredTagLen,
- redundantTagEnd,
- tagLen - redundantTagLen + 1);
- } else {
- tagBuf[preferredTagLen] = '\0';
- }
- // parsedLen should be the length of the input
- // before redundantTag is replaced by preferredTag.
- // Save the delta to add it back later.
- parsedLenDelta = redundantTagLen - preferredTagLen;
- break;
- }
- }
- }
- }
-
- /*
- * langtag = language
- * ["-" script]
- * ["-" region]
- * *("-" variant)
- * *("-" extension)
- * ["-" privateuse]
- */
-
- next = LANG | PRIV;
- pNext = pLastGoodPosition = tagBuf;
- extlangIdx = 0;
- pExtension = NULL;
- pExtValueSubtag = NULL;
- pExtValueSubtagEnd = NULL;
-
- while (pNext) {
- char *pSep;
-
- pSubtag = pNext;
-
- /* locate next separator char */
- pSep = pSubtag;
- while (*pSep) {
- if (*pSep == SEP) {
- break;
- }
- pSep++;
- }
- if (*pSep == 0) {
- /* last subtag */
- pNext = NULL;
- } else {
- pNext = pSep + 1;
- }
- subtagLen = (int32_t)(pSep - pSubtag);
-
- if (next & LANG) {
- if (_isLanguageSubtag(pSubtag, subtagLen)) {
- *pSep = 0; /* terminate */
- // TODO: move deprecated language code handling here.
- t->language = T_CString_toLowerCase(pSubtag);
-
- pLastGoodPosition = pSep;
- next = SCRT | REGN | VART | EXTS | PRIV;
- if (subtagLen <= 3)
- next |= EXTL;
- continue;
- }
- }
- if (next & EXTL) {
- if (_isExtlangSubtag(pSubtag, subtagLen)) {
- *pSep = 0;
- t->extlang[extlangIdx++] = T_CString_toLowerCase(pSubtag);
-
- pLastGoodPosition = pSep;
- if (extlangIdx < 3) {
- next = EXTL | SCRT | REGN | VART | EXTS | PRIV;
- } else {
- next = SCRT | REGN | VART | EXTS | PRIV;
- }
- continue;
- }
- }
- if (next & SCRT) {
- if (_isScriptSubtag(pSubtag, subtagLen)) {
- char *p = pSubtag;
-
- *pSep = 0;
-
- /* to title case */
- *p = uprv_toupper(*p);
- p++;
- for (; *p; p++) {
- *p = uprv_tolower(*p);
- }
-
- t->script = pSubtag;
-
- pLastGoodPosition = pSep;
- next = REGN | VART | EXTS | PRIV;
- continue;
- }
- }
- if (next & REGN) {
- if (_isRegionSubtag(pSubtag, subtagLen)) {
- *pSep = 0;
- // TODO: move deprecated region code handling here.
- t->region = T_CString_toUpperCase(pSubtag);
-
- pLastGoodPosition = pSep;
- next = VART | EXTS | PRIV;
- continue;
- }
- }
- if (next & VART) {
- if (_isVariantSubtag(pSubtag, subtagLen) ||
- (privateuseVar && _isPrivateuseVariantSubtag(pSubtag, subtagLen))) {
- VariantListEntry *var;
- UBool isAdded;
-
- var = (VariantListEntry*)uprv_malloc(sizeof(VariantListEntry));
- if (var == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto error;
- }
- *pSep = 0;
- var->variant = T_CString_toUpperCase(pSubtag);
- isAdded = _addVariantToList(&(t->variants), var);
- if (!isAdded) {
- /* duplicated variant entry */
- uprv_free(var);
- break;
- }
- pLastGoodPosition = pSep;
- next = VART | EXTS | PRIV;
- continue;
- }
- }
- if (next & EXTS) {
- if (_isExtensionSingleton(pSubtag, subtagLen)) {
- if (pExtension != NULL) {
- if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
- /* the previous extension is incomplete */
- uprv_free(pExtension);
- pExtension = NULL;
- break;
- }
-
- /* terminate the previous extension value */
- *pExtValueSubtagEnd = 0;
- pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
-
- /* insert the extension to the list */
- if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
- pLastGoodPosition = pExtValueSubtagEnd;
- } else {
- /* stop parsing here */
- uprv_free(pExtension);
- pExtension = NULL;
- break;
- }
- }
-
- /* create a new extension */
- pExtension = (ExtensionListEntry*)uprv_malloc(sizeof(ExtensionListEntry));
- if (pExtension == NULL) {
- *status = U_MEMORY_ALLOCATION_ERROR;
- goto error;
- }
- *pSep = 0;
- pExtension->key = T_CString_toLowerCase(pSubtag);
- pExtension->value = NULL; /* will be set later */
-
- /*
- * reset the start and the end location of extension value
- * subtags for this extension
- */
- pExtValueSubtag = NULL;
- pExtValueSubtagEnd = NULL;
-
- next = EXTV;
- continue;
- }
- }
- if (next & EXTV) {
- if (_isExtensionSubtag(pSubtag, subtagLen)) {
- if (pExtValueSubtag == NULL) {
- /* if the start postion of this extension's value is not yet,
- this one is the first value subtag */
- pExtValueSubtag = pSubtag;
- }
-
- /* Mark the end of this subtag */
- pExtValueSubtagEnd = pSep;
- next = EXTS | EXTV | PRIV;
-
- continue;
- }
- }
- if (next & PRIV) {
- if (uprv_tolower(*pSubtag) == PRIVATEUSE && subtagLen == 1) {
- char *pPrivuseVal;
-
- if (pExtension != NULL) {
- /* Process the last extension */
- if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
- /* the previous extension is incomplete */
- uprv_free(pExtension);
- pExtension = NULL;
- break;
- } else {
- /* terminate the previous extension value */
- *pExtValueSubtagEnd = 0;
- pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
-
- /* insert the extension to the list */
- if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
- pLastGoodPosition = pExtValueSubtagEnd;
- pExtension = NULL;
- } else {
- /* stop parsing here */
- uprv_free(pExtension);
- pExtension = NULL;
- break;
- }
- }
- }
-
- /* The rest of part will be private use value subtags */
- if (pNext == NULL) {
- /* empty private use subtag */
- break;
- }
- /* back up the private use value start position */
- pPrivuseVal = pNext;
-
- /* validate private use value subtags */
- while (pNext) {
- pSubtag = pNext;
- pSep = pSubtag;
- while (*pSep) {
- if (*pSep == SEP) {
- break;
- }
- pSep++;
- }
- if (*pSep == 0) {
- /* last subtag */
- pNext = NULL;
- } else {
- pNext = pSep + 1;
- }
- subtagLen = (int32_t)(pSep - pSubtag);
-
- if (uprv_strncmp(pSubtag, PRIVUSE_VARIANT_PREFIX, uprv_strlen(PRIVUSE_VARIANT_PREFIX)) == 0) {
- *pSep = 0;
- next = VART;
- privateuseVar = TRUE;
- break;
- } else if (_isPrivateuseValueSubtag(pSubtag, subtagLen)) {
- pLastGoodPosition = pSep;
- } else {
- break;
- }
- }
-
- if (next == VART) {
- continue;
- }
-
- if (pLastGoodPosition - pPrivuseVal > 0) {
- *pLastGoodPosition = 0;
- t->privateuse = T_CString_toLowerCase(pPrivuseVal);
- }
- /* No more subtags, exiting the parse loop */
- break;
- }
- break;
- }
-
- /* If we fell through here, it means this subtag is illegal - quit parsing */
- break;
- }
-
- if (pExtension != NULL) {
- /* Process the last extension */
- if (pExtValueSubtag == NULL || pExtValueSubtagEnd == NULL) {
- /* the previous extension is incomplete */
- uprv_free(pExtension);
- } else {
- /* terminate the previous extension value */
- *pExtValueSubtagEnd = 0;
- pExtension->value = T_CString_toLowerCase(pExtValueSubtag);
- /* insert the extension to the list */
- if (_addExtensionToList(&(t->extensions), pExtension, FALSE)) {
- pLastGoodPosition = pExtValueSubtagEnd;
- } else {
- uprv_free(pExtension);
- }
- }
- }
-
- if (parsedLen != NULL) {
- *parsedLen = (grandfatheredLen > 0) ? grandfatheredLen :
- (int32_t)(pLastGoodPosition - t->buf + parsedLenDelta);
- }
-
- return t;
-
-error:
- ultag_close(t);
- return NULL;
-}
-
-/**
-* Ticket #12705 - Turn optimization back on.
-*/
-#if (defined(_MSC_VER) && (_MSC_VER >= 1900) && defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 190024210))
-#pragma optimize( "", on )
-#endif
-
-static void
-ultag_close(ULanguageTag* langtag) {
-
- if (langtag == NULL) {
- return;
- }
-
- uprv_free(langtag->buf);
-
- if (langtag->variants) {
- VariantListEntry *curVar = langtag->variants;
- while (curVar) {
- VariantListEntry *nextVar = curVar->next;
- uprv_free(curVar);
- curVar = nextVar;
- }
- }
-
- if (langtag->extensions) {
- ExtensionListEntry *curExt = langtag->extensions;
- while (curExt) {
- ExtensionListEntry *nextExt = curExt->next;
- uprv_free(curExt);
- curExt = nextExt;
- }
- }
-
- uprv_free(langtag);
-}
-
-static const char*
-ultag_getLanguage(const ULanguageTag* langtag) {
- return langtag->language;
-}
-
-#if 0
-static const char*
-ultag_getJDKLanguage(const ULanguageTag* langtag) {
- int32_t i;
- for (i = 0; DEPRECATEDLANGS[i] != NULL; i += 2) {
- if (uprv_compareInvCharsAsAscii(DEPRECATEDLANGS[i], langtag->language) == 0) {
- return DEPRECATEDLANGS[i + 1];
- }
- }
- return langtag->language;
-}
-#endif
-
-static const char*
-ultag_getExtlang(const ULanguageTag* langtag, int32_t idx) {
- if (idx >= 0 && idx < MAXEXTLANG) {
- return langtag->extlang[idx];
- }
- return NULL;
-}
-
-static int32_t
-ultag_getExtlangSize(const ULanguageTag* langtag) {
- int32_t size = 0;
- int32_t i;
- for (i = 0; i < MAXEXTLANG; i++) {
- if (langtag->extlang[i]) {
- size++;
- }
- }
- return size;
-}
-
-static const char*
-ultag_getScript(const ULanguageTag* langtag) {
- return langtag->script;
-}
-
-static const char*
-ultag_getRegion(const ULanguageTag* langtag) {
- return langtag->region;
-}
-
-static const char*
-ultag_getVariant(const ULanguageTag* langtag, int32_t idx) {
- const char *var = NULL;
- VariantListEntry *cur = langtag->variants;
- int32_t i = 0;
- while (cur) {
- if (i == idx) {
- var = cur->variant;
- break;
- }
- cur = cur->next;
- i++;
- }
- return var;
-}
-
-static int32_t
-ultag_getVariantsSize(const ULanguageTag* langtag) {
- int32_t size = 0;
- VariantListEntry *cur = langtag->variants;
- while (TRUE) {
- if (cur == NULL) {
- break;
- }
- size++;
- cur = cur->next;
- }
- return size;
-}
-
-static const char*
-ultag_getExtensionKey(const ULanguageTag* langtag, int32_t idx) {
- const char *key = NULL;
- ExtensionListEntry *cur = langtag->extensions;
- int32_t i = 0;
- while (cur) {
- if (i == idx) {
- key = cur->key;
- break;
- }
- cur = cur->next;
- i++;
- }
- return key;
-}
-
-static const char*
-ultag_getExtensionValue(const ULanguageTag* langtag, int32_t idx) {
- const char *val = NULL;
- ExtensionListEntry *cur = langtag->extensions;
- int32_t i = 0;
- while (cur) {
- if (i == idx) {
- val = cur->value;
- break;
- }
- cur = cur->next;
- i++;
- }
- return val;
-}
-
-static int32_t
-ultag_getExtensionsSize(const ULanguageTag* langtag) {
- int32_t size = 0;
- ExtensionListEntry *cur = langtag->extensions;
- while (TRUE) {
- if (cur == NULL) {
- break;
- }
- size++;
- cur = cur->next;
- }
- return size;
-}
-
-static const char*
-ultag_getPrivateUse(const ULanguageTag* langtag) {
- return langtag->privateuse;
-}
-
-#if 0
-static const char*
-ultag_getGrandfathered(const ULanguageTag* langtag) {
- return langtag->grandfathered;
-}
-#endif
-
-
-/*
-* -------------------------------------------------
-*
-* Locale/BCP47 conversion APIs, exposed as uloc_*
-*
-* -------------------------------------------------
-*/
-U_CAPI int32_t U_EXPORT2
-uloc_toLanguageTag(const char* localeID,
- char* langtag,
- int32_t langtagCapacity,
- UBool strict,
- UErrorCode* status) {
- icu::CharString canonical;
- int32_t reslen;
- UErrorCode tmpStatus = U_ZERO_ERROR;
- UBool hadPosix = FALSE;
- const char* pKeywordStart;
-
- /* Note: uloc_canonicalize returns "en_US_POSIX" for input locale ID "". See #6835 */
- int32_t resultCapacity = static_cast<int32_t>(uprv_strlen(localeID));
- if (resultCapacity > 0) {
- char* buffer;
-
- for (;;) {
- buffer = canonical.getAppendBuffer(
- /*minCapacity=*/resultCapacity,
- /*desiredCapacityHint=*/resultCapacity,
- resultCapacity,
- tmpStatus);
-
- if (U_FAILURE(tmpStatus)) {
- *status = tmpStatus;
- return 0;
- }
-
- reslen =
- uloc_canonicalize(localeID, buffer, resultCapacity, &tmpStatus);
-
- if (tmpStatus != U_BUFFER_OVERFLOW_ERROR) {
- break;
- }
-
- resultCapacity = reslen;
- tmpStatus = U_ZERO_ERROR;
- }
-
- if (U_FAILURE(tmpStatus)) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- return 0;
- }
-
- canonical.append(buffer, reslen, tmpStatus);
- if (tmpStatus == U_STRING_NOT_TERMINATED_WARNING) {
- tmpStatus = U_ZERO_ERROR; // Terminators provided by CharString.
- }
-
- if (U_FAILURE(tmpStatus)) {
- *status = tmpStatus;
- return 0;
- }
- }
-
- reslen = 0;
-
- /* For handling special case - private use only tag */
- pKeywordStart = locale_getKeywordsStart(canonical.data());
- if (pKeywordStart == canonical.data()) {
- UEnumeration *kwdEnum;
- int kwdCnt = 0;
- UBool done = FALSE;
-
- kwdEnum = uloc_openKeywords(canonical.data(), &tmpStatus);
- if (kwdEnum != NULL) {
- kwdCnt = uenum_count(kwdEnum, &tmpStatus);
- if (kwdCnt == 1) {
- const char *key;
- int32_t len = 0;
-
- key = uenum_next(kwdEnum, &len, &tmpStatus);
- if (len == 1 && *key == PRIVATEUSE) {
- char buf[ULOC_KEYWORD_AND_VALUES_CAPACITY];
- buf[0] = PRIVATEUSE;
- buf[1] = SEP;
- len = uloc_getKeywordValue(localeID, key, &buf[2], sizeof(buf) - 2, &tmpStatus);
- if (U_SUCCESS(tmpStatus)) {
- if (_isPrivateuseValueSubtags(&buf[2], len)) {
- /* return private use only tag */
- reslen = len + 2;
- uprv_memcpy(langtag, buf, uprv_min(reslen, langtagCapacity));
- u_terminateChars(langtag, langtagCapacity, reslen, status);
- done = TRUE;
- } else if (strict) {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- done = TRUE;
- }
- /* if not strict mode, then "und" will be returned */
- } else {
- *status = U_ILLEGAL_ARGUMENT_ERROR;
- done = TRUE;
- }
- }
- }
- uenum_close(kwdEnum);
- if (done) {
- return reslen;
- }
- }
- }
-
- reslen += _appendLanguageToLanguageTag(canonical.data(), langtag, langtagCapacity, strict, status);
- reslen += _appendScriptToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendRegionToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, status);
- reslen += _appendVariantsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, &hadPosix, status);
- reslen += _appendKeywordsToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
- reslen += _appendPrivateuseToLanguageTag(canonical.data(), langtag + reslen, langtagCapacity - reslen, strict, hadPosix, status);
-
- return reslen;
-}
-
-
-/**
- * Public entry point that turns a language tag into a locale ID.
- *
- * This function has no tag-length parameter of its own; it forwards every
- * argument unchanged to ulocimp_forLanguageTag() and supplies -1 as the
- * tag length (presumably meaning "NUL-terminated" - confirm against
- * ultag_parse()).
- *
- * @param langtag           the language tag to convert
- * @param localeID          destination buffer for the resulting locale ID
- * @param localeIDCapacity  size of the destination buffer
- * @param parsedLength      passed through to ulocimp_forLanguageTag()
- * @param status            error code, passed through
- * @return whatever ulocimp_forLanguageTag() returns
- */
-U_CAPI int32_t U_EXPORT2
-uloc_forLanguageTag(const char* langtag,
-                    char* localeID,
-                    int32_t localeIDCapacity,
-                    int32_t* parsedLength,
-                    UErrorCode* status) {
-    /* this API carries no explicit length for langtag */
-    const int32_t tagLen = -1;
-
-    return ulocimp_forLanguageTag(langtag, tagLen, localeID, localeIDCapacity, parsedLength, status);
-}
-
-
-/**
- * Builds a locale ID string from a language tag.
- *
- * The tag is parsed with ultag_parse() and the parsed pieces are written to
- * localeID in this order: language, script, region, variants, and finally
- * whatever _appendKeywords() produces for extensions / private use.
- *
- * The running length keeps increasing even after it has passed
- * localeIDCapacity; only the actual stores are skipped. The final length is
- * handed to u_terminateChars(), whose result is returned.
- *
- * @param langtag           the language tag, forwarded to ultag_parse()
- * @param tagLen            length of langtag, forwarded to ultag_parse()
- * @param localeID          destination buffer
- * @param localeIDCapacity  size of the destination buffer
- * @param parsedLength      forwarded to ultag_parse()
- * @param status            error code; if it signals failure after parsing,
- *                          0 is returned and nothing is written
- * @return the result of u_terminateChars() for the accumulated length,
- *         or 0 when parsing failed
- */
-U_CAPI int32_t U_EXPORT2
-ulocimp_forLanguageTag(const char* langtag,
-                       int32_t tagLen,
-                       char* localeID,
-                       int32_t localeIDCapacity,
-                       int32_t* parsedLength,
-                       UErrorCode* status) {
-    int32_t total = 0;          /* characters produced so far; may exceed the capacity */
-    UBool haveRegion = FALSE;
-
-    ULanguageTag *tag = ultag_parse(langtag, tagLen, parsedLength, status);
-    if (U_FAILURE(*status)) {
-        return 0;
-    }
-
-    /* language: the first extlang, when present, is used instead of the language subtag */
-    const char *piece = (ultag_getExtlangSize(tag) > 0) ? ultag_getExtlang(tag, 0) : ultag_getLanguage(tag);
-    if (uprv_compareInvCharsAsAscii(piece, LANG_UND) != 0) {
-        /* anything other than LANG_UND is copied as is */
-        const int32_t langLen = (int32_t)uprv_strlen(piece);
-        if (langLen > 0) {
-            if (total < localeIDCapacity) {
-                uprv_memcpy(localeID, piece, uprv_min(langLen, localeIDCapacity - total));
-            }
-            total += langLen;
-        }
-    }
-
-    /* script: separator, then the subtag with its first character upper-cased */
-    piece = ultag_getScript(tag);
-    if (piece[0] != 0) {
-        if (total < localeIDCapacity) {
-            localeID[total] = LOCALE_SEP;
-        }
-        ++total;
-
-        for (const char *src = piece; *src != 0; ++src, ++total) {
-            if (total < localeIDCapacity) {
-                localeID[total] = (src == piece) ? uprv_toupper(*src) : *src;
-            }
-        }
-    }
-
-    /* region: separator, then the subtag in upper case */
-    piece = ultag_getRegion(tag);
-    if (piece[0] != 0) {
-        if (total < localeIDCapacity) {
-            localeID[total] = LOCALE_SEP;
-        }
-        ++total;
-
-        for (const char *src = piece; *src != 0; ++src, ++total) {
-            if (total < localeIDCapacity) {
-                localeID[total] = uprv_toupper(*src);
-            }
-        }
-        haveRegion = TRUE;
-    }
-
-    /* variants: each one is preceded by a separator and written in upper case */
-    const int32_t variantCount = ultag_getVariantsSize(tag);
-    if (variantCount > 0 && !haveRegion) {
-        /* no region was written, so one extra separator stands in for the empty region field */
-        if (total < localeIDCapacity) {
-            localeID[total] = LOCALE_SEP;
-        }
-        ++total;
-    }
-    for (int32_t v = 0; v < variantCount; ++v) {
-        piece = ultag_getVariant(tag, v);
-        if (total < localeIDCapacity) {
-            localeID[total] = LOCALE_SEP;
-        }
-        ++total;
-
-        for (const char *src = piece; *src != 0; ++src, ++total) {
-            if (total < localeIDCapacity) {
-                localeID[total] = uprv_toupper(*src);
-            }
-        }
-    }
-
-    /* keywords: needed when there are extensions or a non-empty private use part */
-    const int32_t extensionCount = ultag_getExtensionsSize(tag);
-    piece = ultag_getPrivateUse(tag);
-    if (extensionCount > 0 || uprv_strlen(piece) > 0) {
-        if (total == 0 && extensionCount > 0) {
-            /* nothing written yet but extensions exist: supply LANG_UND as the language */
-            if (total < localeIDCapacity) {
-                uprv_memcpy(localeID + total, LANG_UND, uprv_min(LANG_UND_LEN, localeIDCapacity - total));
-            }
-            total += LANG_UND_LEN;
-        }
-        const int32_t appended = _appendKeywords(tag, localeID + total, localeIDCapacity - total, status);
-        total += appended;
-    }
-
-    ultag_close(tag);
-    return u_terminateChars(localeID, localeIDCapacity, total, status);
-}