// © 2019 and later: Unicode, Inc. and others. // License & terms of use: http://www.unicode.org/copyright.html #include #include "bytesinkutil.h" // CharStringByteSink #include "charstr.h" #include "cstring.h" #include "ulocimp.h" #include "unicode/localebuilder.h" #include "unicode/locid.h" U_NAMESPACE_BEGIN #define UPRV_ISDIGIT(c) (((c) >= '0') && ((c) <= '9')) #define UPRV_ISALPHANUM(c) (uprv_isASCIILetter(c) || UPRV_ISDIGIT(c) ) const char* kAttributeKey = "attribute"; static bool _isExtensionSubtags(char key, const char* s, int32_t len) { switch (uprv_tolower(key)) { case 'u': return ultag_isUnicodeExtensionSubtags(s, len); case 't': return ultag_isTransformedExtensionSubtags(s, len); case 'x': return ultag_isPrivateuseValueSubtags(s, len); default: return ultag_isExtensionSubtags(s, len); } } LocaleBuilder::LocaleBuilder() : UObject(), status_(U_ZERO_ERROR), language_(), script_(), region_(), variant_(nullptr), extensions_(nullptr) { language_[0] = 0; script_[0] = 0; region_[0] = 0; } LocaleBuilder::~LocaleBuilder() { delete variant_; delete extensions_; } LocaleBuilder& LocaleBuilder::setLocale(const Locale& locale) { clear(); setLanguage(locale.getLanguage()); setScript(locale.getScript()); setRegion(locale.getCountry()); setVariant(locale.getVariant()); extensions_ = locale.clone(); if (extensions_ == nullptr) { status_ = U_MEMORY_ALLOCATION_ERROR; } return *this; } LocaleBuilder& LocaleBuilder::setLanguageTag(StringPiece tag) { Locale l = Locale::forLanguageTag(tag, status_); if (U_FAILURE(status_)) { return *this; } // Because setLocale will reset status_ we need to return // first if we have error in forLanguageTag. setLocale(l); return *this; } static void setField(StringPiece input, char* dest, UErrorCode& errorCode, UBool (*test)(const char*, int32_t)) { if (U_FAILURE(errorCode)) { return; } if (input.empty()) { dest[0] = '\0'; } else if (test(input.data(), input.length())) { uprv_memcpy(dest, input.data(), input.length()); dest[input.length()] = '\0'; } else { errorCode = U_ILLEGAL_ARGUMENT_ERROR; } } LocaleBuilder& LocaleBuilder::setLanguage(StringPiece language) { setField(language, language_, status_, &ultag_isLanguageSubtag); return *this; } LocaleBuilder& LocaleBuilder::setScript(StringPiece script) { setField(script, script_, status_, &ultag_isScriptSubtag); return *this; } LocaleBuilder& LocaleBuilder::setRegion(StringPiece region) { setField(region, region_, status_, &ultag_isRegionSubtag); return *this; } static void transform(char* data, int32_t len) { for (int32_t i = 0; i < len; i++, data++) { if (*data == '_') { *data = '-'; } else { *data = uprv_tolower(*data); } } } LocaleBuilder& LocaleBuilder::setVariant(StringPiece variant) { if (U_FAILURE(status_)) { return *this; } if (variant.empty()) { delete variant_; variant_ = nullptr; return *this; } CharString* new_variant = new CharString(variant, status_); if (U_FAILURE(status_)) { return *this; } if (new_variant == nullptr) { status_ = U_MEMORY_ALLOCATION_ERROR; return *this; } transform(new_variant->data(), new_variant->length()); if (!ultag_isVariantSubtags(new_variant->data(), new_variant->length())) { delete new_variant; status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } delete variant_; variant_ = new_variant; return *this; } static bool _isKeywordValue(const char* key, const char* value, int32_t value_len) { if (key[1] == '\0') { // one char key return (UPRV_ISALPHANUM(uprv_tolower(key[0])) && _isExtensionSubtags(key[0], value, value_len)); } else if (uprv_strcmp(key, kAttributeKey) == 0) { // unicode attributes return ultag_isUnicodeLocaleAttributes(value, value_len); } // otherwise: unicode extension value // We need to convert from legacy key/value to unicode // key/value const char* unicode_locale_key = uloc_toUnicodeLocaleKey(key); const char* unicode_locale_type = uloc_toUnicodeLocaleType(key, value); return unicode_locale_key && unicode_locale_type && ultag_isUnicodeLocaleKey(unicode_locale_key, -1) && ultag_isUnicodeLocaleType(unicode_locale_type, -1); } static void _copyExtensions(const Locale& from, Locale* to, bool validate, UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return; } LocalPointer iter(from.createKeywords(errorCode)); if (U_FAILURE(errorCode) || iter.isNull()) { return; } const char* key; while ((key = iter->next(nullptr, errorCode)) != nullptr) { CharString value; CharStringByteSink sink(&value); from.getKeywordValue(key, sink, errorCode); if (U_FAILURE(errorCode)) { return; } if (uprv_strcmp(key, kAttributeKey) == 0) { transform(value.data(), value.length()); } if (validate && !_isKeywordValue(key, value.data(), value.length())) { errorCode = U_ILLEGAL_ARGUMENT_ERROR; return; } to->setKeywordValue(key, value.data(), errorCode); if (U_FAILURE(errorCode)) { return; } } } void static _clearUAttributesAndKeyType(Locale* locale, UErrorCode& errorCode) { // Clear Unicode attributes locale->setKeywordValue(kAttributeKey, "", errorCode); // Clear all Unicode keyword values LocalPointer iter(locale->createUnicodeKeywords(errorCode)); if (U_FAILURE(errorCode) || iter.isNull()) { return; } const char* key; while ((key = iter->next(nullptr, errorCode)) != nullptr) { locale->setUnicodeKeywordValue(key, nullptr, errorCode); } } static void _setUnicodeExtensions(Locale* locale, const CharString& value, UErrorCode& errorCode) { // Add the unicode extensions to extensions_ CharString locale_str("und-u-", errorCode); locale_str.append(value, errorCode); _copyExtensions( Locale::forLanguageTag(locale_str.data(), errorCode), locale, false, errorCode); } LocaleBuilder& LocaleBuilder::setExtension(char key, StringPiece value) { if (U_FAILURE(status_)) { return *this; } if (!UPRV_ISALPHANUM(key)) { status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } CharString value_str(value, status_); if (U_FAILURE(status_)) { return *this; } transform(value_str.data(), value_str.length()); if (!value_str.isEmpty() && !_isExtensionSubtags(key, value_str.data(), value_str.length())) { status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } if (extensions_ == nullptr) { extensions_ = new Locale(); if (extensions_ == nullptr) { status_ = U_MEMORY_ALLOCATION_ERROR; return *this; } } if (uprv_tolower(key) != 'u') { // for t, x and others extension. extensions_->setKeywordValue(StringPiece(&key, 1), value_str.data(), status_); return *this; } _clearUAttributesAndKeyType(extensions_, status_); if (U_FAILURE(status_)) { return *this; } if (!value.empty()) { _setUnicodeExtensions(extensions_, value_str, status_); } return *this; } LocaleBuilder& LocaleBuilder::setUnicodeLocaleKeyword( StringPiece key, StringPiece type) { if (U_FAILURE(status_)) { return *this; } if (!ultag_isUnicodeLocaleKey(key.data(), key.length()) || (!type.empty() && !ultag_isUnicodeLocaleType(type.data(), type.length()))) { status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } if (extensions_ == nullptr) { extensions_ = new Locale(); } if (extensions_ == nullptr) { status_ = U_MEMORY_ALLOCATION_ERROR; return *this; } extensions_->setUnicodeKeywordValue(key, type, status_); return *this; } LocaleBuilder& LocaleBuilder::addUnicodeLocaleAttribute( StringPiece value) { CharString value_str(value, status_); if (U_FAILURE(status_)) { return *this; } transform(value_str.data(), value_str.length()); if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } if (extensions_ == nullptr) { extensions_ = new Locale(); if (extensions_ == nullptr) { status_ = U_MEMORY_ALLOCATION_ERROR; return *this; } extensions_->setKeywordValue(kAttributeKey, value_str.data(), status_); return *this; } CharString attributes; CharStringByteSink sink(&attributes); UErrorCode localErrorCode = U_ZERO_ERROR; extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); if (U_FAILURE(localErrorCode)) { CharString new_attributes(value_str.data(), status_); // No attributes, set the attribute. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); return *this; } transform(attributes.data(),attributes.length()); const char* start = attributes.data(); const char* limit = attributes.data() + attributes.length(); CharString new_attributes; bool inserted = false; while (start < limit) { if (!inserted) { int cmp = uprv_strcmp(start, value_str.data()); if (cmp == 0) { return *this; } // Found it in attributes: Just return if (cmp > 0) { if (!new_attributes.isEmpty()) new_attributes.append('_', status_); new_attributes.append(value_str.data(), status_); inserted = true; } } if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); } new_attributes.append(start, status_); start += uprv_strlen(start) + 1; } if (!inserted) { if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); } new_attributes.append(value_str.data(), status_); } // Not yet in the attributes, set the attribute. extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); return *this; } LocaleBuilder& LocaleBuilder::removeUnicodeLocaleAttribute( StringPiece value) { CharString value_str(value, status_); if (U_FAILURE(status_)) { return *this; } transform(value_str.data(), value_str.length()); if (!ultag_isUnicodeLocaleAttribute(value_str.data(), value_str.length())) { status_ = U_ILLEGAL_ARGUMENT_ERROR; return *this; } if (extensions_ == nullptr) { return *this; } UErrorCode localErrorCode = U_ZERO_ERROR; CharString attributes; CharStringByteSink sink(&attributes); extensions_->getKeywordValue(kAttributeKey, sink, localErrorCode); // get failure, just return if (U_FAILURE(localErrorCode)) { return *this; } // Do not have any attributes, just return. if (attributes.isEmpty()) { return *this; } char* p = attributes.data(); // Replace null terminiator in place for _ and - so later // we can use uprv_strcmp to compare. for (int32_t i = 0; i < attributes.length(); i++, p++) { *p = (*p == '_' || *p == '-') ? '\0' : uprv_tolower(*p); } const char* start = attributes.data(); const char* limit = attributes.data() + attributes.length(); CharString new_attributes; bool found = false; while (start < limit) { if (uprv_strcmp(start, value_str.data()) == 0) { found = true; } else { if (!new_attributes.isEmpty()) { new_attributes.append('_', status_); } new_attributes.append(start, status_); } start += uprv_strlen(start) + 1; } // Found the value in attributes, set the attribute. if (found) { extensions_->setKeywordValue(kAttributeKey, new_attributes.data(), status_); } return *this; } LocaleBuilder& LocaleBuilder::clear() { status_ = U_ZERO_ERROR; language_[0] = 0; script_[0] = 0; region_[0] = 0; delete variant_; variant_ = nullptr; clearExtensions(); return *this; } LocaleBuilder& LocaleBuilder::clearExtensions() { delete extensions_; extensions_ = nullptr; return *this; } Locale makeBogusLocale() { Locale bogus; bogus.setToBogus(); return bogus; } Locale LocaleBuilder::build(UErrorCode& errorCode) { if (U_FAILURE(errorCode)) { return makeBogusLocale(); } if (U_FAILURE(status_)) { errorCode = status_; return makeBogusLocale(); } CharString locale_str(language_, errorCode); if (uprv_strlen(script_) > 0) { locale_str.append('-', errorCode).append(StringPiece(script_), errorCode); } if (uprv_strlen(region_) > 0) { locale_str.append('-', errorCode).append(StringPiece(region_), errorCode); } if (variant_ != nullptr) { locale_str.append('-', errorCode).append(StringPiece(variant_->data()), errorCode); } if (U_FAILURE(errorCode)) { return makeBogusLocale(); } Locale product(locale_str.data()); if (extensions_ != nullptr) { _copyExtensions(*extensions_, &product, true, errorCode); } if (U_FAILURE(errorCode)) { return makeBogusLocale(); } return product; } U_NAMESPACE_END