diff options
Diffstat (limited to 'deps/icu-small/source/common/unicode')
34 files changed, 1236 insertions, 522 deletions
diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h index b1e4cc68c6..9c1ac7531b 100644 --- a/deps/icu-small/source/common/unicode/brkiter.h +++ b/deps/icu-small/source/common/unicode/brkiter.h @@ -250,7 +250,7 @@ public: virtual int32_t next(void) = 0; /** - * Return character index of the current interator position within the text. + * Return character index of the current iterator position within the text. * @return The boundary most recently returned. * @stable ICU 2.0 */ @@ -277,7 +277,7 @@ public: virtual int32_t preceding(int32_t offset) = 0; /** - * Return true if the specfied position is a boundary position. + * Return true if the specified position is a boundary position. * As a side effect, the current position of the iterator is set * to the first boundary position at or following the specified offset. * @param offset the offset to check. @@ -331,7 +331,7 @@ public: * @param fillInVec an array to be filled in with the status values. * @param capacity the length of the supplied vector. A length of zero causes * the function to return the number of status values, in the - * normal way, without attemtping to store any values. + * normal way, without attempting to store any values. * @param status receives error codes. * @return The number of rule status values from rules that determined * the most recent boundary returned by the break iterator. @@ -469,7 +469,7 @@ public: static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count); /** - * Get name of the object for the desired Locale, in the desired langauge. + * Get name of the object for the desired Locale, in the desired language. * @param objectLocale must be from getAvailableLocales. * @param displayLocale specifies the desired locale for output. * @param name the fill-in parameter of the return value @@ -482,7 +482,7 @@ public: UnicodeString& name); /** - * Get name of the object for the desired Locale, in the langauge of the + * Get name of the object for the desired Locale, in the language of the * default locale. * @param objectLocale must be from getMatchingLocales * @param name the fill-in parameter of the return value @@ -629,10 +629,12 @@ protected: /** @internal */ BreakIterator(); /** @internal */ - BreakIterator (const BreakIterator &other) : UObject(other) {} + BreakIterator (const BreakIterator &other); #ifndef U_HIDE_INTERNAL_API /** @internal */ - BreakIterator (const Locale& valid, const Locale& actual); + BreakIterator (const Locale& valid, const Locale &actual); + /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */ + BreakIterator &operator = (const BreakIterator &other); #endif /* U_HIDE_INTERNAL_API */ private: @@ -640,12 +642,6 @@ private: /** @internal */ char actualLocale[ULOC_FULLNAME_CAPACITY]; char validLocale[ULOC_FULLNAME_CAPACITY]; - - /** - * The assignment operator has no real implementation. - * It's provided to make the compiler happy. Do not call. - */ - BreakIterator& operator=(const BreakIterator&); }; #ifndef U_HIDE_DEPRECATED_API @@ -661,5 +657,5 @@ U_NAMESPACE_END #endif /* #if !UCONFIG_NO_BREAK_ITERATION */ -#endif // _BRKITER +#endif // BRKITER_H //eof diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h index 477892b275..9df23f79c5 100644 --- a/deps/icu-small/source/common/unicode/bytestream.h +++ b/deps/icu-small/source/common/unicode/bytestream.h @@ -126,8 +126,8 @@ public: virtual void Flush(); private: - ByteSink(const ByteSink &); // copy constructor not implemented - ByteSink &operator=(const ByteSink &); // assignment operator not implemented + ByteSink(const ByteSink &) = delete; + ByteSink &operator=(const ByteSink &) = delete; }; // ------------------------------------------------------------- @@ -217,9 +217,10 @@ private: int32_t size_; int32_t appended_; UBool overflowed_; - CheckedArrayByteSink(); ///< default constructor not implemented - CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented - CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented + + CheckedArrayByteSink() = delete; + CheckedArrayByteSink(const CheckedArrayByteSink &) = delete; + CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete; }; /** @@ -236,6 +237,21 @@ class StringByteSink : public ByteSink { * @stable ICU 4.2 */ StringByteSink(StringClass* dest) : dest_(dest) { } +#ifndef U_HIDE_DRAFT_API + /** + * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string. + * + * @param dest pointer to string object to append to + * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d + * @draft ICU 60 + */ + StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) { + if (initialAppendCapacity > 0 && + (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) { + dest->reserve(dest->length() + initialAppendCapacity); + } + } +#endif // U_HIDE_DRAFT_API /** * Append "bytes[0,n-1]" to this. * @param data the pointer to the bytes @@ -245,9 +261,10 @@ class StringByteSink : public ByteSink { virtual void Append(const char* data, int32_t n) { dest_->append(data, n); } private: StringClass* dest_; - StringByteSink(); ///< default constructor not implemented - StringByteSink(const StringByteSink &); ///< copy constructor not implemented - StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented + + StringByteSink() = delete; + StringByteSink(const StringByteSink &) = delete; + StringByteSink &operator=(const StringByteSink &) = delete; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h index 98184820d5..4a4917bdca 100644 --- a/deps/icu-small/source/common/unicode/casemap.h +++ b/deps/icu-small/source/common/unicode/casemap.h @@ -8,6 +8,7 @@ #define __CASEMAP_H__ #include "unicode/utypes.h" +#include "unicode/stringpiece.h" #include "unicode/uobject.h" /** @@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN #ifndef U_HIDE_DRAFT_API class BreakIterator; +class ByteSink; class Edits; /** @@ -36,7 +38,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -48,7 +50,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -71,7 +74,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -83,7 +86,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -112,8 +116,10 @@ public: * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, - * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param iter A break iterator to find the first characters of words that are to be titlecased. * It is set to the source string (setText()) * and used one or more times for iteration (first() and next()). @@ -130,7 +136,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -159,7 +166,7 @@ public: * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. @@ -172,7 +179,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -192,10 +200,133 @@ public: * Lowercases a UTF-8 string and optionally records edits. * Casing is locale-dependent and context-sensitive. * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToLower + * @draft ICU 60 + */ + static void utf8ToLower( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Uppercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToUpper + * @draft ICU 60 + */ + static void utf8ToUpper( + const char *locale, uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#if !UCONFIG_NO_BREAK_ITERATION + + /** + * Titlecases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. + * + * Titlecasing uses a break iterator to find the first characters of words + * that are to be titlecased. It titlecases those characters and lowercases + * all others. (This can be modified with options bits.) + * + * @param locale The locale ID. ("" = root locale, NULL = default locale.) + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. + * @param iter A break iterator to find the first characters of words that are to be titlecased. + * It is set to the source string (setUText()) + * and used one or more times for iteration (first() and next()). + * If NULL, then a word break iterator for the locale is used + * (or something equivalent). + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8ToTitle + * @draft ICU 60 + */ + static void utf8ToTitle( + const char *locale, uint32_t options, BreakIterator *iter, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + +#endif // UCONFIG_NO_BREAK_ITERATION + + /** + * Case-folds a UTF-8 string and optionally records edits. + * + * Case folding is locale-independent and not context-sensitive, + * but there is an option for whether to include or exclude mappings for dotted I + * and dotless i that are marked with 'T' in CaseFolding.txt. + * + * The result may be longer or shorter than the original. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src The original string. + * @param sink A ByteSink to which the result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. + * @param errorCode Reference to an in/out error code value + * which must not indicate a failure before the function call. + * + * @see ucasemap_utf8FoldCase + * @draft ICU 60 + */ + static void utf8Fold( + uint32_t options, + StringPiece src, ByteSink &sink, Edits *edits, + UErrorCode &errorCode); + + /** + * Lowercases a UTF-8 string and optionally records edits. + * Casing is locale-dependent and context-sensitive. + * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -207,7 +338,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -217,7 +349,7 @@ public: * @see ucasemap_utf8ToLower * @draft ICU 59 */ - static int32_t utf8ToLower( + static int32_t utf8ToLower( const char *locale, uint32_t options, const char *src, int32_t srcLength, char *dest, int32_t destCapacity, Edits *edits, @@ -230,7 +362,7 @@ public: * The source string and the destination buffer must not overlap. * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. * @param dest A buffer for the result string. The result will be NUL-terminated if @@ -242,7 +374,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -271,10 +404,12 @@ public: * all others. (This can be modified with options bits.) * * @param locale The locale ID. ("" = root locale, NULL = default locale.) - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, - * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT. + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, + * U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param iter A break iterator to find the first characters of words that are to be titlecased. - * It is set to the source string (setText()) + * It is set to the source string (setUText()) * and used one or more times for iteration (first() and next()). * If NULL, then a word break iterator for the locale is used * (or something equivalent). @@ -289,7 +424,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. @@ -317,7 +453,7 @@ public: * The result may be longer or shorter than the original. * The source string and the destination buffer must not overlap. * - * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT, + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET, * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I. * @param src The original string. * @param srcLength The length of the original string. If -1, then src must be NUL-terminated. @@ -330,7 +466,8 @@ public: * @param edits Records edits for index mapping, working with styled text, * and getting only changes (if any). * The Edits contents is undefined if any error occurs. - * This function calls edits->reset() first. edits can be NULL. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be NULL. * @param errorCode Reference to an in/out error code value * which must not indicate a failure before the function call. * @return The length of the result string, if successful. diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h index fa17c62446..fbce177591 100644 --- a/deps/icu-small/source/common/unicode/char16ptr.h +++ b/deps/icu-small/source/common/unicode/char16ptr.h @@ -95,45 +95,45 @@ private: return reinterpret_cast<char16_t *>(t); } - char16_t *p; + char16_t *p_; #else union { char16_t *cp; uint16_t *up; wchar_t *wp; - } u; + } u_; #endif }; #ifdef U_ALIASING_BARRIER -Char16Ptr::Char16Ptr(char16_t *p) : p(p) {} +Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {} #if !U_CHAR16_IS_TYPEDEF -Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {} +Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {} #endif #if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {} +Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {} #endif -Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {} +Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {} Char16Ptr::~Char16Ptr() { - U_ALIASING_BARRIER(p); + U_ALIASING_BARRIER(p_); } -char16_t *Char16Ptr::get() const { return p; } +char16_t *Char16Ptr::get() const { return p_; } #else -Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; } +Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; } #if !U_CHAR16_IS_TYPEDEF -Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; } +Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; } #endif #if U_SIZEOF_WCHAR_T==2 -Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; } +Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; } #endif -Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; } +Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; } Char16Ptr::~Char16Ptr() {} -char16_t *Char16Ptr::get() const { return u.cp; } +char16_t *Char16Ptr::get() const { return u_.cp; } #endif @@ -203,45 +203,45 @@ private: return reinterpret_cast<const char16_t *>(t); } - const char16_t *p; + const char16_t *p_; #else union { const char16_t *cp; const uint16_t *up; const wchar_t *wp; - } u; + } u_; #endif }; #ifdef U_ALIASING_BARRIER -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {} #if !U_CHAR16_IS_TYPEDEF -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {} +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {} #endif #if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {} +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {} #endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {} +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {} ConstChar16Ptr::~ConstChar16Ptr() { - U_ALIASING_BARRIER(p); + U_ALIASING_BARRIER(p_); } -const char16_t *ConstChar16Ptr::get() const { return p; } +const char16_t *ConstChar16Ptr::get() const { return p_; } #else -ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; } #if !U_CHAR16_IS_TYPEDEF -ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; } +ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; } #endif #if U_SIZEOF_WCHAR_T==2 -ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; } +ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; } #endif -ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; } +ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; } ConstChar16Ptr::~ConstChar16Ptr() {} -const char16_t *ConstChar16Ptr::get() const { return u.cp; } +const char16_t *ConstChar16Ptr::get() const { return u_.cp; } #endif diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h index 6e59f3e388..3e645aee4a 100644 --- a/deps/icu-small/source/common/unicode/docmain.h +++ b/deps/icu-small/source/common/unicode/docmain.h @@ -140,7 +140,7 @@ * <tr> * <td>Number Formatting</td> * <td>unum.h</td> - * <td>icu::NumberFormat</td> + * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td> * </tr> * <tr> * <td>Number Spellout<br/>(Rule Based Number Formatting)</td> diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h index 8d3becb7a2..082c3733a8 100644 --- a/deps/icu-small/source/common/unicode/edits.h +++ b/deps/icu-small/source/common/unicode/edits.h @@ -36,8 +36,32 @@ public: * @draft ICU 59 */ Edits() : - array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), - errorCode(U_ZERO_ERROR) {} + array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0), + errorCode_(U_ZERO_ERROR) {} + /** + * Copy constructor. + * @param other source edits + * @draft ICU 60 + */ + Edits(const Edits &other) : + array(stackArray), capacity(STACK_CAPACITY), length(other.length), + delta(other.delta), numChanges(other.numChanges), + errorCode_(other.errorCode_) { + copyArray(other); + } + /** + * Move constructor, might leave src empty. + * This object will have the same contents that the source object had. + * @param src source edits + * @draft ICU 60 + */ + Edits(Edits &&src) U_NOEXCEPT : + array(stackArray), capacity(STACK_CAPACITY), length(src.length), + delta(src.delta), numChanges(src.numChanges), + errorCode_(src.errorCode_) { + moveArray(src); + } + /** * Destructor. * @draft ICU 59 @@ -45,10 +69,28 @@ public: ~Edits(); /** + * Assignment operator. + * @param other source edits + * @return *this + * @draft ICU 60 + */ + Edits &operator=(const Edits &other); + + /** + * Move assignment operator, might leave src empty. + * This object will have the same contents that the source object had. + * The behavior is undefined if *this and src are the same object. + * @param src source edits + * @return *this + * @draft ICU 60 + */ + Edits &operator=(Edits &&src) U_NOEXCEPT; + + /** * Resets the data but may not release memory. * @draft ICU 59 */ - void reset(); + void reset() U_NOEXCEPT; /** * Adds a record for an unchanged segment of text. @@ -66,6 +108,9 @@ public: * Sets the UErrorCode if an error occurred while recording edits. * Preserves older error codes in the outErrorCode. * Normally called from inside ICU string transformation functions, not user code. + * @param outErrorCode Set to an error code if it does not contain one already + * and an error occurred while recording edits. + * Otherwise unchanged. * @return TRUE if U_FAILURE(outErrorCode) * @draft ICU 59 */ @@ -81,7 +126,13 @@ public: * @return TRUE if there are any change edits * @draft ICU 59 */ - UBool hasChanges() const; + UBool hasChanges() const { return numChanges != 0; } + + /** + * @return the number of change edits + * @draft ICU 60 + */ + int32_t numberOfChanges() const { return numChanges; } /** * Access to the list of edits. @@ -91,6 +142,15 @@ public: */ struct U_COMMON_API Iterator U_FINAL : public UMemory { /** + * Default constructor, empty iterator. + * @draft ICU 60 + */ + Iterator() : + array(nullptr), index(0), length(0), + remaining(0), onlyChanges_(FALSE), coarse(FALSE), + dir(0), changed(FALSE), oldLength_(0), newLength_(0), + srcIndex(0), replIndex(0), destIndex(0) {} + /** * Copy constructor. * @draft ICU 59 */ @@ -103,6 +163,9 @@ public: /** * Advances to the next edit. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) * @return TRUE if there is another edit * @draft ICU 59 */ @@ -121,10 +184,86 @@ public: * if the source index is out of bounds for the source string. * * @param i source index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) * @return TRUE if the edit for the source index was found * @draft ICU 59 */ - UBool findSourceIndex(int32_t i, UErrorCode &errorCode); + UBool findSourceIndex(int32_t i, UErrorCode &errorCode) { + return findIndex(i, TRUE, errorCode) == 0; + } + + /** + * Finds the edit that contains the destination index. + * The destination index may be found in a non-change + * even if normal iteration would skip non-changes. + * Normal iteration can continue from a found edit. + * + * The iterator state before this search logically does not matter. + * (It may affect the performance of the search.) + * + * The iterator state after this search is undefined + * if the source index is out of bounds for the source string. + * + * @param i destination index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return TRUE if the edit for the destination index was found + * @draft ICU 60 + */ + UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) { + return findIndex(i, FALSE, errorCode) == 0; + } + + /** + * Returns the destination index corresponding to the given source index. + * If the source index is inside a change edit (not at its start), + * then the destination index at the end of that edit is returned, + * since there is no information about index mapping inside a change edit. + * + * (This means that indexes to the start and middle of an edit, + * for example around a grapheme cluster, are mapped to indexes + * encompassing the entire edit. + * The alternative, mapping an interior index to the start, + * would map such an interval to an empty one.) + * + * This operation will usually but not always modify this object. + * The iterator state after this search is undefined. + * + * @param i source index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return destination index; undefined if i is not 0..string length + * @draft ICU 60 + */ + int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode); + + /** + * Returns the source index corresponding to the given destination index. + * If the destination index is inside a change edit (not at its start), + * then the source index at the end of that edit is returned, + * since there is no information about index mapping inside a change edit. + * + * (This means that indexes to the start and middle of an edit, + * for example around a grapheme cluster, are mapped to indexes + * encompassing the entire edit. + * The alternative, mapping an interior index to the start, + * would map such an interval to an empty one.) + * + * This operation will usually but not always modify this object. + * The iterator state after this search is undefined. + * + * @param i destination index + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return source index; undefined if i is not 0..string length + * @draft ICU 60 + */ + int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode); /** * @return TRUE if this edit replaces oldLength() units with newLength() different ones. @@ -167,15 +306,22 @@ public: Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs); int32_t readLength(int32_t head); - void updateIndexes(); + void updateNextIndexes(); + void updatePreviousIndexes(); UBool noNext(); UBool next(UBool onlyChanges, UErrorCode &errorCode); + UBool previous(UErrorCode &errorCode); + /** @return -1: error or i<0; 0: found; 1: i>=string length */ + int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode); const uint16_t *array; int32_t index, length; + // 0 if we are not within compressed equal-length changes. + // Otherwise the number of remaining changes, including the current one. int32_t remaining; UBool onlyChanges_, coarse; + int8_t dir; // iteration direction: back(<0), initial(0), forward(>0) UBool changed; int32_t oldLength_, newLength_; int32_t srcIndex, replIndex, destIndex; @@ -219,9 +365,39 @@ public: return Iterator(array, length, FALSE, FALSE); } + /** + * Merges the two input Edits and appends the result to this object. + * + * Consider two string transformations (for example, normalization and case mapping) + * where each records Edits in addition to writing an output string.<br> + * Edits ab reflect how substrings of input string a + * map to substrings of intermediate string b.<br> + * Edits bc reflect how substrings of intermediate string b + * map to substrings of output string c.<br> + * This function merges ab and bc such that the additional edits + * recorded in this object reflect how substrings of input string a + * map to substrings of output string c. + * + * If unrelated Edits are passed in where the output string of the first + * has a different length than the input string of the second, + * then a U_ILLEGAL_ARGUMENT_ERROR is reported. + * + * @param ab reflects how substrings of input string a + * map to substrings of intermediate string b. + * @param bc reflects how substrings of intermediate string b + * map to substrings of output string c. + * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test, + * or else the function returns immediately. Check for U_FAILURE() + * on output or use with function chaining. (See User Guide for details.) + * @return *this, with the merged edits appended + * @draft ICU 60 + */ + Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode); + private: - Edits(const Edits &) = delete; - Edits &operator=(const Edits &) = delete; + void releaseArray() U_NOEXCEPT; + Edits ©Array(const Edits &other); + Edits &moveArray(Edits &src) U_NOEXCEPT; void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; } int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; } @@ -234,7 +410,8 @@ private: int32_t capacity; int32_t length; int32_t delta; - UErrorCode errorCode; + int32_t numChanges; + UErrorCode errorCode_; uint16_t stackArray[STACK_CAPACITY]; }; diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h index 51bb651fba..a0319bf0a7 100644 --- a/deps/icu-small/source/common/unicode/filteredbrk.h +++ b/deps/icu-small/source/common/unicode/filteredbrk.h @@ -55,14 +55,30 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status); +#ifndef U_HIDE_DEPRECATED_API + /** + * This function has been deprecated in favor of createEmptyInstance, which has + * identical behavior. + * @param status The error code. + * @return the new builder + * @deprecated ICU 60 use createEmptyInstance instead + * @see createEmptyInstance() + */ + static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) { + return createEmptyInstance(status); + } +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_DRAFT_API /** * Construct an empty FilteredBreakIteratorBuilder. * In this state, it will not suppress any segment boundaries. * @param status The error code. * @return the new builder - * @stable ICU 56 + * @draft ICU 60 */ - static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status); + static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status); +#endif /* U_HIDE_DRAFT_API */ /** * Suppress a certain string from being the end of a segment. @@ -89,6 +105,20 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { */ virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0; +#ifndef U_HIDE_DEPRECATED_API + /** + * This function has been deprecated in favor of wrapIteratorWithFilter() + * The behavior is identical. + * @param adoptBreakIterator the break iterator to adopt + * @param status error code + * @return the new BreakIterator, owned by the caller. + * @deprecated ICU 60 use wrapIteratorWithFilter() instead + * @see wrapBreakIteratorWithFilter() + */ + virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; +#endif /* U_HIDE_DEPRECATED_API */ + +#ifndef U_HIDE_DRAFT_API /** * Wrap (adopt) an existing break iterator in a new filtered instance. * The resulting BreakIterator is owned by the caller. @@ -96,12 +126,16 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject { * Note that the adoptBreakIterator is adopted by the new BreakIterator * and should no longer be used by the caller. * The FilteredBreakIteratorBuilder may be reused. + * This function is an alias for build() * @param adoptBreakIterator the break iterator to adopt * @param status error code * @return the new BreakIterator, owned by the caller. - * @stable ICU 56 + * @draft ICU 60 */ - virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0; + inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) { + return build(adoptBreakIterator, status); + } +#endif /* U_HIDE_DRAFT_API */ protected: /** diff --git a/deps/icu-small/source/common/unicode/localpointer.h b/deps/icu-small/source/common/unicode/localpointer.h index 3ab820188f..e17ee3d886 100644 --- a/deps/icu-small/source/common/unicode/localpointer.h +++ b/deps/icu-small/source/common/unicode/localpointer.h @@ -213,7 +213,6 @@ public: errorCode=U_MEMORY_ALLOCATION_ERROR; } } -#if U_HAVE_RVALUE_REFERENCES /** * Move constructor, leaves src with isNull(). * @param src source smart pointer @@ -222,7 +221,6 @@ public: LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { src.ptr=NULL; } -#endif /** * Destructor deletes the object it owns. * @stable ICU 4.4 @@ -230,7 +228,6 @@ public: ~LocalPointer() { delete LocalPointerBase<T>::ptr; } -#if U_HAVE_RVALUE_REFERENCES /** * Move assignment operator, leaves src with isNull(). * The behavior is undefined if *this and src are the same object. @@ -241,7 +238,6 @@ public: LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT { return moveFrom(src); } -#endif // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API /** * Move assignment, leaves src with isNull(). @@ -362,7 +358,6 @@ public: errorCode=U_MEMORY_ALLOCATION_ERROR; } } -#if U_HAVE_RVALUE_REFERENCES /** * Move constructor, leaves src with isNull(). * @param src source smart pointer @@ -371,7 +366,6 @@ public: LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) { src.ptr=NULL; } -#endif /** * Destructor deletes the array it owns. * @stable ICU 4.4 @@ -379,7 +373,6 @@ public: ~LocalArray() { delete[] LocalPointerBase<T>::ptr; } -#if U_HAVE_RVALUE_REFERENCES /** * Move assignment operator, leaves src with isNull(). * The behavior is undefined if *this and src are the same object. @@ -390,7 +383,6 @@ public: LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT { return moveFrom(src); } -#endif // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API /** * Move assignment, leaves src with isNull(). @@ -492,7 +484,6 @@ public: * @see LocalPointer * @stable ICU 4.4 */ -#if U_HAVE_RVALUE_REFERENCES #define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ class LocalPointerClassName : public LocalPointerBase<Type> { \ public: \ @@ -526,34 +517,6 @@ public: ptr=p; \ } \ } -#else -#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \ - class LocalPointerClassName : public LocalPointerBase<Type> { \ - public: \ - using LocalPointerBase<Type>::operator*; \ - using LocalPointerBase<Type>::operator->; \ - explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \ - ~LocalPointerClassName() { closeFunction(ptr); } \ - LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \ - if (ptr != NULL) { closeFunction(ptr); } \ - LocalPointerBase<Type>::ptr=src.ptr; \ - src.ptr=NULL; \ - return *this; \ - } \ - void swap(LocalPointerClassName &other) U_NOEXCEPT { \ - Type *temp=LocalPointerBase<Type>::ptr; \ - LocalPointerBase<Type>::ptr=other.ptr; \ - other.ptr=temp; \ - } \ - friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \ - p1.swap(p2); \ - } \ - void adoptInstead(Type *p) { \ - if (ptr != NULL) { closeFunction(ptr); } \ - ptr=p; \ - } \ - } -#endif U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h index 37a34f7140..c752344f33 100644 --- a/deps/icu-small/source/common/unicode/locid.h +++ b/deps/icu-small/source/common/unicode/locid.h @@ -88,7 +88,7 @@ class UnicodeString; * <P> * The third constructor requires a third argument--the <STRONG>Variant.</STRONG> * The Variant codes are vendor and browser-specific. - * For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX. + * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX. * Where there are two variants, separate them with an underscore, and * put the most important one first. For * example, a Traditional Spanish collation might be referenced, with diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h index d326da948a..8a6d713802 100644 --- a/deps/icu-small/source/common/unicode/normalizer2.h +++ b/deps/icu-small/source/common/unicode/normalizer2.h @@ -28,12 +28,15 @@ #if !UCONFIG_NO_NORMALIZATION +#include "unicode/stringpiece.h" #include "unicode/uniset.h" #include "unicode/unistr.h" #include "unicode/unorm2.h" U_NAMESPACE_BEGIN +class ByteSink; + /** * Unicode normalization functionality for standard Unicode normalization or * for using custom mapping tables. @@ -215,6 +218,35 @@ public: normalize(const UnicodeString &src, UnicodeString &dest, UErrorCode &errorCode) const = 0; + + /** + * Normalizes a UTF-8 string and optionally records how source substrings + * relate to changed and unchanged result substrings. + * + * Currently implemented completely only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * Otherwise currently converts to & from UTF-16 and does not support edits. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src Source UTF-8 string. + * @param sink A ByteSink to which the normalized UTF-8 result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @draft ICU 60 + */ + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const; + /** * Appends the normalized form of the second string to the first string * (merging them at the boundary) and returns the first string. @@ -340,6 +372,30 @@ public: */ virtual UBool isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0; + /** + * Tests if the UTF-8 string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * + * This works for all normalization modes, + * but it is currently optimized for UTF-8 only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * For other modes it currently converts to UTF-16 and calls isNormalized(). + * + * @param s UTF-8 input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @draft ICU 60 + */ + virtual UBool + isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const; + /** * Tests if the string is normalized. @@ -479,7 +535,36 @@ public: virtual UnicodeString & normalize(const UnicodeString &src, UnicodeString &dest, - UErrorCode &errorCode) const; + UErrorCode &errorCode) const U_OVERRIDE; + + /** + * Normalizes a UTF-8 string and optionally records how source substrings + * relate to changed and unchanged result substrings. + * + * Currently implemented completely only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * Otherwise currently converts to & from UTF-16 and does not support edits. + * + * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET. + * @param src Source UTF-8 string. + * @param sink A ByteSink to which the normalized UTF-8 result string is written. + * sink.Flush() is called at the end. + * @param edits Records edits for index mapping, working with styled text, + * and getting only changes (if any). + * The Edits contents is undefined if any error occurs. + * This function calls edits->reset() first unless + * options includes U_EDITS_NO_RESET. edits can be nullptr. + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @draft ICU 60 + */ + virtual void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE; + /** * Appends the normalized form of the second string to the first string * (merging them at the boundary) and returns the first string. @@ -497,7 +582,7 @@ public: virtual UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const; + UErrorCode &errorCode) const U_OVERRIDE; /** * Appends the second string to the first string * (merging them at the boundary) and returns the first string. @@ -515,7 +600,7 @@ public: virtual UnicodeString & append(UnicodeString &first, const UnicodeString &second, - UErrorCode &errorCode) const; + UErrorCode &errorCode) const U_OVERRIDE; /** * Gets the decomposition mapping of c. @@ -529,7 +614,7 @@ public: * @stable ICU 4.6 */ virtual UBool - getDecomposition(UChar32 c, UnicodeString &decomposition) const; + getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; /** * Gets the raw decomposition mapping of c. @@ -543,7 +628,7 @@ public: * @stable ICU 49 */ virtual UBool - getRawDecomposition(UChar32 c, UnicodeString &decomposition) const; + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE; /** * Performs pairwise composition of a & b and returns the composite if there is one. @@ -556,7 +641,7 @@ public: * @stable ICU 49 */ virtual UChar32 - composePair(UChar32 a, UChar32 b) const; + composePair(UChar32 a, UChar32 b) const U_OVERRIDE; /** * Gets the combining class of c. @@ -567,7 +652,7 @@ public: * @stable ICU 49 */ virtual uint8_t - getCombiningClass(UChar32 c) const; + getCombiningClass(UChar32 c) const U_OVERRIDE; /** * Tests if the string is normalized. @@ -581,7 +666,30 @@ public: * @stable ICU 4.4 */ virtual UBool - isNormalized(const UnicodeString &s, UErrorCode &errorCode) const; + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; + /** + * Tests if the UTF-8 string is normalized. + * Internally, in cases where the quickCheck() method would return "maybe" + * (which is only possible for the two COMPOSE modes) this method + * resolves to "yes" or "no" to provide a definitive result, + * at the cost of doing more work in those cases. + * + * This works for all normalization modes, + * but it is currently optimized for UTF-8 only for "compose" modes, + * such as for NFC, NFKC, and NFKC_Casefold + * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS). + * For other modes it currently converts to UTF-16 and calls isNormalized(). + * + * @param s UTF-8 input string + * @param errorCode Standard ICU error code. Its input value must + * pass the U_SUCCESS() test, or else the function returns + * immediately. Check for U_FAILURE() on output or use with + * function chaining. (See User Guide for details.) + * @return TRUE if s is normalized + * @draft ICU 60 + */ + virtual UBool + isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE; /** * Tests if the string is normalized. * For details see the Normalizer2 base class documentation. @@ -594,7 +702,7 @@ public: * @stable ICU 4.4 */ virtual UNormalizationCheckResult - quickCheck(const UnicodeString &s, UErrorCode &errorCode) const; + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; /** * Returns the end of the normalized substring of the input string. * For details see the Normalizer2 base class documentation. @@ -607,7 +715,7 @@ public: * @stable ICU 4.4 */ virtual int32_t - spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const; + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE; /** * Tests if the character always has a normalization boundary before it, @@ -617,7 +725,7 @@ public: * @return TRUE if c has a normalization boundary before it * @stable ICU 4.4 */ - virtual UBool hasBoundaryBefore(UChar32 c) const; + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE; /** * Tests if the character always has a normalization boundary after it, @@ -627,7 +735,7 @@ public: * @return TRUE if c has a normalization boundary after it * @stable ICU 4.4 */ - virtual UBool hasBoundaryAfter(UChar32 c) const; + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE; /** * Tests if the character is normalization-inert. @@ -636,7 +744,7 @@ public: * @return TRUE if c is normalization-inert * @stable ICU 4.4 */ - virtual UBool isInert(UChar32 c) const; + virtual UBool isInert(UChar32 c) const U_OVERRIDE; private: UnicodeString & normalize(const UnicodeString &src, @@ -644,6 +752,12 @@ private: USetSpanCondition spanCondition, UErrorCode &errorCode) const; + void + normalizeUTF8(uint32_t options, const char *src, int32_t length, + ByteSink &sink, Edits *edits, + USetSpanCondition spanCondition, + UErrorCode &errorCode) const; + UnicodeString & normalizeSecondAndAppend(UnicodeString &first, const UnicodeString &second, diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h index 23b9464c65..12e2929d24 100644 --- a/deps/icu-small/source/common/unicode/platform.h +++ b/deps/icu-small/source/common/unicode/platform.h @@ -132,6 +132,8 @@ #define U_PF_BROWSER_NATIVE_CLIENT 4020 /** Android is based on Linux. @internal */ #define U_PF_ANDROID 4050 +/** Fuchsia is a POSIX-ish platform. @internal */ +#define U_PF_FUCHSIA 4100 /* Maximum value for Linux-based platform is 4499 */ /** z/OS is the successor to OS/390 which was the successor to MVS. @internal */ #define U_PF_OS390 9000 @@ -152,6 +154,8 @@ # include <android/api-level.h> #elif defined(__pnacl__) || defined(__native_client__) # define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT +#elif defined(__Fuchsia__) +# define U_PLATFORM U_PF_FUCHSIA #elif defined(linux) || defined(__linux__) || defined(__linux) # define U_PLATFORM U_PF_LINUX #elif defined(__APPLE__) && defined(__MACH__) @@ -193,6 +197,20 @@ #endif /** + * \def UPRV_INCOMPLETE_CPP11_SUPPORT + * This switch turns off ICU 60 NumberFormatter code. + * By default, this switch is enabled on AIX and z/OS, + * which have poor C++11 support. + * + * NOTE: This switch is intended to be temporary; see #13393. + * + * @internal + */ +#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT +# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS ) +#endif + +/** * \def CYGWINMSVC * Defined if this is Windows with Cygwin, but using MSVC rather than gcc. * Otherwise undefined. @@ -330,31 +348,6 @@ # define U_HAVE_INTTYPES_H U_HAVE_STDINT_H #endif -/** - * \def U_IOSTREAM_SOURCE - * Defines what support for C++ streams is available. - * - * If U_IOSTREAM_SOURCE is set to 199711, then <iostream> is available - * (the ISO/IEC C++ FDIS was published in November 1997), and then - * one should qualify streams using the std namespace in ICU header - * files. - * Starting with ICU 49, this is the only supported version. - * - * If U_IOSTREAM_SOURCE is set to 198506, then <iostream.h> is - * available instead (in June 1985 Stroustrup published - * "An Extensible I/O Facility for C++" at the summer USENIX conference). - * Starting with ICU 49, this version is not supported any more. - * - * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711), - * then C++ streams are not available and - * support for them will be silently suppressed in ICU. - * - * @internal - */ -#ifndef U_IOSTREAM_SOURCE -#define U_IOSTREAM_SOURCE 199711 -#endif - /*===========================================================================*/ /** @{ Compiler and environment features */ /*===========================================================================*/ @@ -506,22 +499,6 @@ namespace std { #endif /** - * \def U_HAVE_RVALUE_REFERENCES - * Set to 1 if the compiler supports rvalue references. - * C++11 feature, necessary for move constructor & move assignment. - * @internal - */ -#ifdef U_HAVE_RVALUE_REFERENCES - /* Use the predefined value. */ -#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_rvalue_references) \ - || defined(__GXX_EXPERIMENTAL_CXX0X__) \ - || (defined(_MSC_VER) && _MSC_VER >= 1600) /* Visual Studio 2010 */ -# define U_HAVE_RVALUE_REFERENCES 1 -#else -# define U_HAVE_RVALUE_REFERENCES 0 -#endif - -/** * \def U_NOEXCEPT * "noexcept" if supported, otherwise empty. * Some code, especially STL containers, uses move semantics of objects only @@ -871,6 +848,16 @@ namespace std { # define U_CALLCONV U_EXPORT2 #endif +/** + * \def U_CALLCONV_FPTR + * Similar to U_CALLCONV, but only used on function pointers. + * @internal + */ +#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus) +# define U_CALLCONV_FPTR U_CALLCONV +#else +# define U_CALLCONV_FPTR +#endif /* @} */ #endif diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h index d654154008..c3c201dd35 100644 --- a/deps/icu-small/source/common/unicode/rbbi.h +++ b/deps/icu-small/source/common/unicode/rbbi.h @@ -31,23 +31,14 @@ #include "unicode/schriter.h" #include "unicode/uchriter.h" - -struct UTrie; - U_NAMESPACE_BEGIN /** @internal */ +class LanguageBreakEngine; struct RBBIDataHeader; -class RuleBasedBreakIteratorTables; -class BreakIterator; class RBBIDataWrapper; -class UStack; -class LanguageBreakEngine; class UnhandledEngine; -struct RBBIStateTable; - - - +class UStack; /** * @@ -96,47 +87,49 @@ private: */ RBBIDataWrapper *fData; - /** Index of the Rule {tag} values for the most recent match. + /** + * The iteration state - current position, rule status for the current position, + * and whether the iterator ran off the end, yielding UBRK_DONE. + * Current position is pinned to be 0 < position <= text.length. + * Current position is always set to a boundary. * @internal */ - int32_t fLastRuleStatusIndex; + /** + * The current position of the iterator. Pinned, 0 < fPosition <= text.length. + * Never has the value UBRK_DONE (-1). + */ + int32_t fPosition; /** - * Rule tag value valid flag. - * Some iterator operations don't intrinsically set the correct tag value. - * This flag lets us lazily compute the value if we are ever asked for it. - * @internal - */ - UBool fLastStatusIndexValid; + * TODO: + */ + int32_t fRuleStatusIndex; /** - * Counter for the number of characters encountered with the "dictionary" - * flag set. - * @internal - */ - uint32_t fDictionaryCharCount; + * True when iteration has run off the end, and iterator functions should return UBRK_DONE. + */ + UBool fDone; /** - * When a range of characters is divided up using the dictionary, the break - * positions that are discovered are stored here, preventing us from having - * to use either the dictionary or the state table again until the iterator - * leaves this range of text. Has the most impact for line breaking. - * @internal + * Cache of previously determined boundary positions. */ - int32_t* fCachedBreakPositions; - + public: // TODO: debug, return to private. + class BreakCache; + BreakCache *fBreakCache; + private: /** - * The number of elements in fCachedBreakPositions + * Counter for the number of characters encountered with the "dictionary" + * flag set. * @internal */ - int32_t fNumCachedBreakPositions; + uint32_t fDictionaryCharCount; /** - * if fCachedBreakPositions is not null, this indicates which item in the - * cache the current iteration position refers to - * @internal + * Cache of boundary positions within a region of text that has been + * sub-divided by dictionary based breaking. */ - int32_t fPositionInCache; + class DictionaryCache; + DictionaryCache *fDictionaryCache; /** * @@ -179,13 +172,11 @@ private: */ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status); - + /** @internal */ friend class RBBIRuleBuilder; /** @internal */ friend class BreakIterator; - - public: /** Default constructor. Creates an empty shell of an iterator, with no @@ -469,7 +460,10 @@ public: virtual UBool isBoundary(int32_t offset); /** - * Returns the current iteration position. + * Returns the current iteration position. Note that UBRK_DONE is never + * returned from this function; if iteration has run to the end of a + * string, current() will return the length of the string while + * next() will return UBRK_DONE). * @return The current iteration position. * @stable ICU 2.0 */ @@ -501,6 +495,7 @@ public: * Note: this function is not thread safe. It should not have been * declared const, and the const remains only for compatibility * reasons. (The function is logically const, but not bit-wise const). + * TODO: check this. Probably thread safe now. * <p> * @return the status from the break rule that determined the most recently * returned break position. @@ -660,46 +655,31 @@ private: * Common initialization function, used by constructors and bufferClone. * @internal */ - void init(); - - /** - * This method backs the iterator back up to a "safe position" in the text. - * This is a position that we know, without any context, must be a break position. - * The various calling methods then iterate forward from this safe position to - * the appropriate position to return. (For more information, see the description - * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.) - * @param statetable state table used of moving backwards - * @internal - */ - int32_t handlePrevious(const RBBIStateTable *statetable); + void init(UErrorCode &status); /** - * This method is the actual implementation of the next() method. All iteration - * vectors through here. This method initializes the state machine to state 1 - * and advances through the text character by character until we reach the end - * of the text or the state machine transitions to state 0. We update our return - * value every time the state machine passes through a possible end state. - * @param statetable state table used of moving forwards + * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules. + * This locates a "Safe Position" from which the forward break rules + * will operate correctly. A Safe Position is not necessarily a boundary itself. + * + * @param fromPosition the position in the input text to begin the iteration. * @internal */ - int32_t handleNext(const RBBIStateTable *statetable); - + int32_t handlePrevious(int32_t fromPosition); /** - * This is the function that actually implements dictionary-based - * breaking. Covering at least the range from startPos to endPos, - * it checks for dictionary characters, and if it finds them determines - * the appropriate object to deal with them. It may cache found breaks in - * fCachedBreakPositions as it goes. It may well also look at text outside - * the range startPos to endPos. - * If going forward, endPos is the normal Unicode break result, and - * if goind in reverse, startPos is the normal Unicode break result - * @param startPos The start position of a range of text - * @param endPos The end position of a range of text - * @param reverse The call is for the reverse direction + * Find a rule-based boundary by running the state machine. + * Input + * fPosition, the position in the text to begin from. + * Output + * fPosition: the boundary following the starting position. + * fDictionaryCharCount the number of dictionary characters encountered. + * If > 0, the segment will be further subdivided + * fRuleStatusIndex Info from the state table indicating which rules caused the boundary. + * * @internal */ - int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse); + int32_t handleNext(); /** @@ -710,11 +690,14 @@ private: */ const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c); + public: +#ifndef U_HIDE_INTERNAL_API /** - * @internal + * Debugging function only. + * @internal */ - void makeRuleStatusValid(); - + void dumpCache(); +#endif /* U_HIDE_INTERNAL_API */ }; //------------------------------------------------------------------------------ diff --git a/deps/icu-small/source/common/unicode/simpleformatter.h b/deps/icu-small/source/common/unicode/simpleformatter.h index 26eae01525..850949caaf 100644 --- a/deps/icu-small/source/common/unicode/simpleformatter.h +++ b/deps/icu-small/source/common/unicode/simpleformatter.h @@ -21,6 +21,13 @@ U_NAMESPACE_BEGIN +// Forward declaration: +namespace number { +namespace impl { +class SimpleModifier; +} +} + /** * Formats simple patterns like "{1} was born in {0}". * Minimal subset of MessageFormat; fast, simple, minimal dependencies. @@ -286,6 +293,9 @@ private: UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue, int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode); + + // Give access to internals to SimpleModifier for number formatting + friend class number::impl::SimpleModifier; }; U_NAMESPACE_END diff --git a/deps/icu-small/source/common/unicode/stringoptions.h b/deps/icu-small/source/common/unicode/stringoptions.h new file mode 100644 index 0000000000..f2de96e963 --- /dev/null +++ b/deps/icu-small/source/common/unicode/stringoptions.h @@ -0,0 +1,198 @@ +// © 2017 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html + +// stringoptions.h +// created: 2017jun08 Markus W. Scherer + +#ifndef __STRINGOPTIONS_H__ +#define __STRINGOPTIONS_H__ + +#include "unicode/utypes.h" + +/** + * \file + * \brief C API: Bit set option bit constants for various string and character processing functions. + */ + +/** + * Option value for case folding: Use default mappings defined in CaseFolding.txt. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_DEFAULT 0 + +/** + * Option value for case folding: + * + * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I + * and dotless i appropriately for Turkic languages (tr, az). + * + * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that + * are to be included for default mappings and + * excluded for the Turkic-specific mappings. + * + * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that + * are to be excluded for default mappings and + * included for the Turkic-specific mappings. + * + * @stable ICU 2.0 + */ +#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 + +#ifndef U_HIDE_DRAFT_API + +/** + * Titlecase the string as a whole rather than each word. + * (Titlecase only the character at index 0, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @draft ICU 60 + */ +#define U_TITLECASE_WHOLE_STRING 0x20 + +/** + * Titlecase sentences rather than words. + * (Titlecase only the first character of each sentence, possibly adjusted.) + * Option bits value for titlecasing APIs that take an options bit set. + * + * It is an error to specify multiple titlecasing iterator options together, + * including both an options bit and an explicit BreakIterator. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @draft ICU 60 + */ +#define U_TITLECASE_SENTENCES 0x40 + +#endif // U_HIDE_DRAFT_API + +/** + * Do not lowercase non-initial parts of words when titlecasing. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will titlecase the character at each + * (possibly adjusted) BreakIterator index and + * lowercase all other characters up to the next iterator index. + * With this option, the other characters will not be modified. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_LOWERCASE 0x100 + +/** + * Do not adjust the titlecasing BreakIterator indexes; + * titlecase exactly the characters at breaks from the iterator. + * Option bit for titlecasing APIs that take an options bit set. + * + * By default, titlecasing will take each break iterator index, + * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED), + * and titlecase that one. + * + * Other characters are lowercased. + * + * It is an error to specify multiple titlecasing adjustment options together. + * + * @see U_TITLECASE_ADJUST_TO_CASED + * @see U_TITLECASE_NO_LOWERCASE + * @see UnicodeString::toTitle + * @see CaseMap::toTitle + * @see ucasemap_setOptions + * @see ucasemap_toTitle + * @see ucasemap_utf8ToTitle + * @stable ICU 3.8 + */ +#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 + +#ifndef U_HIDE_DRAFT_API + +/** + * Adjust each titlecasing BreakIterator index to the next cased character. + * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).) + * Option bit for titlecasing APIs that take an options bit set. + * + * This used to be the default index adjustment in ICU. + * Since ICU 60, the default index adjustment is to the next character that is + * a letter, number, symbol, or private use code point. + * (Uncased modifier letters are skipped.) + * The difference in behavior is small for word titlecasing, + * but the new adjustment is much better for whole-string and sentence titlecasing: + * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»". + * + * It is an error to specify multiple titlecasing adjustment options together. + * + * @see U_TITLECASE_NO_BREAK_ADJUSTMENT + * @draft ICU 60 + */ +#define U_TITLECASE_ADJUST_TO_CASED 0x400 + +/** + * Option for string transformation functions to not first reset the Edits object. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_EDITS_NO_RESET 0x2000 + +/** + * Omit unchanged text when recording how source substrings + * relate to changed and unchanged result substrings. + * Used for example in some case-mapping and normalization functions. + * + * @see CaseMap + * @see Edits + * @see Normalizer2 + * @draft ICU 60 + */ +#define U_OMIT_UNCHANGED_TEXT 0x4000 + +#endif // U_HIDE_DRAFT_API + +/** + * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: + * Compare strings in code point order instead of code unit order. + * @stable ICU 2.2 + */ +#define U_COMPARE_CODE_POINT_ORDER 0x8000 + +/** + * Option bit for unorm_compare: + * Perform case-insensitive comparison. + * @stable ICU 2.2 + */ +#define U_COMPARE_IGNORE_CASE 0x10000 + +/** + * Option bit for unorm_compare: + * Both input strings are assumed to fulfill FCD conditions. + * @stable ICU 2.2 + */ +#define UNORM_INPUT_IS_FCD 0x20000 + +// Related definitions elsewhere. +// Options that are not meaningful in the same functions +// can share the same bits. +// +// Public: +// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20 +// +// Internal: (may change or be removed) +// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff +// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7 +// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0 +// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600 +// ustr_imp.h #define _STRNCMP_STYLE 0x1000 +// unormcmp.cpp #define _COMPARE_EQUIV 0x80000 + +#endif // __STRINGOPTIONS_H__ diff --git a/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/icu-small/source/common/unicode/stringtriebuilder.h index d1ac003c48..8d2b229413 100644 --- a/deps/icu-small/source/common/unicode/stringtriebuilder.h +++ b/deps/icu-small/source/common/unicode/stringtriebuilder.h @@ -256,7 +256,7 @@ protected: /** @internal */ class FinalValueNode : public Node { public: - FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} + FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {} virtual UBool operator==(const Node &other) const; virtual void write(StringTrieBuilder &builder); protected: @@ -276,7 +276,7 @@ protected: void setValue(int32_t v) { hasValue=TRUE; value=v; - hash=hash*37+v; + hash=hash*37u+v; } protected: UBool hasValue; @@ -290,7 +290,7 @@ protected: class IntermediateValueNode : public ValueNode { public: IntermediateValueNode(int32_t v, Node *nextNode) - : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } + : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); } virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); virtual void write(StringTrieBuilder &builder); @@ -307,7 +307,7 @@ protected: class LinearMatchNode : public ValueNode { public: LinearMatchNode(int32_t len, Node *nextNode) - : ValueNode((0x333333*37+len)*37+hashCode(nextNode)), + : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)), length(len), next(nextNode) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); @@ -342,7 +342,7 @@ protected: equal[length]=NULL; values[length]=value; ++length; - hash=(hash*37+c)*37+value; + hash=(hash*37u+c)*37u+value; } // Adds a unit which leads to another match node. void add(int32_t c, Node *node) { @@ -350,7 +350,7 @@ protected: equal[length]=node; values[length]=0; ++length; - hash=(hash*37+c)*37+hashCode(node); + hash=(hash*37u+c)*37u+hashCode(node); } protected: Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". @@ -365,8 +365,8 @@ protected: class SplitBranchNode : public BranchNode { public: SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) - : BranchNode(((0x555555*37+middleUnit)*37+ - hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), + : BranchNode(((0x555555u*37u+middleUnit)*37u+ + hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)), unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); @@ -382,7 +382,7 @@ protected: class BranchHeadNode : public ValueNode { public: BranchHeadNode(int32_t len, Node *subNode) - : ValueNode((0x666666*37+len)*37+hashCode(subNode)), + : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)), length(len), next(subNode) {} virtual UBool operator==(const Node &other) const; virtual int32_t markRightEdgesFirst(int32_t edgeNumber); diff --git a/deps/icu-small/source/common/unicode/ubiditransform.h b/deps/icu-small/source/common/unicode/ubiditransform.h index 724587dddc..627b005ed4 100644 --- a/deps/icu-small/source/common/unicode/ubiditransform.h +++ b/deps/icu-small/source/common/unicode/ubiditransform.h @@ -23,8 +23,6 @@ #include "unicode/uchar.h" #include "unicode/localpointer.h" -#ifndef U_HIDE_DRAFT_API - /** * \file * \brief Bidi Transformations @@ -60,17 +58,17 @@ * @see UBIDI_REORDER_DEFAULT * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT * @see UBIDI_REORDER_RUNS_ONLY - * @draft ICU 58 + * @stable ICU 58 */ typedef enum { /** 0: Constant indicating a logical order. * This is the default for input text. - * @draft ICU 58 + * @stable ICU 58 */ UBIDI_LOGICAL = 0, /** 1: Constant indicating a visual order. * This is a default for output text. - * @draft ICU 58 + * @stable ICU 58 */ UBIDI_VISUAL } UBiDiOrder; @@ -83,20 +81,20 @@ typedef enum { * @see ubidi_setReorderingOptions * @see ubidi_writeReordered * @see ubidi_writeReverse - * @draft ICU 58 + * @stable ICU 58 */ typedef enum { /** 0: Constant indicating that character mirroring should not be * performed. * This is the default. - * @draft ICU 58 + * @stable ICU 58 */ UBIDI_MIRRORING_OFF = 0, /** 1: Constant indicating that character mirroring should be performed. * This corresponds to calling <code>ubidi_writeReordered</code> or * <code>ubidi_writeReverse</code> with the * <code>UBIDI_DO_MIRRORING</code> option bit set. - * @draft ICU 58 + * @stable ICU 58 */ UBIDI_MIRRORING_ON } UBiDiMirroring; @@ -104,7 +102,7 @@ typedef enum { /** * Forward declaration of the <code>UBiDiTransform</code> structure that stores * information used by the layout transformation engine. - * @draft ICU 58 + * @stable ICU 58 */ typedef struct UBiDiTransform UBiDiTransform; @@ -240,9 +238,9 @@ typedef struct UBiDiTransform UBiDiTransform; * @see UBiDiMirroring * @see ubidi_setPara * @see u_shapeArabic - * @draft ICU 58 + * @stable ICU 58 */ -U_DRAFT uint32_t U_EXPORT2 +U_STABLE uint32_t U_EXPORT2 ubiditransform_transform(UBiDiTransform *pBiDiTransform, const UChar *src, int32_t srcLength, UChar *dest, int32_t destSize, @@ -286,16 +284,16 @@ ubiditransform_transform(UBiDiTransform *pBiDiTransform, * <code>ubiditransform_close()</code>. * * @return An empty <code>UBiDiTransform</code> object. - * @draft ICU 58 + * @stable ICU 58 */ -U_DRAFT UBiDiTransform* U_EXPORT2 +U_STABLE UBiDiTransform* U_EXPORT2 ubiditransform_open(UErrorCode *pErrorCode); /** * Deallocates the given <code>UBiDiTransform</code> object. - * @draft ICU 58 + * @stable ICU 58 */ -U_DRAFT void U_EXPORT2 +U_STABLE void U_EXPORT2 ubiditransform_close(UBiDiTransform *pBidiTransform); #if U_SHOW_CPLUSPLUS_API @@ -309,7 +307,7 @@ U_NAMESPACE_BEGIN * * @see LocalPointerBase * @see LocalPointer - * @draft ICU 58 + * @stable ICU 58 */ U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close); @@ -317,5 +315,4 @@ U_NAMESPACE_END #endif -#endif /* U_HIDE_DRAFT_API */ #endif diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h index 22a4b99cd6..600328c49c 100644 --- a/deps/icu-small/source/common/unicode/ubrk.h +++ b/deps/icu-small/source/common/unicode/ubrk.h @@ -230,7 +230,8 @@ typedef enum USentenceBreakTag { * @param locale The locale specifying the text-breaking conventions. Note that * locale keys such as "lb" and "ss" may be used to modify text break behavior, * see general discussion of BreakIterator C API. - * @param text The text to be iterated over. + * @param text The text to be iterated over. May be null, in which case ubrk_setText() is + * used to specify the text to be iterated. * @param textLength The number of characters in text, or -1 if null-terminated. * @param status A UErrorCode to receive any errors. * @return A UBreakIterator for the specified locale. diff --git a/deps/icu-small/source/common/unicode/ucasemap.h b/deps/icu-small/source/common/unicode/ucasemap.h index 18e6c2ba0b..6b253e3d63 100644 --- a/deps/icu-small/source/common/unicode/ucasemap.h +++ b/deps/icu-small/source/common/unicode/ucasemap.h @@ -23,6 +23,7 @@ #include "unicode/utypes.h" #include "unicode/localpointer.h" +#include "unicode/stringoptions.h" #include "unicode/ustring.h" /** @@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode); U_STABLE void U_EXPORT2 ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode); -/** - * Do not lowercase non-initial parts of words when titlecasing. - * Option bit for titlecasing APIs that take an options bit set. - * - * By default, titlecasing will titlecase the first cased character - * of a word and lowercase all other characters. - * With this option, the other characters will not be modified. - * - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @see UnicodeString::toTitle - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_LOWERCASE 0x100 - -/** - * Do not adjust the titlecasing indexes from BreakIterator::next() indexes; - * titlecase exactly the characters at breaks from the iterator. - * Option bit for titlecasing APIs that take an options bit set. - * - * By default, titlecasing will take each break iterator index, - * adjust it by looking for the next cased character, and titlecase that one. - * Other characters are lowercased. - * - * This follows Unicode 4 & 5 section 3.13 Default Case Operations: - * - * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex - * #29, "Text Boundaries." Between each pair of word boundaries, find the first - * cased character F. If F exists, map F to default_title(F); then map each - * subsequent character C to default_lower(C). - * - * @see ucasemap_setOptions - * @see ucasemap_toTitle - * @see ucasemap_utf8ToTitle - * @see UnicodeString::toTitle - * @see U_TITLECASE_NO_LOWERCASE - * @stable ICU 3.8 - */ -#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200 - -/** - * Omit unchanged text when case-mapping with Edits. - * - * @see CaseMap - * @see Edits - * @draft ICU 59 - */ -#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000 - #if !UCONFIG_NO_BREAK_ITERATION /** @@ -251,7 +202,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode * The standard titlecase iterator for the root locale implements the * algorithm of Unicode TR 21. * - * This function uses only the setUText(), first(), next() and close() methods of the + * This function uses only the setText(), first() and next() methods of the * provided break iterator. * * The result may be longer or shorter than the original. diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h index 8174ca23e6..3613374d9a 100644 --- a/deps/icu-small/source/common/unicode/uchar.h +++ b/deps/icu-small/source/common/unicode/uchar.h @@ -26,6 +26,7 @@ #define UCHAR_H #include "unicode/utypes.h" +#include "unicode/stringoptions.h" U_CDECL_BEGIN @@ -41,7 +42,7 @@ U_CDECL_BEGIN * @see u_getUnicodeVersion * @stable ICU 2.0 */ -#define U_UNICODE_VERSION "9.0" +#define U_UNICODE_VERSION "10.0" /** * \file @@ -148,8 +149,9 @@ U_CDECL_BEGIN * * The properties APIs are intended to reflect Unicode properties as defined * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR). - * For details about the properties see http://www.unicode.org/ucd/ . - * For names of Unicode properties see the UCD file PropertyAliases.txt. + * + * For details about the properties see + * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/). * * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2, * then properties marked with "new in Unicode 3.2" are not or not fully available. @@ -427,12 +429,29 @@ typedef enum UProperty { * @stable ICU 57 */ UCHAR_EMOJI_MODIFIER_BASE=60, + /** + * Binary property Emoji_Component. + * See http://www.unicode.org/reports/tr51/#Emoji_Properties + * + * @stable ICU 60 + */ + UCHAR_EMOJI_COMPONENT=61, + /** + * Binary property Regional_Indicator. + * @stable ICU 60 + */ + UCHAR_REGIONAL_INDICATOR=62, + /** + * Binary property Prepended_Concatenation_Mark. + * @stable ICU 60 + */ + UCHAR_PREPENDED_CONCATENATION_MARK=63, #ifndef U_HIDE_DEPRECATED_API /** * One more than the last constant for binary Unicode properties. * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UCHAR_BINARY_LIMIT=61, + UCHAR_BINARY_LIMIT, #endif // U_HIDE_DEPRECATED_API /** Enumerated property Bidi_Class. @@ -1647,6 +1666,23 @@ enum UBlockCode { /** @stable ICU 58 */ UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/ + // New blocks in Unicode 10.0 + + /** @stable ICU 60 */ + UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/ + /** @stable ICU 60 */ + UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/ + /** @stable ICU 60 */ + UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/ + /** @stable ICU 60 */ + UBLOCK_NUSHU = 277, /*[1B170]*/ + /** @stable ICU 60 */ + UBLOCK_SOYOMBO = 278, /*[11A50]*/ + /** @stable ICU 60 */ + UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/ + /** @stable ICU 60 */ + UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UBlockCode value. @@ -1654,7 +1690,7 @@ enum UBlockCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - UBLOCK_COUNT = 274, + UBLOCK_COUNT = 281, #endif // U_HIDE_DEPRECATED_API /** @stable ICU 2.0 */ @@ -1930,6 +1966,19 @@ typedef enum UJoiningGroup { U_JG_AFRICAN_FEH, /**< @stable ICU 58 */ U_JG_AFRICAN_NOON, /**< @stable ICU 58 */ U_JG_AFRICAN_QAF, /**< @stable ICU 58 */ + + U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_JA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_RA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */ + U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UJoiningGroup value. @@ -3521,27 +3570,6 @@ u_toupper(UChar32 c); U_STABLE UChar32 U_EXPORT2 u_totitle(UChar32 c); -/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */ -#define U_FOLD_CASE_DEFAULT 0 - -/** - * Option value for case folding: - * - * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I - * and dotless i appropriately for Turkic languages (tr, az). - * - * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that - * are to be included for default mappings and - * excluded for the Turkic-specific mappings. - * - * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that - * are to be excluded for default mappings and - * included for the Turkic-specific mappings. - * - * @stable ICU 2.0 - */ -#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1 - /** * The given character is mapped to its case folding equivalent according to * UnicodeData.txt and CaseFolding.txt; diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h index d0bfcb13a6..3f73af37b8 100644 --- a/deps/icu-small/source/common/unicode/uclean.h +++ b/deps/icu-small/source/common/unicode/uclean.h @@ -149,7 +149,7 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem); * @system */ U_STABLE void U_EXPORT2 -u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f, +u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f, UErrorCode *status); U_CDECL_END diff --git a/deps/icu-small/source/common/unicode/uconfig.h b/deps/icu-small/source/common/unicode/uconfig.h index 25f19a1a61..5e28a146de 100644 --- a/deps/icu-small/source/common/unicode/uconfig.h +++ b/deps/icu-small/source/common/unicode/uconfig.h @@ -76,7 +76,7 @@ #endif /** - * Determines wheter to enable auto cleanup of libraries. + * Determines whether to enable auto cleanup of libraries. * @internal */ #ifndef UCLN_NO_AUTO_CLEANUP @@ -262,7 +262,8 @@ /** * \def UCONFIG_NO_CONVERSION - * ICU will not completely build with this switch turned on. + * ICU will not completely build (compiling the tools fails) with this + * switch turned on. * This switch turns off all converters. * * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1 @@ -320,7 +321,9 @@ */ #ifndef UCONFIG_NO_NORMALIZATION # define UCONFIG_NO_NORMALIZATION 0 -#elif UCONFIG_NO_NORMALIZATION +#endif + +#if UCONFIG_NO_NORMALIZATION /* common library */ /* ICU 50 CJK dictionary BreakIterator uses normalization */ # define UCONFIG_NO_BREAK_ITERATION 1 diff --git a/deps/icu-small/source/common/unicode/udisplaycontext.h b/deps/icu-small/source/common/unicode/udisplaycontext.h index c4f6c957e9..398481c681 100644 --- a/deps/icu-small/source/common/unicode/udisplaycontext.h +++ b/deps/icu-small/source/common/unicode/udisplaycontext.h @@ -44,14 +44,12 @@ enum UDisplayContextType { * @stable ICU 54 */ UDISPCTX_TYPE_DISPLAY_LENGTH = 2, -#ifndef U_HIDE_DRAFT_API /** * Type to retrieve the substitute handling setting, e.g. * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE. - * @draft ICU 58 + * @stable ICU 58 */ UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3 -#endif /* U_HIDE_DRAFT_API */ }; /** * @stable ICU 51 @@ -143,7 +141,6 @@ enum UDisplayContext { * @stable ICU 54 */ UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1, -#ifndef U_HIDE_DRAFT_API /** * ================================ * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or @@ -154,16 +151,15 @@ enum UDisplayContext { * A possible setting for SUBSTITUTE_HANDLING: * Returns a fallback value (e.g., the input code) when no data is available. * This is the default value. - * @draft ICU 58 + * @stable ICU 58 */ UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0, /** * A possible setting for SUBSTITUTE_HANDLING: * Returns a null value when no data is available. - * @draft ICU 58 + * @stable ICU 58 */ UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1 -#endif /* U_HIDE_DRAFT_API */ }; /** diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h index e0ab0b9eb7..b99a686126 100644 --- a/deps/icu-small/source/common/unicode/unistr.h +++ b/deps/icu-small/source/common/unicode/unistr.h @@ -38,16 +38,6 @@ struct UConverter; // unicode/ucnv.h -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also ustring.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - #ifndef USTRING_H /** * \ingroup ustring_ustrlen @@ -1730,7 +1720,7 @@ public: */ template<typename StringClass> StringClass &toUTF8String(StringClass &result) const { - StringByteSink<StringClass> sbs(&result); + StringByteSink<StringClass> sbs(&result, length()); toUTF8(sbs); return result; } @@ -1901,7 +1891,6 @@ public: */ UnicodeString &fastCopyFrom(const UnicodeString &src); -#if U_HAVE_RVALUE_REFERENCES /** * Move assignment operator, might leave src in bogus state. * This string will have the same contents and state that the source string had. @@ -1913,7 +1902,7 @@ public: UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT { return moveFrom(src); } -#endif + // do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API /** * Move assignment, might leave src in bogus state. @@ -2786,11 +2775,11 @@ public: * break iterator is opened. * Otherwise the provided iterator is set to the string's text. * @param locale The locale to consider. + * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, + * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, + * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES. * @param options Options bit set, see ucasemap_open(). * @return A reference to this. - * @see U_TITLECASE_NO_LOWERCASE - * @see U_TITLECASE_NO_BREAK_ADJUSTMENT - * @see ucasemap_open * @stable ICU 3.8 */ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options); @@ -3360,7 +3349,6 @@ public: */ UnicodeString(const UnicodeString& that); -#if U_HAVE_RVALUE_REFERENCES /** * Move constructor, might leave src in bogus state. * This string will have the same contents and state that the source string had. @@ -3368,7 +3356,6 @@ public: * @stable ICU 56 */ UnicodeString(UnicodeString &&src) U_NOEXCEPT; -#endif /** * 'Substring' constructor from tail of source string. diff --git a/deps/icu-small/source/common/unicode/unorm.h b/deps/icu-small/source/common/unicode/unorm.h index 1b5af16700..3839de1295 100644 --- a/deps/icu-small/source/common/unicode/unorm.h +++ b/deps/icu-small/source/common/unicode/unorm.h @@ -210,7 +210,7 @@ enum { * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR. * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 unorm_normalize(const UChar *source, int32_t sourceLength, UNormalizationMode mode, int32_t options, UChar *result, int32_t resultLength, @@ -236,7 +236,7 @@ unorm_normalize(const UChar *source, int32_t sourceLength, * @see unorm_isNormalized * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE UNormalizationCheckResult U_EXPORT2 +U_DEPRECATED UNormalizationCheckResult U_EXPORT2 unorm_quickCheck(const UChar *source, int32_t sourcelength, UNormalizationMode mode, UErrorCode *status); @@ -257,7 +257,7 @@ unorm_quickCheck(const UChar *source, int32_t sourcelength, * @see unorm_isNormalized * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE UNormalizationCheckResult U_EXPORT2 +U_DEPRECATED UNormalizationCheckResult U_EXPORT2 unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode); @@ -283,7 +283,7 @@ unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength, * @see unorm_quickCheck * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE UBool U_EXPORT2 +U_DEPRECATED UBool U_EXPORT2 unorm_isNormalized(const UChar *src, int32_t srcLength, UNormalizationMode mode, UErrorCode *pErrorCode); @@ -305,7 +305,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength, * @see unorm_isNormalized * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE UBool U_EXPORT2 +U_DEPRECATED UBool U_EXPORT2 unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, UNormalizationMode mode, int32_t options, UErrorCode *pErrorCode); @@ -383,7 +383,7 @@ unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength, * * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 unorm_next(UCharIterator *src, UChar *dest, int32_t destCapacity, UNormalizationMode mode, int32_t options, @@ -416,7 +416,7 @@ unorm_next(UCharIterator *src, * * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 unorm_previous(UCharIterator *src, UChar *dest, int32_t destCapacity, UNormalizationMode mode, int32_t options, @@ -460,7 +460,7 @@ unorm_previous(UCharIterator *src, * * @deprecated ICU 56 Use unorm2.h instead. */ -U_STABLE int32_t U_EXPORT2 +U_DEPRECATED int32_t U_EXPORT2 unorm_concatenate(const UChar *left, int32_t leftLength, const UChar *right, int32_t rightLength, UChar *dest, int32_t destCapacity, diff --git a/deps/icu-small/source/common/unicode/unorm2.h b/deps/icu-small/source/common/unicode/unorm2.h index c6d3494d70..a9bd02f256 100644 --- a/deps/icu-small/source/common/unicode/unorm2.h +++ b/deps/icu-small/source/common/unicode/unorm2.h @@ -32,6 +32,7 @@ #include "unicode/utypes.h" #include "unicode/localpointer.h" +#include "unicode/stringoptions.h" #include "unicode/uset.h" /** @@ -527,30 +528,6 @@ U_STABLE UBool U_EXPORT2 unorm2_isInert(const UNormalizer2 *norm2, UChar32 c); /** - * Option bit for unorm_compare: - * Both input strings are assumed to fulfill FCD conditions. - * @stable ICU 2.2 - */ -#define UNORM_INPUT_IS_FCD 0x20000 - -/** - * Option bit for unorm_compare: - * Perform case-insensitive comparison. - * @stable ICU 2.2 - */ -#define U_COMPARE_IGNORE_CASE 0x10000 - -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and ustring.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - -/** * Compares two strings for canonical equivalence. * Further options include case-insensitive comparison and * code point order (as opposed to code unit order). diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h index 21c839abbf..982655c442 100644 --- a/deps/icu-small/source/common/unicode/urename.h +++ b/deps/icu-small/source/common/unicode/urename.h @@ -107,6 +107,7 @@ #define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data) #define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data) #define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup) +#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl) #define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup) #define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup) #define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats) @@ -944,6 +945,7 @@ #define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget) #define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti) #define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init) +#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize) #define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput) #define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi) #define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove) @@ -1654,6 +1656,7 @@ #define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN) #define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN) #define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale) +#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator) #define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold) #define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower) #define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle) diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h index 1420578f02..3ec235d50c 100644 --- a/deps/icu-small/source/common/unicode/uscript.h +++ b/deps/icu-small/source/common/unicode/uscript.h @@ -444,6 +444,13 @@ typedef enum UScriptCode { /** @stable ICU 58 */ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */ + /** @stable ICU 60 */ + USCRIPT_MASARAM_GONDI = 175,/* Gonm */ + /** @stable ICU 60 */ + USCRIPT_SOYOMBO = 176,/* Soyo */ + /** @stable ICU 60 */ + USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */ + #ifndef U_HIDE_DEPRECATED_API /** * One more than the highest normal UScriptCode value. @@ -451,7 +458,7 @@ typedef enum UScriptCode { * * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420. */ - USCRIPT_CODE_LIMIT = 175 + USCRIPT_CODE_LIMIT = 178 #endif // U_HIDE_DEPRECATED_API } UScriptCode; diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h index 2099ab5913..1ea27126cc 100644 --- a/deps/icu-small/source/common/unicode/ustring.h +++ b/deps/icu-small/source/common/unicode/ustring.h @@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1, U_STABLE int32_t U_EXPORT2 u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder); -#ifndef U_COMPARE_CODE_POINT_ORDER -/* see also unistr.h and unorm.h */ -/** - * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc: - * Compare strings in code point order instead of code unit order. - * @stable ICU 2.2 - */ -#define U_COMPARE_CODE_POINT_ORDER 0x8000 -#endif - /** * Compare two strings case-insensitively using full case folding. * This is equivalent to diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h index edcb267597..55709d403a 100644 --- a/deps/icu-small/source/common/unicode/utext.h +++ b/deps/icu-small/source/common/unicode/utext.h @@ -768,7 +768,7 @@ utext_extract(UText *ut, */ #define UTEXT_SETNATIVEINDEX(ut, ix) \ { int64_t __offset = (ix) - (ut)->chunkNativeStart; \ - if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \ + if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \ (ut)->chunkOffset=(int32_t)__offset; \ } else { \ utext_setNativeIndex((ut), (ix)); } } diff --git a/deps/icu-small/source/common/unicode/utf.h b/deps/icu-small/source/common/unicode/utf.h index ab7e9ac96a..aa56980691 100644 --- a/deps/icu-small/source/common/unicode/utf.h +++ b/deps/icu-small/source/common/unicode/utf.h @@ -23,9 +23,6 @@ * This file defines macros for checking whether a code point is * a surrogate or a non-character etc. * - * The UChar and UChar32 data types for Unicode code units and code points - * are defined in umachine.h because they can be machine-dependent. - * * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h * and itself includes utf8.h and utf16.h after some * common definitions. @@ -50,11 +47,11 @@ * but are optimized for the much more frequently occurring BMP code points. * * umachine.h defines UChar to be an unsigned 16-bit integer. - * Where available, UChar is defined to be a char16_t - * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t. + * Since ICU 59, ICU uses char16_t in C++, UChar only in C, + * and defines UChar=char16_t by default. See the UChar API docs for details. * * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit - * Unicode code point (Unicode scalar value, 0..0x10ffff). + * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1). * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as * the definition of UChar. For details see the documentation for UChar32 itself. * @@ -63,11 +60,20 @@ * For actual Unicode character properties see uchar.h. * * By default, string operations must be done with error checking in case - * a string is not well-formed UTF-16. - * The macros will detect if a surrogate code unit is unpaired + * a string is not well-formed UTF-16 or UTF-8. + * + * The U16_ macros detect if a surrogate code unit is unpaired * (lead unit without trail unit or vice versa) and just return the unit itself * as the code point. * + * The U8_ macros detect illegal byte sequences and return a negative value. + * Starting with ICU 60, the observable length of a single illegal byte sequence + * skipped by one of these macros follows the Unicode 6+ recommendation + * which is consistent with the W3C Encoding Standard. + * + * There are ..._OR_FFFD versions of both U16_ and U8_ macros + * that return U+FFFD for illegal code unit sequences. + * * The regular "safe" macros require that the initial, passed-in string index * is within bounds. They only check the index when they read more than one * code unit. This is usually done with code similar to the following loop: @@ -91,10 +97,7 @@ * The performance differences are much larger here because UTF-8 provides so * many opportunities for malformed sequences. * The unsafe UTF-8 macros are entirely implemented inside the macro definitions - * and are fast, while the safe UTF-8 macros call functions for all but the - * trivial (ASCII) cases. - * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common - * characters inline as well.) + * and are fast, while the safe UTF-8 macros call functions for some complicated cases. * * Unlike with UTF-16, malformed sequences cannot be expressed with distinct * code point values (0..U+10ffff). They are indicated with negative values instead. @@ -126,8 +129,7 @@ */ #define U_IS_UNICODE_NONCHAR(c) \ ((c)>=0xfdd0 && \ - ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \ - (uint32_t)(c)<=0x10ffff) + ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff) /** * Is c a Unicode code point value (0..U+10ffff) @@ -148,9 +150,7 @@ */ #define U_IS_UNICODE_CHAR(c) \ ((uint32_t)(c)<0xd800 || \ - ((uint32_t)(c)>0xdfff && \ - (uint32_t)(c)<=0x10ffff && \ - !U_IS_UNICODE_NONCHAR(c))) + (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c))) /** * Is this code point a BMP code point (U+0000..U+ffff)? diff --git a/deps/icu-small/source/common/unicode/utf16.h b/deps/icu-small/source/common/unicode/utf16.h index 0665381612..b9b9c59d3c 100644 --- a/deps/icu-small/source/common/unicode/utf16.h +++ b/deps/icu-small/source/common/unicode/utf16.h @@ -185,8 +185,8 @@ * * The length can be negative for a NUL-terminated string. * - * If the offset points to a single, unpaired surrogate, then that itself - * will be returned as the code point. + * If the offset points to a single, unpaired surrogate, then + * c is set to that unpaired surrogate. * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT. * * @param s const UChar * string @@ -213,6 +213,53 @@ } \ } +#ifndef U_HIDE_DRAFT_API + +/** + * Get a code point from a string at a random-access offset, + * without changing the offset. + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The offset may point to either the lead or trail surrogate unit + * for a supplementary code point, in which case the macro will read + * the adjacent matching surrogate as well. + * + * The length can be negative for a NUL-terminated string. + * + * If the offset points to a single, unpaired surrogate, then + * c is set to U+FFFD. + * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<=i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_GET_UNSAFE + * @draft ICU 60 + */ +#define U16_GET_OR_FFFD(s, start, i, length, c) { \ + (c)=(s)[i]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c)) { \ + if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } else { \ + if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + /* definitions with forward iteration --------------------------------------- */ /** @@ -253,8 +300,7 @@ * for a supplementary code point, in which case the macro will read * the following trail surrogate as well. * If the offset points to a trail surrogate or - * to a single, unpaired lead surrogate, then that itself - * will be returned as the code point. + * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param i string offset, must be i<length @@ -274,6 +320,44 @@ } \ } +#ifndef U_HIDE_DRAFT_API + +/** + * Get a code point from a string at a code point boundary offset, + * and advance the offset to the next code point boundary. + * (Post-incrementing forward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The length can be negative for a NUL-terminated string. + * + * The offset may point to the lead surrogate unit + * for a supplementary code point, in which case the macro will read + * the following trail surrogate as well. + * If the offset points to a trail surrogate or + * to a single, unpaired lead surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param i string offset, must be i<length + * @param length string length + * @param c output UChar32 variable + * @see U16_NEXT_UNSAFE + * @draft ICU 60 + */ +#define U16_NEXT_OR_FFFD(s, i, length, c) { \ + (c)=(s)[(i)++]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \ + ++(i); \ + (c)=U16_GET_SUPPLEMENTARY((c), __c2); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + /** * Append a code point to a string, overwriting 1 or 2 code units. * The offset points to the current end of the string contents @@ -481,8 +565,7 @@ * for a supplementary code point, then the macro will read * the preceding lead surrogate as well. * If the offset is behind a lead surrogate or behind a single, unpaired - * trail surrogate, then that itself - * will be returned as the code point. + * trail surrogate, then c is set to that unpaired surrogate. * * @param s const UChar * string * @param start starting string offset (usually 0) @@ -502,6 +585,43 @@ } \ } +#ifndef U_HIDE_DRAFT_API + +/** + * Move the string offset from one code point boundary to the previous one + * and get the code point between them. + * (Pre-decrementing backward iteration.) + * "Safe" macro, handles unpaired surrogates and checks for string boundaries. + * + * The input offset may be the same as the string length. + * If the offset is behind a trail surrogate unit + * for a supplementary code point, then the macro will read + * the preceding lead surrogate as well. + * If the offset is behind a lead surrogate or behind a single, unpaired + * trail surrogate, then c is set to U+FFFD. + * + * @param s const UChar * string + * @param start starting string offset (usually 0) + * @param i string offset, must be start<i + * @param c output UChar32 variable + * @see U16_PREV_UNSAFE + * @draft ICU 60 + */ +#define U16_PREV_OR_FFFD(s, start, i, c) { \ + (c)=(s)[--(i)]; \ + if(U16_IS_SURROGATE(c)) { \ + uint16_t __c2; \ + if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \ + --(i); \ + (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \ + } else { \ + (c)=0xfffd; \ + } \ + } \ +} + +#endif // U_HIDE_DRAFT_API + /** * Move the string offset from one code point boundary to the previous one. * (Pre-decrementing backward iteration.) diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h index 9e56b50474..59b4b25570 100644 --- a/deps/icu-small/source/common/unicode/utf8.h +++ b/deps/icu-small/source/common/unicode/utf8.h @@ -41,34 +41,24 @@ /* internal definitions ----------------------------------------------------- */ - - /** * Counts the trail bytes for a UTF-8 lead byte. - * Returns 0 for 0..0xbf as well as for 0xfe and 0xff. + * Returns 0 for 0..0xc1 as well as for 0xf5..0xff. + * leadByte might be evaluated multiple times. * * This is internal since it is not meant to be called directly by external clients; * however it is called by public macros in this file and thus must remain stable. * - * Note: Beginning with ICU 50, the implementation uses a multi-condition expression - * which was shown in 2012 (on x86-64) to compile to fast, branch-free code. - * leadByte is evaluated multiple times. - * - * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes: - * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte]) - * leadByte was evaluated exactly once. - * * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff. * @internal */ #define U8_COUNT_TRAIL_BYTES(leadByte) \ - ((uint8_t)(leadByte)<0xf0 ? \ - ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \ - (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0) + (U8_IS_LEAD(leadByte) ? \ + ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0) /** * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence. - * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF. + * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff. * leadByte might be evaluated multiple times. * * This is internal since it is not meant to be called directly by external clients; @@ -78,7 +68,7 @@ * @internal */ #define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \ - (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0)) + (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)) /** * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value. @@ -90,6 +80,40 @@ #define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1) /** + * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * Lead byte E0..EF bits 3..0 are used as byte index, + * first trail byte bits 7..5 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD3_AND_T1 + * @internal + */ +#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30" + +/** + * Internal 3-byte UTF-8 validity check. + * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5))) + +/** + * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1. + * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence. + * First trail byte bits 7..4 are used as byte index, + * lead byte F0..F4 bits 2..0 are used as bit index into that byte. + * @see U8_IS_VALID_LEAD4_AND_T1 + * @internal + */ +#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00" + +/** + * Internal 4-byte UTF-8 validity check. + * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence. + * @internal + */ +#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7))) + +/** * Function for handling "next code point" with error-checking. * * This is internal since it is not meant to be called directly by external clients; @@ -148,20 +172,21 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); #define U8_IS_SINGLE(c) (((c)&0x80)==0) /** - * Is this code unit (byte) a UTF-8 lead byte? + * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4) * @param c 8-bit code unit (byte) * @return TRUE or FALSE * @stable ICU 2.4 */ -#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e) +#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32) +// 0x32=0xf4-0xc2 /** - * Is this code unit (byte) a UTF-8 trail byte? + * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF) * @param c 8-bit code unit (byte) * @return TRUE or FALSE * @stable ICU 2.4 */ -#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80) +#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40) /** * How many code units (bytes) are used for the UTF-8 encoding @@ -289,7 +314,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); */ #define U8_NEXT_UNSAFE(s, i, c) { \ (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ + if(!U8_IS_SINGLE(c)) { \ if((c)<0xe0) { \ (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \ } else if((c)<0xf0) { \ @@ -325,22 +350,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); */ #define U8_NEXT(s, i, length, c) { \ (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ + if(!U8_IS_SINGLE(c)) { \ uint8_t __t1, __t2; \ - if( /* handle U+1000..U+CFFF inline */ \ - (0xe0<(c) && (c)<=0xec) && \ - (((i)+1)<(length) || (length)<0) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ - (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ - ) { \ - /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ - (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ + if( /* handle U+0800..U+FFFF inline */ \ + (0xe0<=(c) && (c)<0xf0) && \ + (((i)+1)<(length) || (length)<0) && \ + U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ + (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ + (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ (i)+=2; \ } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ - ) { \ + ((c)<0xe0 && (c)>=0xc2) && \ + ((i)!=(length)) && \ + (__t1=(s)[i]-0x80)<=0x3f) { \ (c)=(((c)&0x1f)<<6)|__t1; \ ++(i); \ } else { \ @@ -376,22 +398,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); */ #define U8_NEXT_OR_FFFD(s, i, length, c) { \ (c)=(uint8_t)(s)[(i)++]; \ - if((c)>=0x80) { \ + if(!U8_IS_SINGLE(c)) { \ uint8_t __t1, __t2; \ - if( /* handle U+1000..U+CFFF inline */ \ - (0xe0<(c) && (c)<=0xec) && \ - (((i)+1)<(length) || (length)<0) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \ - (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \ - ) { \ - /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \ - (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \ + if( /* handle U+0800..U+FFFF inline */ \ + (0xe0<=(c) && (c)<0xf0) && \ + (((i)+1)<(length) || (length)<0) && \ + U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \ + (__t2=(s)[(i)+1]-0x80)<=0x3f) { \ + (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \ (i)+=2; \ } else if( /* handle U+0080..U+07FF inline */ \ - ((c)<0xe0 && (c)>=0xc2) && \ - ((i)!=(length)) && \ - (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \ - ) { \ + ((c)<0xe0 && (c)>=0xc2) && \ + ((i)!=(length)) && \ + (__t1=(s)[i]-0x80)<=0x3f) { \ (c)=(((c)&0x1f)<<6)|__t1; \ ++(i); \ } else { \ @@ -476,7 +495,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_FWD_1_UNSAFE(s, i) { \ - (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \ + (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \ } /** @@ -493,15 +512,24 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); * @stable ICU 2.4 */ #define U8_FWD_1(s, i, length) { \ - uint8_t __b=(uint8_t)(s)[(i)++]; \ - if(U8_IS_LEAD(__b)) { \ - uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \ - if((i)+__count>(length) && (length)>=0) { \ - __count=(uint8_t)((length)-(i)); \ - } \ - while(__count>0 && U8_IS_TRAIL((s)[i])) { \ - ++(i); \ - --__count; \ + uint8_t __b=(s)[(i)++]; \ + if(U8_IS_LEAD(__b) && (i)!=(length)) { \ + uint8_t __t1=(s)[i]; \ + if((0xe0<=__b && __b<0xf0)) { \ + if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ + } else if(__b<0xe0) { \ + if(U8_IS_TRAIL(__t1)) { \ + ++(i); \ + } \ + } else /* c>=0xf0 */ { \ + if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \ + ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \ + ++(i); \ + } \ } \ } \ } @@ -615,7 +643,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); /* c is a trail byte */ \ (c)&=0x3f; \ for(;;) { \ - __b=(uint8_t)(s)[--(i)]; \ + __b=(s)[--(i)]; \ if(__b>=0xc0) { \ U8_MASK_LEAD_BYTE(__b, __count); \ (c)|=(UChar32)__b<<__shift; \ @@ -651,7 +679,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); */ #define U8_PREV(s, start, i, c) { \ (c)=(uint8_t)(s)[--(i)]; \ - if((c)>=0x80) { \ + if(!U8_IS_SINGLE(c)) { \ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \ } \ } @@ -682,7 +710,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i); */ #define U8_PREV_OR_FFFD(s, start, i, c) { \ (c)=(uint8_t)(s)[--(i)]; \ - if((c)>=0x80) { \ + if(!U8_IS_SINGLE(c)) { \ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \ } \ } diff --git a/deps/icu-small/source/common/unicode/utf_old.h b/deps/icu-small/source/common/unicode/utf_old.h index cb229cb301..55c17c01df 100644 --- a/deps/icu-small/source/common/unicode/utf_old.h +++ b/deps/icu-small/source/common/unicode/utf_old.h @@ -145,7 +145,22 @@ #ifndef __UTF_OLD_H__ #define __UTF_OLD_H__ -#ifndef U_HIDE_DEPRECATED_API +/** + * \def U_HIDE_OBSOLETE_UTF_OLD_H + * + * Hides the obsolete definitions in unicode/utf_old.h. + * Recommended to be set to 1 at compile time to make sure + * the long-deprecated macros are no longer used. + * + * For reasons for the deprecation see the utf_old.h file comments. + * + * @internal + */ +#ifndef U_HIDE_OBSOLETE_UTF_OLD_H +# define U_HIDE_OBSOLETE_UTF_OLD_H 0 +#endif + +#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H #include "unicode/utf.h" #include "unicode/utf8.h" @@ -1184,6 +1199,6 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I */ #define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length) -#endif /* U_HIDE_DEPRECATED_API */ +#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H #endif diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h index cae59ad880..ce7dec1553 100644 --- a/deps/icu-small/source/common/unicode/uvernum.h +++ b/deps/icu-small/source/common/unicode/uvernum.h @@ -58,7 +58,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION_MAJOR_NUM 59 +#define U_ICU_VERSION_MAJOR_NUM 60 /** The current ICU minor version as an integer. * This value will change in the subsequent releases of ICU @@ -84,7 +84,7 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#define U_ICU_VERSION_SUFFIX _59 +#define U_ICU_VERSION_SUFFIX _60 /** * \def U_DEF2_ICU_ENTRY_POINT_RENAME @@ -119,24 +119,19 @@ * This value will change in the subsequent releases of ICU * @stable ICU 2.4 */ -#define U_ICU_VERSION "59.1" +#define U_ICU_VERSION "60.1" /** The current ICU library major/minor version as a string without dots, for library name suffixes. * This value will change in the subsequent releases of ICU * @stable ICU 2.6 */ -#if U_PLATFORM_HAS_WINUWP_API == 0 -#define U_ICU_VERSION_SHORT "59" -#else -// U_DISABLE_RENAMING does not impact dat file name -#define U_ICU_VERSION_SHORT -#endif /* U_PLATFORM_HAS_WINUWP_API == 0 */ +#define U_ICU_VERSION_SHORT "60" #ifndef U_HIDE_INTERNAL_API /** Data version in ICU4C. * @internal ICU 4.4 Internal Use Only **/ -#define U_ICU_DATA_VERSION "59.1" +#define U_ICU_DATA_VERSION "60.1" #endif /* U_HIDE_INTERNAL_API */ /*=========================================================================== |