aboutsummaryrefslogtreecommitdiff
path: root/deps/icu-small/source/common/unicode
diff options
context:
space:
mode:
Diffstat (limited to 'deps/icu-small/source/common/unicode')
-rw-r--r--deps/icu-small/source/common/unicode/brkiter.h24
-rw-r--r--deps/icu-small/source/common/unicode/bytestream.h33
-rw-r--r--deps/icu-small/source/common/unicode/casemap.h177
-rw-r--r--deps/icu-small/source/common/unicode/char16ptr.h52
-rw-r--r--deps/icu-small/source/common/unicode/docmain.h2
-rw-r--r--deps/icu-small/source/common/unicode/edits.h195
-rw-r--r--deps/icu-small/source/common/unicode/filteredbrk.h42
-rw-r--r--deps/icu-small/source/common/unicode/localpointer.h37
-rw-r--r--deps/icu-small/source/common/unicode/locid.h2
-rw-r--r--deps/icu-small/source/common/unicode/normalizer2.h140
-rw-r--r--deps/icu-small/source/common/unicode/platform.h69
-rw-r--r--deps/icu-small/source/common/unicode/rbbi.h135
-rw-r--r--deps/icu-small/source/common/unicode/simpleformatter.h10
-rw-r--r--deps/icu-small/source/common/unicode/stringoptions.h198
-rw-r--r--deps/icu-small/source/common/unicode/stringtriebuilder.h18
-rw-r--r--deps/icu-small/source/common/unicode/ubiditransform.h31
-rw-r--r--deps/icu-small/source/common/unicode/ubrk.h3
-rw-r--r--deps/icu-small/source/common/unicode/ucasemap.h53
-rw-r--r--deps/icu-small/source/common/unicode/uchar.h80
-rw-r--r--deps/icu-small/source/common/unicode/uclean.h2
-rw-r--r--deps/icu-small/source/common/unicode/uconfig.h9
-rw-r--r--deps/icu-small/source/common/unicode/udisplaycontext.h10
-rw-r--r--deps/icu-small/source/common/unicode/unistr.h23
-rw-r--r--deps/icu-small/source/common/unicode/unorm.h16
-rw-r--r--deps/icu-small/source/common/unicode/unorm2.h25
-rw-r--r--deps/icu-small/source/common/unicode/urename.h3
-rw-r--r--deps/icu-small/source/common/unicode/uscript.h9
-rw-r--r--deps/icu-small/source/common/unicode/ustring.h10
-rw-r--r--deps/icu-small/source/common/unicode/utext.h2
-rw-r--r--deps/icu-small/source/common/unicode/utf.h34
-rw-r--r--deps/icu-small/source/common/unicode/utf16.h132
-rw-r--r--deps/icu-small/source/common/unicode/utf8.h148
-rw-r--r--deps/icu-small/source/common/unicode/utf_old.h19
-rw-r--r--deps/icu-small/source/common/unicode/uvernum.h15
34 files changed, 1236 insertions, 522 deletions
diff --git a/deps/icu-small/source/common/unicode/brkiter.h b/deps/icu-small/source/common/unicode/brkiter.h
index b1e4cc68c6..9c1ac7531b 100644
--- a/deps/icu-small/source/common/unicode/brkiter.h
+++ b/deps/icu-small/source/common/unicode/brkiter.h
@@ -250,7 +250,7 @@ public:
virtual int32_t next(void) = 0;
/**
- * Return character index of the current interator position within the text.
+ * Return character index of the current iterator position within the text.
* @return The boundary most recently returned.
* @stable ICU 2.0
*/
@@ -277,7 +277,7 @@ public:
virtual int32_t preceding(int32_t offset) = 0;
/**
- * Return true if the specfied position is a boundary position.
+ * Return true if the specified position is a boundary position.
* As a side effect, the current position of the iterator is set
* to the first boundary position at or following the specified offset.
* @param offset the offset to check.
@@ -331,7 +331,7 @@ public:
* @param fillInVec an array to be filled in with the status values.
* @param capacity the length of the supplied vector. A length of zero causes
* the function to return the number of status values, in the
- * normal way, without attemtping to store any values.
+ * normal way, without attempting to store any values.
* @param status receives error codes.
* @return The number of rule status values from rules that determined
* the most recent boundary returned by the break iterator.
@@ -469,7 +469,7 @@ public:
static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
/**
- * Get name of the object for the desired Locale, in the desired langauge.
+ * Get name of the object for the desired Locale, in the desired language.
* @param objectLocale must be from getAvailableLocales.
* @param displayLocale specifies the desired locale for output.
* @param name the fill-in parameter of the return value
@@ -482,7 +482,7 @@ public:
UnicodeString& name);
/**
- * Get name of the object for the desired Locale, in the langauge of the
+ * Get name of the object for the desired Locale, in the language of the
* default locale.
* @param objectLocale must be from getMatchingLocales
* @param name the fill-in parameter of the return value
@@ -629,10 +629,12 @@ protected:
/** @internal */
BreakIterator();
/** @internal */
- BreakIterator (const BreakIterator &other) : UObject(other) {}
+ BreakIterator (const BreakIterator &other);
#ifndef U_HIDE_INTERNAL_API
/** @internal */
- BreakIterator (const Locale& valid, const Locale& actual);
+ BreakIterator (const Locale& valid, const Locale &actual);
+ /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
+ BreakIterator &operator = (const BreakIterator &other);
#endif /* U_HIDE_INTERNAL_API */
private:
@@ -640,12 +642,6 @@ private:
/** @internal */
char actualLocale[ULOC_FULLNAME_CAPACITY];
char validLocale[ULOC_FULLNAME_CAPACITY];
-
- /**
- * The assignment operator has no real implementation.
- * It's provided to make the compiler happy. Do not call.
- */
- BreakIterator& operator=(const BreakIterator&);
};
#ifndef U_HIDE_DEPRECATED_API
@@ -661,5 +657,5 @@ U_NAMESPACE_END
#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
-#endif // _BRKITER
+#endif // BRKITER_H
//eof
diff --git a/deps/icu-small/source/common/unicode/bytestream.h b/deps/icu-small/source/common/unicode/bytestream.h
index 477892b275..9df23f79c5 100644
--- a/deps/icu-small/source/common/unicode/bytestream.h
+++ b/deps/icu-small/source/common/unicode/bytestream.h
@@ -126,8 +126,8 @@ public:
virtual void Flush();
private:
- ByteSink(const ByteSink &); // copy constructor not implemented
- ByteSink &operator=(const ByteSink &); // assignment operator not implemented
+ ByteSink(const ByteSink &) = delete;
+ ByteSink &operator=(const ByteSink &) = delete;
};
// -------------------------------------------------------------
@@ -217,9 +217,10 @@ private:
int32_t size_;
int32_t appended_;
UBool overflowed_;
- CheckedArrayByteSink(); ///< default constructor not implemented
- CheckedArrayByteSink(const CheckedArrayByteSink &); ///< copy constructor not implemented
- CheckedArrayByteSink &operator=(const CheckedArrayByteSink &); ///< assignment operator not implemented
+
+ CheckedArrayByteSink() = delete;
+ CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
+ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
};
/**
@@ -236,6 +237,21 @@ class StringByteSink : public ByteSink {
* @stable ICU 4.2
*/
StringByteSink(StringClass* dest) : dest_(dest) { }
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
+ *
+ * @param dest pointer to string object to append to
+ * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
+ * @draft ICU 60
+ */
+ StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
+ if (initialAppendCapacity > 0 &&
+ (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
+ dest->reserve(dest->length() + initialAppendCapacity);
+ }
+ }
+#endif // U_HIDE_DRAFT_API
/**
* Append "bytes[0,n-1]" to this.
* @param data the pointer to the bytes
@@ -245,9 +261,10 @@ class StringByteSink : public ByteSink {
virtual void Append(const char* data, int32_t n) { dest_->append(data, n); }
private:
StringClass* dest_;
- StringByteSink(); ///< default constructor not implemented
- StringByteSink(const StringByteSink &); ///< copy constructor not implemented
- StringByteSink &operator=(const StringByteSink &); ///< assignment operator not implemented
+
+ StringByteSink() = delete;
+ StringByteSink(const StringByteSink &) = delete;
+ StringByteSink &operator=(const StringByteSink &) = delete;
};
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/unicode/casemap.h b/deps/icu-small/source/common/unicode/casemap.h
index 98184820d5..4a4917bdca 100644
--- a/deps/icu-small/source/common/unicode/casemap.h
+++ b/deps/icu-small/source/common/unicode/casemap.h
@@ -8,6 +8,7 @@
#define __CASEMAP_H__
#include "unicode/utypes.h"
+#include "unicode/stringpiece.h"
#include "unicode/uobject.h"
/**
@@ -20,6 +21,7 @@ U_NAMESPACE_BEGIN
#ifndef U_HIDE_DRAFT_API
class BreakIterator;
+class ByteSink;
class Edits;
/**
@@ -36,7 +38,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@@ -48,7 +50,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -71,7 +74,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@@ -83,7 +86,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -112,8 +116,10 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
- * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
* It is set to the source string (setText())
* and used one or more times for iteration (first() and next()).
@@ -130,7 +136,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -159,7 +166,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@@ -172,7 +179,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -192,10 +200,133 @@ public:
* Lowercases a UTF-8 string and optionally records edits.
* Casing is locale-dependent and context-sensitive.
* The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToLower
+ * @draft ICU 60
+ */
+ static void utf8ToLower(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToUpper
+ * @draft ICU 60
+ */
+ static void utf8ToUpper(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, NULL = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setUText())
+ * and used one or more times for iteration (first() and next()).
+ * If NULL, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToTitle
+ * @draft ICU 60
+ */
+ static void utf8ToTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-8 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8FoldCase
+ * @draft ICU 60
+ */
+ static void utf8Fold(
+ uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Lowercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@@ -207,7 +338,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -217,7 +349,7 @@ public:
* @see ucasemap_utf8ToLower
* @draft ICU 59
*/
- static int32_t utf8ToLower(
+ static int32_t utf8ToLower(
const char *locale, uint32_t options,
const char *src, int32_t srcLength,
char *dest, int32_t destCapacity, Edits *edits,
@@ -230,7 +362,7 @@ public:
* The source string and the destination buffer must not overlap.
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
* @param dest A buffer for the result string. The result will be NUL-terminated if
@@ -242,7 +374,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -271,10 +404,12 @@ public:
* all others. (This can be modified with options bits.)
*
* @param locale The locale ID. ("" = root locale, NULL = default locale.)
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
- * U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT.
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* @param iter A break iterator to find the first characters of words that are to be titlecased.
- * It is set to the source string (setText())
+ * It is set to the source string (setUText())
* and used one or more times for iteration (first() and next()).
* If NULL, then a word break iterator for the locale is used
* (or something equivalent).
@@ -289,7 +424,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
@@ -317,7 +453,7 @@ public:
* The result may be longer or shorter than the original.
* The source string and the destination buffer must not overlap.
*
- * @param options Options bit set, usually 0. See UCASEMAP_OMIT_UNCHANGED_TEXT,
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
* U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
* @param src The original string.
* @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
@@ -330,7 +466,8 @@ public:
* @param edits Records edits for index mapping, working with styled text,
* and getting only changes (if any).
* The Edits contents is undefined if any error occurs.
- * This function calls edits->reset() first. edits can be NULL.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be NULL.
* @param errorCode Reference to an in/out error code value
* which must not indicate a failure before the function call.
* @return The length of the result string, if successful.
diff --git a/deps/icu-small/source/common/unicode/char16ptr.h b/deps/icu-small/source/common/unicode/char16ptr.h
index fa17c62446..fbce177591 100644
--- a/deps/icu-small/source/common/unicode/char16ptr.h
+++ b/deps/icu-small/source/common/unicode/char16ptr.h
@@ -95,45 +95,45 @@ private:
return reinterpret_cast<char16_t *>(t);
}
- char16_t *p;
+ char16_t *p_;
#else
union {
char16_t *cp;
uint16_t *up;
wchar_t *wp;
- } u;
+ } u_;
#endif
};
#ifdef U_ALIASING_BARRIER
-Char16Ptr::Char16Ptr(char16_t *p) : p(p) {}
+Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
-Char16Ptr::Char16Ptr(uint16_t *p) : p(cast(p)) {}
+Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
-Char16Ptr::Char16Ptr(wchar_t *p) : p(cast(p)) {}
+Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
#endif
-Char16Ptr::Char16Ptr(std::nullptr_t p) : p(p) {}
+Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
Char16Ptr::~Char16Ptr() {
- U_ALIASING_BARRIER(p);
+ U_ALIASING_BARRIER(p_);
}
-char16_t *Char16Ptr::get() const { return p; }
+char16_t *Char16Ptr::get() const { return p_; }
#else
-Char16Ptr::Char16Ptr(char16_t *p) { u.cp = p; }
+Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
-Char16Ptr::Char16Ptr(uint16_t *p) { u.up = p; }
+Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
-Char16Ptr::Char16Ptr(wchar_t *p) { u.wp = p; }
+Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
#endif
-Char16Ptr::Char16Ptr(std::nullptr_t p) { u.cp = p; }
+Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
Char16Ptr::~Char16Ptr() {}
-char16_t *Char16Ptr::get() const { return u.cp; }
+char16_t *Char16Ptr::get() const { return u_.cp; }
#endif
@@ -203,45 +203,45 @@ private:
return reinterpret_cast<const char16_t *>(t);
}
- const char16_t *p;
+ const char16_t *p_;
#else
union {
const char16_t *cp;
const uint16_t *up;
const wchar_t *wp;
- } u;
+ } u_;
#endif
};
#ifdef U_ALIASING_BARRIER
-ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p(p) {}
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
#if !U_CHAR16_IS_TYPEDEF
-ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p(cast(p)) {}
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
#endif
#if U_SIZEOF_WCHAR_T==2
-ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p(cast(p)) {}
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
#endif
-ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p(p) {}
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
ConstChar16Ptr::~ConstChar16Ptr() {
- U_ALIASING_BARRIER(p);
+ U_ALIASING_BARRIER(p_);
}
-const char16_t *ConstChar16Ptr::get() const { return p; }
+const char16_t *ConstChar16Ptr::get() const { return p_; }
#else
-ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u.cp = p; }
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
#if !U_CHAR16_IS_TYPEDEF
-ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u.up = p; }
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
#endif
#if U_SIZEOF_WCHAR_T==2
-ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u.wp = p; }
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
#endif
-ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u.cp = p; }
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
ConstChar16Ptr::~ConstChar16Ptr() {}
-const char16_t *ConstChar16Ptr::get() const { return u.cp; }
+const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
#endif
diff --git a/deps/icu-small/source/common/unicode/docmain.h b/deps/icu-small/source/common/unicode/docmain.h
index 6e59f3e388..3e645aee4a 100644
--- a/deps/icu-small/source/common/unicode/docmain.h
+++ b/deps/icu-small/source/common/unicode/docmain.h
@@ -140,7 +140,7 @@
* <tr>
* <td>Number Formatting</td>
* <td>unum.h</td>
- * <td>icu::NumberFormat</td>
+ * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)</td>
* </tr>
* <tr>
* <td>Number Spellout<br/>(Rule Based Number Formatting)</td>
diff --git a/deps/icu-small/source/common/unicode/edits.h b/deps/icu-small/source/common/unicode/edits.h
index 8d3becb7a2..082c3733a8 100644
--- a/deps/icu-small/source/common/unicode/edits.h
+++ b/deps/icu-small/source/common/unicode/edits.h
@@ -36,8 +36,32 @@ public:
* @draft ICU 59
*/
Edits() :
- array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0),
- errorCode(U_ZERO_ERROR) {}
+ array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
+ errorCode_(U_ZERO_ERROR) {}
+ /**
+ * Copy constructor.
+ * @param other source edits
+ * @draft ICU 60
+ */
+ Edits(const Edits &other) :
+ array(stackArray), capacity(STACK_CAPACITY), length(other.length),
+ delta(other.delta), numChanges(other.numChanges),
+ errorCode_(other.errorCode_) {
+ copyArray(other);
+ }
+ /**
+ * Move constructor, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * @param src source edits
+ * @draft ICU 60
+ */
+ Edits(Edits &&src) U_NOEXCEPT :
+ array(stackArray), capacity(STACK_CAPACITY), length(src.length),
+ delta(src.delta), numChanges(src.numChanges),
+ errorCode_(src.errorCode_) {
+ moveArray(src);
+ }
+
/**
* Destructor.
* @draft ICU 59
@@ -45,10 +69,28 @@ public:
~Edits();
/**
+ * Assignment operator.
+ * @param other source edits
+ * @return *this
+ * @draft ICU 60
+ */
+ Edits &operator=(const Edits &other);
+
+ /**
+ * Move assignment operator, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source edits
+ * @return *this
+ * @draft ICU 60
+ */
+ Edits &operator=(Edits &&src) U_NOEXCEPT;
+
+ /**
* Resets the data but may not release memory.
* @draft ICU 59
*/
- void reset();
+ void reset() U_NOEXCEPT;
/**
* Adds a record for an unchanged segment of text.
@@ -66,6 +108,9 @@ public:
* Sets the UErrorCode if an error occurred while recording edits.
* Preserves older error codes in the outErrorCode.
* Normally called from inside ICU string transformation functions, not user code.
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while recording edits.
+ * Otherwise unchanged.
* @return TRUE if U_FAILURE(outErrorCode)
* @draft ICU 59
*/
@@ -81,7 +126,13 @@ public:
* @return TRUE if there are any change edits
* @draft ICU 59
*/
- UBool hasChanges() const;
+ UBool hasChanges() const { return numChanges != 0; }
+
+ /**
+ * @return the number of change edits
+ * @draft ICU 60
+ */
+ int32_t numberOfChanges() const { return numChanges; }
/**
* Access to the list of edits.
@@ -91,6 +142,15 @@ public:
*/
struct U_COMMON_API Iterator U_FINAL : public UMemory {
/**
+ * Default constructor, empty iterator.
+ * @draft ICU 60
+ */
+ Iterator() :
+ array(nullptr), index(0), length(0),
+ remaining(0), onlyChanges_(FALSE), coarse(FALSE),
+ dir(0), changed(FALSE), oldLength_(0), newLength_(0),
+ srcIndex(0), replIndex(0), destIndex(0) {}
+ /**
* Copy constructor.
* @draft ICU 59
*/
@@ -103,6 +163,9 @@ public:
/**
* Advances to the next edit.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
* @return TRUE if there is another edit
* @draft ICU 59
*/
@@ -121,10 +184,86 @@ public:
* if the source index is out of bounds for the source string.
*
* @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
* @return TRUE if the edit for the source index was found
* @draft ICU 59
*/
- UBool findSourceIndex(int32_t i, UErrorCode &errorCode);
+ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, TRUE, errorCode) == 0;
+ }
+
+ /**
+ * Finds the edit that contains the destination index.
+ * The destination index may be found in a non-change
+ * even if normal iteration would skip non-changes.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the destination index is out of bounds for the destination string.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return TRUE if the edit for the destination index was found
+ * @draft ICU 60
+ */
+ UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, FALSE, errorCode) == 0;
+ }
+
+ /**
+ * Returns the destination index corresponding to the given source index.
+ * If the source index is inside a change edit (not at its start),
+ * then the destination index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return destination index; undefined if i is not 0..string length
+ * @draft ICU 60
+ */
+ int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Returns the source index corresponding to the given destination index.
+ * If the destination index is inside a change edit (not at its start),
+ * then the source index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return source index; undefined if i is not 0..string length
+ * @draft ICU 60
+ */
+ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
/**
* @return TRUE if this edit replaces oldLength() units with newLength() different ones.
@@ -167,15 +306,22 @@ public:
Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
int32_t readLength(int32_t head);
- void updateIndexes();
+ void updateNextIndexes();
+ void updatePreviousIndexes();
UBool noNext();
UBool next(UBool onlyChanges, UErrorCode &errorCode);
+ UBool previous(UErrorCode &errorCode);
+ /** @return -1: error or i<0; 0: found; 1: i>=string length */
+ int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
const uint16_t *array;
int32_t index, length;
+ // 0 if we are not within compressed equal-length changes.
+ // Otherwise the number of remaining changes, including the current one.
int32_t remaining;
UBool onlyChanges_, coarse;
+ int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
UBool changed;
int32_t oldLength_, newLength_;
int32_t srcIndex, replIndex, destIndex;
@@ -219,9 +365,39 @@ public:
return Iterator(array, length, FALSE, FALSE);
}
+ /**
+ * Merges the two input Edits and appends the result to this object.
+ *
+ * Consider two string transformations (for example, normalization and case mapping)
+ * where each records Edits in addition to writing an output string.<br>
+ * Edits ab reflect how substrings of input string a
+ * map to substrings of intermediate string b.<br>
+ * Edits bc reflect how substrings of intermediate string b
+ * map to substrings of output string c.<br>
+ * This function merges ab and bc such that the additional edits
+ * recorded in this object reflect how substrings of input string a
+ * map to substrings of output string c.
+ *
+ * If unrelated Edits are passed in where the output string of the first
+ * has a different length than the input string of the second,
+ * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
+ *
+ * @param ab reflects how substrings of input string a
+ * map to substrings of intermediate string b.
+ * @param bc reflects how substrings of intermediate string b
+ * map to substrings of output string c.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return *this, with the merged edits appended
+ * @draft ICU 60
+ */
+ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+
private:
- Edits(const Edits &) = delete;
- Edits &operator=(const Edits &) = delete;
+ void releaseArray() U_NOEXCEPT;
+ Edits &copyArray(const Edits &other);
+ Edits &moveArray(Edits &src) U_NOEXCEPT;
void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
@@ -234,7 +410,8 @@ private:
int32_t capacity;
int32_t length;
int32_t delta;
- UErrorCode errorCode;
+ int32_t numChanges;
+ UErrorCode errorCode_;
uint16_t stackArray[STACK_CAPACITY];
};
diff --git a/deps/icu-small/source/common/unicode/filteredbrk.h b/deps/icu-small/source/common/unicode/filteredbrk.h
index 51bb651fba..a0319bf0a7 100644
--- a/deps/icu-small/source/common/unicode/filteredbrk.h
+++ b/deps/icu-small/source/common/unicode/filteredbrk.h
@@ -55,14 +55,30 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of createEmptyInstance, which has
+ * identical behavior.
+ * @param status The error code.
+ * @return the new builder
+ * @deprecated ICU 60 use createEmptyInstance instead
+ * @see createEmptyInstance()
+ */
+ static inline FilteredBreakIteratorBuilder *createInstance(UErrorCode &status) {
+ return createEmptyInstance(status);
+ }
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_HIDE_DRAFT_API
/**
* Construct an empty FilteredBreakIteratorBuilder.
* In this state, it will not suppress any segment boundaries.
* @param status The error code.
* @return the new builder
- * @stable ICU 56
+ * @draft ICU 60
*/
- static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
+ static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
+#endif /* U_HIDE_DRAFT_API */
/**
* Suppress a certain string from being the end of a segment.
@@ -89,6 +105,20 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
*/
virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of wrapIteratorWithFilter().
+ * The behavior is identical.
+ * @param adoptBreakIterator the break iterator to adopt
+ * @param status error code
+ * @return the new BreakIterator, owned by the caller.
+ * @deprecated ICU 60 use wrapIteratorWithFilter() instead
+ * @see wrapIteratorWithFilter()
+ */
+ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_HIDE_DRAFT_API
/**
* Wrap (adopt) an existing break iterator in a new filtered instance.
* The resulting BreakIterator is owned by the caller.
@@ -96,12 +126,16 @@ class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
* Note that the adoptBreakIterator is adopted by the new BreakIterator
* and should no longer be used by the caller.
* The FilteredBreakIteratorBuilder may be reused.
+ * This function is an alias for build().
* @param adoptBreakIterator the break iterator to adopt
* @param status error code
* @return the new BreakIterator, owned by the caller.
- * @stable ICU 56
+ * @draft ICU 60
*/
- virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+ inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+ return build(adoptBreakIterator, status);
+ }
+#endif /* U_HIDE_DRAFT_API */
protected:
/**
diff --git a/deps/icu-small/source/common/unicode/localpointer.h b/deps/icu-small/source/common/unicode/localpointer.h
index 3ab820188f..e17ee3d886 100644
--- a/deps/icu-small/source/common/unicode/localpointer.h
+++ b/deps/icu-small/source/common/unicode/localpointer.h
@@ -213,7 +213,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
@@ -222,7 +221,6 @@ public:
LocalPointer(LocalPointer<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
-#endif
/**
* Destructor deletes the object it owns.
* @stable ICU 4.4
@@ -230,7 +228,6 @@ public:
~LocalPointer() {
delete LocalPointerBase<T>::ptr;
}
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@@ -241,7 +238,6 @@ public:
LocalPointer<T> &operator=(LocalPointer<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
-#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, leaves src with isNull().
@@ -362,7 +358,6 @@ public:
errorCode=U_MEMORY_ALLOCATION_ERROR;
}
}
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, leaves src with isNull().
* @param src source smart pointer
@@ -371,7 +366,6 @@ public:
LocalArray(LocalArray<T> &&src) U_NOEXCEPT : LocalPointerBase<T>(src.ptr) {
src.ptr=NULL;
}
-#endif
/**
* Destructor deletes the array it owns.
* @stable ICU 4.4
@@ -379,7 +373,6 @@ public:
~LocalArray() {
delete[] LocalPointerBase<T>::ptr;
}
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, leaves src with isNull().
* The behavior is undefined if *this and src are the same object.
@@ -390,7 +383,6 @@ public:
LocalArray<T> &operator=(LocalArray<T> &&src) U_NOEXCEPT {
return moveFrom(src);
}
-#endif
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, leaves src with isNull().
@@ -492,7 +484,6 @@ public:
* @see LocalPointer
* @stable ICU 4.4
*/
-#if U_HAVE_RVALUE_REFERENCES
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
class LocalPointerClassName : public LocalPointerBase<Type> { \
public: \
@@ -526,34 +517,6 @@ public:
ptr=p; \
} \
}
-#else
-#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
- class LocalPointerClassName : public LocalPointerBase<Type> { \
- public: \
- using LocalPointerBase<Type>::operator*; \
- using LocalPointerBase<Type>::operator->; \
- explicit LocalPointerClassName(Type *p=NULL) : LocalPointerBase<Type>(p) {} \
- ~LocalPointerClassName() { closeFunction(ptr); } \
- LocalPointerClassName &moveFrom(LocalPointerClassName &src) U_NOEXCEPT { \
- if (ptr != NULL) { closeFunction(ptr); } \
- LocalPointerBase<Type>::ptr=src.ptr; \
- src.ptr=NULL; \
- return *this; \
- } \
- void swap(LocalPointerClassName &other) U_NOEXCEPT { \
- Type *temp=LocalPointerBase<Type>::ptr; \
- LocalPointerBase<Type>::ptr=other.ptr; \
- other.ptr=temp; \
- } \
- friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) U_NOEXCEPT { \
- p1.swap(p2); \
- } \
- void adoptInstead(Type *p) { \
- if (ptr != NULL) { closeFunction(ptr); } \
- ptr=p; \
- } \
- }
-#endif
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/unicode/locid.h b/deps/icu-small/source/common/unicode/locid.h
index 37a34f7140..c752344f33 100644
--- a/deps/icu-small/source/common/unicode/locid.h
+++ b/deps/icu-small/source/common/unicode/locid.h
@@ -88,7 +88,7 @@ class UnicodeString;
* <P>
* The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
* The Variant codes are vendor and browser-specific.
- * For example, use REVISED for a langauge's revised script orthography, and POSIX for POSIX.
+ * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
* Where there are two variants, separate them with an underscore, and
* put the most important one first. For
* example, a Traditional Spanish collation might be referenced, with
diff --git a/deps/icu-small/source/common/unicode/normalizer2.h b/deps/icu-small/source/common/unicode/normalizer2.h
index d326da948a..8a6d713802 100644
--- a/deps/icu-small/source/common/unicode/normalizer2.h
+++ b/deps/icu-small/source/common/unicode/normalizer2.h
@@ -28,12 +28,15 @@
#if !UCONFIG_NO_NORMALIZATION
+#include "unicode/stringpiece.h"
#include "unicode/uniset.h"
#include "unicode/unistr.h"
#include "unicode/unorm2.h"
U_NAMESPACE_BEGIN
+class ByteSink;
+
/**
* Unicode normalization functionality for standard Unicode normalization or
* for using custom mapping tables.
@@ -215,6 +218,35 @@ public:
normalize(const UnicodeString &src,
UnicodeString &dest,
UErrorCode &errorCode) const = 0;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Currently implemented completely only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * Otherwise currently converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @draft ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const;
+
/**
* Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string.
@@ -340,6 +372,30 @@ public:
*/
virtual UBool
isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes,
+ * but it is currently optimized for UTF-8 only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * For other modes it currently converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return TRUE if s is normalized
+ * @draft ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
+
/**
* Tests if the string is normalized.
@@ -479,7 +535,36 @@ public:
virtual UnicodeString &
normalize(const UnicodeString &src,
UnicodeString &dest,
- UErrorCode &errorCode) const;
+ UErrorCode &errorCode) const U_OVERRIDE;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Currently implemented completely only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * Otherwise currently converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @draft ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const U_OVERRIDE;
+
/**
* Appends the normalized form of the second string to the first string
* (merging them at the boundary) and returns the first string.
@@ -497,7 +582,7 @@ public:
virtual UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
- UErrorCode &errorCode) const;
+ UErrorCode &errorCode) const U_OVERRIDE;
/**
* Appends the second string to the first string
* (merging them at the boundary) and returns the first string.
@@ -515,7 +600,7 @@ public:
virtual UnicodeString &
append(UnicodeString &first,
const UnicodeString &second,
- UErrorCode &errorCode) const;
+ UErrorCode &errorCode) const U_OVERRIDE;
/**
* Gets the decomposition mapping of c.
@@ -529,7 +614,7 @@ public:
* @stable ICU 4.6
*/
virtual UBool
- getDecomposition(UChar32 c, UnicodeString &decomposition) const;
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/**
* Gets the raw decomposition mapping of c.
@@ -543,7 +628,7 @@ public:
* @stable ICU 49
*/
virtual UBool
- getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const U_OVERRIDE;
/**
* Performs pairwise composition of a & b and returns the composite if there is one.
@@ -556,7 +641,7 @@ public:
* @stable ICU 49
*/
virtual UChar32
- composePair(UChar32 a, UChar32 b) const;
+ composePair(UChar32 a, UChar32 b) const U_OVERRIDE;
/**
* Gets the combining class of c.
@@ -567,7 +652,7 @@ public:
* @stable ICU 49
*/
virtual uint8_t
- getCombiningClass(UChar32 c) const;
+ getCombiningClass(UChar32 c) const U_OVERRIDE;
/**
* Tests if the string is normalized.
@@ -581,7 +666,30 @@ public:
* @stable ICU 4.4
*/
virtual UBool
- isNormalized(const UnicodeString &s, UErrorCode &errorCode) const;
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes,
+ * but it is currently optimized for UTF-8 only for "compose" modes,
+ * such as for NFC, NFKC, and NFKC_Casefold
+ * (UNORM2_COMPOSE and UNORM2_COMPOSE_CONTIGUOUS).
+ * For other modes it currently converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return TRUE if s is normalized
+ * @draft ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the string is normalized.
* For details see the Normalizer2 base class documentation.
@@ -594,7 +702,7 @@ public:
* @stable ICU 4.4
*/
virtual UNormalizationCheckResult
- quickCheck(const UnicodeString &s, UErrorCode &errorCode) const;
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Returns the end of the normalized substring of the input string.
* For details see the Normalizer2 base class documentation.
@@ -607,7 +715,7 @@ public:
* @stable ICU 4.4
*/
virtual int32_t
- spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const;
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE;
/**
* Tests if the character always has a normalization boundary before it,
@@ -617,7 +725,7 @@ public:
* @return TRUE if c has a normalization boundary before it
* @stable ICU 4.4
*/
- virtual UBool hasBoundaryBefore(UChar32 c) const;
+ virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE;
/**
* Tests if the character always has a normalization boundary after it,
@@ -627,7 +735,7 @@ public:
* @return TRUE if c has a normalization boundary after it
* @stable ICU 4.4
*/
- virtual UBool hasBoundaryAfter(UChar32 c) const;
+ virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE;
/**
* Tests if the character is normalization-inert.
@@ -636,7 +744,7 @@ public:
* @return TRUE if c is normalization-inert
* @stable ICU 4.4
*/
- virtual UBool isInert(UChar32 c) const;
+ virtual UBool isInert(UChar32 c) const U_OVERRIDE;
private:
UnicodeString &
normalize(const UnicodeString &src,
@@ -644,6 +752,12 @@ private:
USetSpanCondition spanCondition,
UErrorCode &errorCode) const;
+ void
+ normalizeUTF8(uint32_t options, const char *src, int32_t length,
+ ByteSink &sink, Edits *edits,
+ USetSpanCondition spanCondition,
+ UErrorCode &errorCode) const;
+
UnicodeString &
normalizeSecondAndAppend(UnicodeString &first,
const UnicodeString &second,
diff --git a/deps/icu-small/source/common/unicode/platform.h b/deps/icu-small/source/common/unicode/platform.h
index 23b9464c65..12e2929d24 100644
--- a/deps/icu-small/source/common/unicode/platform.h
+++ b/deps/icu-small/source/common/unicode/platform.h
@@ -132,6 +132,8 @@
#define U_PF_BROWSER_NATIVE_CLIENT 4020
/** Android is based on Linux. @internal */
#define U_PF_ANDROID 4050
+/** Fuchsia is a POSIX-ish platform. @internal */
+#define U_PF_FUCHSIA 4100
/* Maximum value for Linux-based platform is 4499 */
/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
#define U_PF_OS390 9000
@@ -152,6 +154,8 @@
# include <android/api-level.h>
#elif defined(__pnacl__) || defined(__native_client__)
# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
+#elif defined(__Fuchsia__)
+# define U_PLATFORM U_PF_FUCHSIA
#elif defined(linux) || defined(__linux__) || defined(__linux)
# define U_PLATFORM U_PF_LINUX
#elif defined(__APPLE__) && defined(__MACH__)
@@ -193,6 +197,20 @@
#endif
/**
+ * \def UPRV_INCOMPLETE_CPP11_SUPPORT
+ * This switch turns off ICU 60 NumberFormatter code.
+ * By default, this switch is enabled on AIX, z/OS, and Solaris,
+ * which have poor C++11 support.
+ *
+ * NOTE: This switch is intended to be temporary; see #13393.
+ *
+ * @internal
+ */
+#ifndef UPRV_INCOMPLETE_CPP11_SUPPORT
+# define UPRV_INCOMPLETE_CPP11_SUPPORT (U_PLATFORM == U_PF_AIX || U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_SOLARIS )
+#endif
+
+/**
* \def CYGWINMSVC
* Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
* Otherwise undefined.
@@ -330,31 +348,6 @@
# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
#endif
-/**
- * \def U_IOSTREAM_SOURCE
- * Defines what support for C++ streams is available.
- *
- * If U_IOSTREAM_SOURCE is set to 199711, then &lt;iostream&gt; is available
- * (the ISO/IEC C++ FDIS was published in November 1997), and then
- * one should qualify streams using the std namespace in ICU header
- * files.
- * Starting with ICU 49, this is the only supported version.
- *
- * If U_IOSTREAM_SOURCE is set to 198506, then &lt;iostream.h&gt; is
- * available instead (in June 1985 Stroustrup published
- * "An Extensible I/O Facility for C++" at the summer USENIX conference).
- * Starting with ICU 49, this version is not supported any more.
- *
- * If U_IOSTREAM_SOURCE is 0 (or any value less than 199711),
- * then C++ streams are not available and
- * support for them will be silently suppressed in ICU.
- *
- * @internal
- */
-#ifndef U_IOSTREAM_SOURCE
-#define U_IOSTREAM_SOURCE 199711
-#endif
-
/*===========================================================================*/
/** @{ Compiler and environment features */
/*===========================================================================*/
@@ -506,22 +499,6 @@ namespace std {
#endif
/**
- * \def U_HAVE_RVALUE_REFERENCES
- * Set to 1 if the compiler supports rvalue references.
- * C++11 feature, necessary for move constructor & move assignment.
- * @internal
- */
-#ifdef U_HAVE_RVALUE_REFERENCES
- /* Use the predefined value. */
-#elif U_CPLUSPLUS_VERSION >= 11 || __has_feature(cxx_rvalue_references) \
- || defined(__GXX_EXPERIMENTAL_CXX0X__) \
- || (defined(_MSC_VER) && _MSC_VER >= 1600) /* Visual Studio 2010 */
-# define U_HAVE_RVALUE_REFERENCES 1
-#else
-# define U_HAVE_RVALUE_REFERENCES 0
-#endif
-
-/**
* \def U_NOEXCEPT
* "noexcept" if supported, otherwise empty.
* Some code, especially STL containers, uses move semantics of objects only
@@ -871,6 +848,16 @@ namespace std {
# define U_CALLCONV U_EXPORT2
#endif
+/**
+ * \def U_CALLCONV_FPTR
+ * Similar to U_CALLCONV, but only used on function pointers.
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV_FPTR U_CALLCONV
+#else
+# define U_CALLCONV_FPTR
+#endif
/* @} */
#endif
diff --git a/deps/icu-small/source/common/unicode/rbbi.h b/deps/icu-small/source/common/unicode/rbbi.h
index d654154008..c3c201dd35 100644
--- a/deps/icu-small/source/common/unicode/rbbi.h
+++ b/deps/icu-small/source/common/unicode/rbbi.h
@@ -31,23 +31,14 @@
#include "unicode/schriter.h"
#include "unicode/uchriter.h"
-
-struct UTrie;
-
U_NAMESPACE_BEGIN
/** @internal */
+class LanguageBreakEngine;
struct RBBIDataHeader;
-class RuleBasedBreakIteratorTables;
-class BreakIterator;
class RBBIDataWrapper;
-class UStack;
-class LanguageBreakEngine;
class UnhandledEngine;
-struct RBBIStateTable;
-
-
-
+class UStack;
/**
*
@@ -96,47 +87,49 @@ private:
*/
RBBIDataWrapper *fData;
- /** Index of the Rule {tag} values for the most recent match.
+ /**
+ * The iteration state - current position, rule status for the current position,
+ * and whether the iterator ran off the end, yielding UBRK_DONE.
+ * Current position is pinned to be 0 <= position <= text.length.
+ * Current position is always set to a boundary.
* @internal
*/
- int32_t fLastRuleStatusIndex;
+ /**
+ * The current position of the iterator. Pinned, 0 <= fPosition <= text.length.
+ * Never has the value UBRK_DONE (-1).
+ */
+ int32_t fPosition;
/**
- * Rule tag value valid flag.
- * Some iterator operations don't intrinsically set the correct tag value.
- * This flag lets us lazily compute the value if we are ever asked for it.
- * @internal
- */
- UBool fLastStatusIndexValid;
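+ * TODO: document. Presumably the index of the rule {tag} status values
+ * for the current position (to be confirmed against the implementation).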
+ */
+ int32_t fRuleStatusIndex;
/**
- * Counter for the number of characters encountered with the "dictionary"
- * flag set.
- * @internal
- */
- uint32_t fDictionaryCharCount;
+ * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
+ */
+ UBool fDone;
/**
- * When a range of characters is divided up using the dictionary, the break
- * positions that are discovered are stored here, preventing us from having
- * to use either the dictionary or the state table again until the iterator
- * leaves this range of text. Has the most impact for line breaking.
- * @internal
+ * Cache of previously determined boundary positions.
*/
- int32_t* fCachedBreakPositions;
-
+ public: // TODO: debug, return to private.
+ class BreakCache;
+ BreakCache *fBreakCache;
+ private:
/**
- * The number of elements in fCachedBreakPositions
+ * Counter for the number of characters encountered with the "dictionary"
+ * flag set.
* @internal
*/
- int32_t fNumCachedBreakPositions;
+ uint32_t fDictionaryCharCount;
/**
- * if fCachedBreakPositions is not null, this indicates which item in the
- * cache the current iteration position refers to
- * @internal
+ * Cache of boundary positions within a region of text that has been
+ * sub-divided by dictionary based breaking.
*/
- int32_t fPositionInCache;
+ class DictionaryCache;
+ DictionaryCache *fDictionaryCache;
/**
*
@@ -179,13 +172,11 @@ private:
*/
RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
-
+ /** @internal */
friend class RBBIRuleBuilder;
/** @internal */
friend class BreakIterator;
-
-
public:
/** Default constructor. Creates an empty shell of an iterator, with no
@@ -469,7 +460,10 @@ public:
virtual UBool isBoundary(int32_t offset);
/**
- * Returns the current iteration position.
+ * Returns the current iteration position. Note that UBRK_DONE is never
+ * returned from this function; if iteration has run to the end of a
+ * string, current() will return the length of the string while
+ * next() will return UBRK_DONE.
* @return The current iteration position.
* @stable ICU 2.0
*/
@@ -501,6 +495,7 @@ public:
* Note: this function is not thread safe. It should not have been
* declared const, and the const remains only for compatibility
* reasons. (The function is logically const, but not bit-wise const).
+ * TODO: check this. Probably thread safe now.
* <p>
* @return the status from the break rule that determined the most recently
* returned break position.
@@ -660,46 +655,31 @@ private:
* Common initialization function, used by constructors and bufferClone.
* @internal
*/
- void init();
-
- /**
- * This method backs the iterator back up to a "safe position" in the text.
- * This is a position that we know, without any context, must be a break position.
- * The various calling methods then iterate forward from this safe position to
- * the appropriate position to return. (For more information, see the description
- * of buildBackwardsStateTable() in RuleBasedBreakIterator.Builder.)
- * @param statetable state table used of moving backwards
- * @internal
- */
- int32_t handlePrevious(const RBBIStateTable *statetable);
+ void init(UErrorCode &status);
/**
- * This method is the actual implementation of the next() method. All iteration
- * vectors through here. This method initializes the state machine to state 1
- * and advances through the text character by character until we reach the end
- * of the text or the state machine transitions to state 0. We update our return
- * value every time the state machine passes through a possible end state.
- * @param statetable state table used of moving forwards
+ * Iterate backwards from an arbitrary position in the input text using the Safe Reverse rules.
+ * This locates a "Safe Position" from which the forward break rules
+ * will operate correctly. A Safe Position is not necessarily a boundary itself.
+ *
+ * @param fromPosition the position in the input text to begin the iteration.
* @internal
*/
- int32_t handleNext(const RBBIStateTable *statetable);
-
+ int32_t handlePrevious(int32_t fromPosition);
/**
- * This is the function that actually implements dictionary-based
- * breaking. Covering at least the range from startPos to endPos,
- * it checks for dictionary characters, and if it finds them determines
- * the appropriate object to deal with them. It may cache found breaks in
- * fCachedBreakPositions as it goes. It may well also look at text outside
- * the range startPos to endPos.
- * If going forward, endPos is the normal Unicode break result, and
- * if goind in reverse, startPos is the normal Unicode break result
- * @param startPos The start position of a range of text
- * @param endPos The end position of a range of text
- * @param reverse The call is for the reverse direction
+ * Find a rule-based boundary by running the state machine.
+ * Input
+ * fPosition, the position in the text to begin from.
+ * Output
+ * fPosition: the boundary following the starting position.
+ * fDictionaryCharCount the number of dictionary characters encountered.
+ * If > 0, the segment will be further subdivided
+ * fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
+ *
* @internal
*/
- int32_t checkDictionary(int32_t startPos, int32_t endPos, UBool reverse);
+ int32_t handleNext();
/**
@@ -710,11 +690,14 @@ private:
*/
const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
+ public:
+#ifndef U_HIDE_INTERNAL_API
/**
- * @internal
+ * Debugging function only.
+ * @internal
*/
- void makeRuleStatusValid();
-
+ void dumpCache();
+#endif /* U_HIDE_INTERNAL_API */
};
//------------------------------------------------------------------------------
diff --git a/deps/icu-small/source/common/unicode/simpleformatter.h b/deps/icu-small/source/common/unicode/simpleformatter.h
index 26eae01525..850949caaf 100644
--- a/deps/icu-small/source/common/unicode/simpleformatter.h
+++ b/deps/icu-small/source/common/unicode/simpleformatter.h
@@ -21,6 +21,13 @@
U_NAMESPACE_BEGIN
+// Forward declaration:
+namespace number {
+namespace impl {
+class SimpleModifier;
+}
+}
+
/**
* Formats simple patterns like "{1} was born in {0}".
* Minimal subset of MessageFormat; fast, simple, minimal dependencies.
@@ -286,6 +293,9 @@ private:
UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
int32_t *offsets, int32_t offsetsLength,
UErrorCode &errorCode);
+
+ // Give access to internals to SimpleModifier for number formatting
+ friend class number::impl::SimpleModifier;
};
U_NAMESPACE_END
diff --git a/deps/icu-small/source/common/unicode/stringoptions.h b/deps/icu-small/source/common/unicode/stringoptions.h
new file mode 100644
index 0000000000..f2de96e963
--- /dev/null
+++ b/deps/icu-small/source/common/unicode/stringoptions.h
@@ -0,0 +1,198 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Bit set option bit constants for various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Titlecase the string as a whole rather than each word.
+ * (Titlecase only the character at index 0, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @draft ICU 60
+ */
+#define U_TITLECASE_WHOLE_STRING 0x20
+
+/**
+ * Titlecase sentences rather than words.
+ * (Titlecase only the first character of each sentence, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @draft ICU 60
+ */
+#define U_TITLECASE_SENTENCES 0x40
+
+#endif // U_HIDE_DRAFT_API
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the character at each
+ * (possibly adjusted) BreakIterator index and
+ * lowercase all other characters up to the next iterator index.
+ * With this option, the other characters will not be modified.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing BreakIterator indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
+ * and titlecase that one.
+ *
+ * Other characters are lowercased.
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Adjust each titlecasing BreakIterator index to the next cased character.
+ * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * This used to be the default index adjustment in ICU.
+ * Since ICU 60, the default index adjustment is to the next character that is
+ * a letter, number, symbol, or private use code point.
+ * (Uncased modifier letters are skipped.)
+ * The difference in behavior is small for word titlecasing,
+ * but the new adjustment is much better for whole-string and sentence titlecasing:
+ * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @draft ICU 60
+ */
+#define U_TITLECASE_ADJUST_TO_CASED 0x400
+
+/**
+ * Option for string transformation functions to not first reset the Edits object.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @draft ICU 60
+ */
+#define U_EDITS_NO_RESET 0x2000
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @draft ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+#endif // U_HIDE_DRAFT_API
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
+// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
+// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif // __STRINGOPTIONS_H__
diff --git a/deps/icu-small/source/common/unicode/stringtriebuilder.h b/deps/icu-small/source/common/unicode/stringtriebuilder.h
index d1ac003c48..8d2b229413 100644
--- a/deps/icu-small/source/common/unicode/stringtriebuilder.h
+++ b/deps/icu-small/source/common/unicode/stringtriebuilder.h
@@ -256,7 +256,7 @@ protected:
/** @internal */
class FinalValueNode : public Node {
public:
- FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
+ FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
virtual UBool operator==(const Node &other) const;
virtual void write(StringTrieBuilder &builder);
protected:
@@ -276,7 +276,7 @@ protected:
void setValue(int32_t v) {
hasValue=TRUE;
value=v;
- hash=hash*37+v;
+ hash=hash*37u+v;
}
protected:
UBool hasValue;
@@ -290,7 +290,7 @@ protected:
class IntermediateValueNode : public ValueNode {
public:
IntermediateValueNode(int32_t v, Node *nextNode)
- : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
+ : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
virtual void write(StringTrieBuilder &builder);
@@ -307,7 +307,7 @@ protected:
class LinearMatchNode : public ValueNode {
public:
LinearMatchNode(int32_t len, Node *nextNode)
- : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
+ : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
length(len), next(nextNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@@ -342,7 +342,7 @@ protected:
equal[length]=NULL;
values[length]=value;
++length;
- hash=(hash*37+c)*37+value;
+ hash=(hash*37u+c)*37u+value;
}
// Adds a unit which leads to another match node.
void add(int32_t c, Node *node) {
@@ -350,7 +350,7 @@ protected:
equal[length]=node;
values[length]=0;
++length;
- hash=(hash*37+c)*37+hashCode(node);
+ hash=(hash*37u+c)*37u+hashCode(node);
}
protected:
Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value".
@@ -365,8 +365,8 @@ protected:
class SplitBranchNode : public BranchNode {
public:
SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
- : BranchNode(((0x555555*37+middleUnit)*37+
- hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
+ : BranchNode(((0x555555u*37u+middleUnit)*37u+
+ hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
@@ -382,7 +382,7 @@ protected:
class BranchHeadNode : public ValueNode {
public:
BranchHeadNode(int32_t len, Node *subNode)
- : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
+ : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
length(len), next(subNode) {}
virtual UBool operator==(const Node &other) const;
virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
diff --git a/deps/icu-small/source/common/unicode/ubiditransform.h b/deps/icu-small/source/common/unicode/ubiditransform.h
index 724587dddc..627b005ed4 100644
--- a/deps/icu-small/source/common/unicode/ubiditransform.h
+++ b/deps/icu-small/source/common/unicode/ubiditransform.h
@@ -23,8 +23,6 @@
#include "unicode/uchar.h"
#include "unicode/localpointer.h"
-#ifndef U_HIDE_DRAFT_API
-
/**
* \file
* \brief Bidi Transformations
@@ -60,17 +58,17 @@
* @see UBIDI_REORDER_DEFAULT
* @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
* @see UBIDI_REORDER_RUNS_ONLY
- * @draft ICU 58
+ * @stable ICU 58
*/
typedef enum {
/** 0: Constant indicating a logical order.
* This is the default for input text.
- * @draft ICU 58
+ * @stable ICU 58
*/
UBIDI_LOGICAL = 0,
/** 1: Constant indicating a visual order.
* This is a default for output text.
- * @draft ICU 58
+ * @stable ICU 58
*/
UBIDI_VISUAL
} UBiDiOrder;
@@ -83,20 +81,20 @@ typedef enum {
* @see ubidi_setReorderingOptions
* @see ubidi_writeReordered
* @see ubidi_writeReverse
- * @draft ICU 58
+ * @stable ICU 58
*/
typedef enum {
/** 0: Constant indicating that character mirroring should not be
* performed.
* This is the default.
- * @draft ICU 58
+ * @stable ICU 58
*/
UBIDI_MIRRORING_OFF = 0,
/** 1: Constant indicating that character mirroring should be performed.
* This corresponds to calling <code>ubidi_writeReordered</code> or
* <code>ubidi_writeReverse</code> with the
* <code>UBIDI_DO_MIRRORING</code> option bit set.
- * @draft ICU 58
+ * @stable ICU 58
*/
UBIDI_MIRRORING_ON
} UBiDiMirroring;
@@ -104,7 +102,7 @@ typedef enum {
/**
* Forward declaration of the <code>UBiDiTransform</code> structure that stores
* information used by the layout transformation engine.
- * @draft ICU 58
+ * @stable ICU 58
*/
typedef struct UBiDiTransform UBiDiTransform;
@@ -240,9 +238,9 @@ typedef struct UBiDiTransform UBiDiTransform;
* @see UBiDiMirroring
* @see ubidi_setPara
* @see u_shapeArabic
- * @draft ICU 58
+ * @stable ICU 58
*/
-U_DRAFT uint32_t U_EXPORT2
+U_STABLE uint32_t U_EXPORT2
ubiditransform_transform(UBiDiTransform *pBiDiTransform,
const UChar *src, int32_t srcLength,
UChar *dest, int32_t destSize,
@@ -286,16 +284,16 @@ ubiditransform_transform(UBiDiTransform *pBiDiTransform,
* <code>ubiditransform_close()</code>.
*
* @return An empty <code>UBiDiTransform</code> object.
- * @draft ICU 58
+ * @stable ICU 58
*/
-U_DRAFT UBiDiTransform* U_EXPORT2
+U_STABLE UBiDiTransform* U_EXPORT2
ubiditransform_open(UErrorCode *pErrorCode);
/**
* Deallocates the given <code>UBiDiTransform</code> object.
- * @draft ICU 58
+ * @stable ICU 58
*/
-U_DRAFT void U_EXPORT2
+U_STABLE void U_EXPORT2
ubiditransform_close(UBiDiTransform *pBidiTransform);
#if U_SHOW_CPLUSPLUS_API
@@ -309,7 +307,7 @@ U_NAMESPACE_BEGIN
*
* @see LocalPointerBase
* @see LocalPointer
- * @draft ICU 58
+ * @stable ICU 58
*/
U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
@@ -317,5 +315,4 @@ U_NAMESPACE_END
#endif
-#endif /* U_HIDE_DRAFT_API */
#endif
diff --git a/deps/icu-small/source/common/unicode/ubrk.h b/deps/icu-small/source/common/unicode/ubrk.h
index 22a4b99cd6..600328c49c 100644
--- a/deps/icu-small/source/common/unicode/ubrk.h
+++ b/deps/icu-small/source/common/unicode/ubrk.h
@@ -230,7 +230,8 @@ typedef enum USentenceBreakTag {
* @param locale The locale specifying the text-breaking conventions. Note that
* locale keys such as "lb" and "ss" may be used to modify text break behavior,
* see general discussion of BreakIterator C API.
- * @param text The text to be iterated over.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
* @param textLength The number of characters in text, or -1 if null-terminated.
* @param status A UErrorCode to receive any errors.
* @return A UBreakIterator for the specified locale.
diff --git a/deps/icu-small/source/common/unicode/ucasemap.h b/deps/icu-small/source/common/unicode/ucasemap.h
index 18e6c2ba0b..6b253e3d63 100644
--- a/deps/icu-small/source/common/unicode/ucasemap.h
+++ b/deps/icu-small/source/common/unicode/ucasemap.h
@@ -23,6 +23,7 @@
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
#include "unicode/ustring.h"
/**
@@ -144,56 +145,6 @@ ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
U_STABLE void U_EXPORT2
ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
-/**
- * Do not lowercase non-initial parts of words when titlecasing.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will titlecase the first cased character
- * of a word and lowercase all other characters.
- * With this option, the other characters will not be modified.
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_LOWERCASE 0x100
-
-/**
- * Do not adjust the titlecasing indexes from BreakIterator::next() indexes;
- * titlecase exactly the characters at breaks from the iterator.
- * Option bit for titlecasing APIs that take an options bit set.
- *
- * By default, titlecasing will take each break iterator index,
- * adjust it by looking for the next cased character, and titlecase that one.
- * Other characters are lowercased.
- *
- * This follows Unicode 4 & 5 section 3.13 Default Case Operations:
- *
- * R3 toTitlecase(X): Find the word boundaries based on Unicode Standard Annex
- * #29, "Text Boundaries." Between each pair of word boundaries, find the first
- * cased character F. If F exists, map F to default_title(F); then map each
- * subsequent character C to default_lower(C).
- *
- * @see ucasemap_setOptions
- * @see ucasemap_toTitle
- * @see ucasemap_utf8ToTitle
- * @see UnicodeString::toTitle
- * @see U_TITLECASE_NO_LOWERCASE
- * @stable ICU 3.8
- */
-#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
-
-/**
- * Omit unchanged text when case-mapping with Edits.
- *
- * @see CaseMap
- * @see Edits
- * @draft ICU 59
- */
-#define UCASEMAP_OMIT_UNCHANGED_TEXT 0x4000
-
#if !UCONFIG_NO_BREAK_ITERATION
/**
@@ -251,7 +202,7 @@ ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode
* The standard titlecase iterator for the root locale implements the
* algorithm of Unicode TR 21.
*
- * This function uses only the setUText(), first(), next() and close() methods of the
+ * This function uses only the setText(), first() and next() methods of the
* provided break iterator.
*
* The result may be longer or shorter than the original.
diff --git a/deps/icu-small/source/common/unicode/uchar.h b/deps/icu-small/source/common/unicode/uchar.h
index 8174ca23e6..3613374d9a 100644
--- a/deps/icu-small/source/common/unicode/uchar.h
+++ b/deps/icu-small/source/common/unicode/uchar.h
@@ -26,6 +26,7 @@
#define UCHAR_H
#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
U_CDECL_BEGIN
@@ -41,7 +42,7 @@ U_CDECL_BEGIN
* @see u_getUnicodeVersion
* @stable ICU 2.0
*/
-#define U_UNICODE_VERSION "9.0"
+#define U_UNICODE_VERSION "10.0"
/**
* \file
@@ -148,8 +149,9 @@ U_CDECL_BEGIN
*
* The properties APIs are intended to reflect Unicode properties as defined
* in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
- * For details about the properties see http://www.unicode.org/ucd/ .
- * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * For details about the properties see
+ * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
*
* Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
* then properties marked with "new in Unicode 3.2" are not or not fully available.
@@ -427,12 +429,29 @@ typedef enum UProperty {
* @stable ICU 57
*/
UCHAR_EMOJI_MODIFIER_BASE=60,
+ /**
+ * Binary property Emoji_Component.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 60
+ */
+ UCHAR_EMOJI_COMPONENT=61,
+ /**
+ * Binary property Regional_Indicator.
+ * @stable ICU 60
+ */
+ UCHAR_REGIONAL_INDICATOR=62,
+ /**
+ * Binary property Prepended_Concatenation_Mark.
+ * @stable ICU 60
+ */
+ UCHAR_PREPENDED_CONCATENATION_MARK=63,
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the last constant for binary Unicode properties.
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UCHAR_BINARY_LIMIT=61,
+ UCHAR_BINARY_LIMIT,
#endif // U_HIDE_DEPRECATED_API
/** Enumerated property Bidi_Class.
@@ -1647,6 +1666,23 @@ enum UBlockCode {
/** @stable ICU 58 */
UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
+ // New blocks in Unicode 10.0
+
+ /** @stable ICU 60 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
+ /** @stable ICU 60 */
+ UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
+ /** @stable ICU 60 */
+ UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
+ /** @stable ICU 60 */
+ UBLOCK_NUSHU = 277, /*[1B170]*/
+ /** @stable ICU 60 */
+ UBLOCK_SOYOMBO = 278, /*[11A50]*/
+ /** @stable ICU 60 */
+ UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
+ /** @stable ICU 60 */
+ UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UBlockCode value.
@@ -1654,7 +1690,7 @@ enum UBlockCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- UBLOCK_COUNT = 274,
+ UBLOCK_COUNT = 281,
#endif // U_HIDE_DEPRECATED_API
/** @stable ICU 2.0 */
@@ -1930,6 +1966,19 @@ typedef enum UJoiningGroup {
U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
+
+ U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UJoiningGroup value.
@@ -3521,27 +3570,6 @@ u_toupper(UChar32 c);
U_STABLE UChar32 U_EXPORT2
u_totitle(UChar32 c);
-/** Option value for case folding: use default mappings defined in CaseFolding.txt. @stable ICU 2.0 */
-#define U_FOLD_CASE_DEFAULT 0
-
-/**
- * Option value for case folding:
- *
- * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
- * and dotless i appropriately for Turkic languages (tr, az).
- *
- * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
- * are to be included for default mappings and
- * excluded for the Turkic-specific mappings.
- *
- * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
- * are to be excluded for default mappings and
- * included for the Turkic-specific mappings.
- *
- * @stable ICU 2.0
- */
-#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
-
/**
* The given character is mapped to its case folding equivalent according to
* UnicodeData.txt and CaseFolding.txt;
diff --git a/deps/icu-small/source/common/unicode/uclean.h b/deps/icu-small/source/common/unicode/uclean.h
index d0bfcb13a6..3f73af37b8 100644
--- a/deps/icu-small/source/common/unicode/uclean.h
+++ b/deps/icu-small/source/common/unicode/uclean.h
@@ -149,7 +149,7 @@ typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
* @system
*/
U_STABLE void U_EXPORT2
-u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV a, UMemReallocFn * U_CALLCONV r, UMemFreeFn * U_CALLCONV f,
+u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
UErrorCode *status);
U_CDECL_END
diff --git a/deps/icu-small/source/common/unicode/uconfig.h b/deps/icu-small/source/common/unicode/uconfig.h
index 25f19a1a61..5e28a146de 100644
--- a/deps/icu-small/source/common/unicode/uconfig.h
+++ b/deps/icu-small/source/common/unicode/uconfig.h
@@ -76,7 +76,7 @@
#endif
/**
- * Determines wheter to enable auto cleanup of libraries.
+ * Determines whether to enable auto cleanup of libraries.
* @internal
*/
#ifndef UCLN_NO_AUTO_CLEANUP
@@ -262,7 +262,8 @@
/**
* \def UCONFIG_NO_CONVERSION
- * ICU will not completely build with this switch turned on.
+ * ICU will not completely build (compiling the tools fails) with this
+ * switch turned on.
* This switch turns off all converters.
*
* You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
@@ -320,7 +321,9 @@
*/
#ifndef UCONFIG_NO_NORMALIZATION
# define UCONFIG_NO_NORMALIZATION 0
-#elif UCONFIG_NO_NORMALIZATION
+#endif
+
+#if UCONFIG_NO_NORMALIZATION
/* common library */
/* ICU 50 CJK dictionary BreakIterator uses normalization */
# define UCONFIG_NO_BREAK_ITERATION 1
diff --git a/deps/icu-small/source/common/unicode/udisplaycontext.h b/deps/icu-small/source/common/unicode/udisplaycontext.h
index c4f6c957e9..398481c681 100644
--- a/deps/icu-small/source/common/unicode/udisplaycontext.h
+++ b/deps/icu-small/source/common/unicode/udisplaycontext.h
@@ -44,14 +44,12 @@ enum UDisplayContextType {
* @stable ICU 54
*/
UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
-#ifndef U_HIDE_DRAFT_API
/**
* Type to retrieve the substitute handling setting, e.g.
* UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
- * @draft ICU 58
+ * @stable ICU 58
*/
UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
-#endif /* U_HIDE_DRAFT_API */
};
/**
* @stable ICU 51
@@ -143,7 +141,6 @@ enum UDisplayContext {
* @stable ICU 54
*/
UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
-#ifndef U_HIDE_DRAFT_API
/**
* ================================
* SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
@@ -154,16 +151,15 @@ enum UDisplayContext {
* A possible setting for SUBSTITUTE_HANDLING:
* Returns a fallback value (e.g., the input code) when no data is available.
* This is the default value.
- * @draft ICU 58
+ * @stable ICU 58
*/
UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
/**
* A possible setting for SUBSTITUTE_HANDLING:
* Returns a null value when no data is available.
- * @draft ICU 58
+ * @stable ICU 58
*/
UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
-#endif /* U_HIDE_DRAFT_API */
};
/**
diff --git a/deps/icu-small/source/common/unicode/unistr.h b/deps/icu-small/source/common/unicode/unistr.h
index e0ab0b9eb7..b99a686126 100644
--- a/deps/icu-small/source/common/unicode/unistr.h
+++ b/deps/icu-small/source/common/unicode/unistr.h
@@ -38,16 +38,6 @@
struct UConverter; // unicode/ucnv.h
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also ustring.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER 0x8000
-#endif
-
#ifndef USTRING_H
/**
* \ingroup ustring_ustrlen
@@ -1730,7 +1720,7 @@ public:
*/
template<typename StringClass>
StringClass &toUTF8String(StringClass &result) const {
- StringByteSink<StringClass> sbs(&result);
+ StringByteSink<StringClass> sbs(&result, length());
toUTF8(sbs);
return result;
}
@@ -1901,7 +1891,6 @@ public:
*/
UnicodeString &fastCopyFrom(const UnicodeString &src);
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move assignment operator, might leave src in bogus state.
* This string will have the same contents and state that the source string had.
@@ -1913,7 +1902,7 @@ public:
UnicodeString &operator=(UnicodeString &&src) U_NOEXCEPT {
return moveFrom(src);
}
-#endif
+
// do not use #ifndef U_HIDE_DRAFT_API for moveFrom, needed by non-draft API
/**
* Move assignment, might leave src in bogus state.
@@ -2786,11 +2775,11 @@ public:
* break iterator is opened.
* Otherwise the provided iterator is set to the string's text.
* @param locale The locale to consider.
+ * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
* See also ucasemap_open().
* @return A reference to this.
- * @see U_TITLECASE_NO_LOWERCASE
- * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
- * @see ucasemap_open
* @stable ICU 3.8
*/
UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
@@ -3360,7 +3349,6 @@ public:
*/
UnicodeString(const UnicodeString& that);
-#if U_HAVE_RVALUE_REFERENCES
/**
* Move constructor, might leave src in bogus state.
* This string will have the same contents and state that the source string had.
@@ -3368,7 +3356,6 @@ public:
* @stable ICU 56
*/
UnicodeString(UnicodeString &&src) U_NOEXCEPT;
-#endif
/**
* 'Substring' constructor from tail of source string.
diff --git a/deps/icu-small/source/common/unicode/unorm.h b/deps/icu-small/source/common/unicode/unorm.h
index 1b5af16700..3839de1295 100644
--- a/deps/icu-small/source/common/unicode/unorm.h
+++ b/deps/icu-small/source/common/unicode/unorm.h
@@ -210,7 +210,7 @@ enum {
* the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
unorm_normalize(const UChar *source, int32_t sourceLength,
UNormalizationMode mode, int32_t options,
UChar *result, int32_t resultLength,
@@ -236,7 +236,7 @@ unorm_normalize(const UChar *source, int32_t sourceLength,
* @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE UNormalizationCheckResult U_EXPORT2
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
unorm_quickCheck(const UChar *source, int32_t sourcelength,
UNormalizationMode mode,
UErrorCode *status);
@@ -257,7 +257,7 @@ unorm_quickCheck(const UChar *source, int32_t sourcelength,
* @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE UNormalizationCheckResult U_EXPORT2
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options,
UErrorCode *pErrorCode);
@@ -283,7 +283,7 @@ unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
* @see unorm_quickCheck
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE UBool U_EXPORT2
+U_DEPRECATED UBool U_EXPORT2
unorm_isNormalized(const UChar *src, int32_t srcLength,
UNormalizationMode mode,
UErrorCode *pErrorCode);
@@ -305,7 +305,7 @@ unorm_isNormalized(const UChar *src, int32_t srcLength,
* @see unorm_isNormalized
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE UBool U_EXPORT2
+U_DEPRECATED UBool U_EXPORT2
unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
UNormalizationMode mode, int32_t options,
UErrorCode *pErrorCode);
@@ -383,7 +383,7 @@ unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
*
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
unorm_next(UCharIterator *src,
UChar *dest, int32_t destCapacity,
UNormalizationMode mode, int32_t options,
@@ -416,7 +416,7 @@ unorm_next(UCharIterator *src,
*
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
unorm_previous(UCharIterator *src,
UChar *dest, int32_t destCapacity,
UNormalizationMode mode, int32_t options,
@@ -460,7 +460,7 @@ unorm_previous(UCharIterator *src,
*
* @deprecated ICU 56 Use unorm2.h instead.
*/
-U_STABLE int32_t U_EXPORT2
+U_DEPRECATED int32_t U_EXPORT2
unorm_concatenate(const UChar *left, int32_t leftLength,
const UChar *right, int32_t rightLength,
UChar *dest, int32_t destCapacity,
diff --git a/deps/icu-small/source/common/unicode/unorm2.h b/deps/icu-small/source/common/unicode/unorm2.h
index c6d3494d70..a9bd02f256 100644
--- a/deps/icu-small/source/common/unicode/unorm2.h
+++ b/deps/icu-small/source/common/unicode/unorm2.h
@@ -32,6 +32,7 @@
#include "unicode/utypes.h"
#include "unicode/localpointer.h"
+#include "unicode/stringoptions.h"
#include "unicode/uset.h"
/**
@@ -527,30 +528,6 @@ U_STABLE UBool U_EXPORT2
unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
/**
- * Option bit for unorm_compare:
- * Both input strings are assumed to fulfill FCD conditions.
- * @stable ICU 2.2
- */
-#define UNORM_INPUT_IS_FCD 0x20000
-
-/**
- * Option bit for unorm_compare:
- * Perform case-insensitive comparison.
- * @stable ICU 2.2
- */
-#define U_COMPARE_IGNORE_CASE 0x10000
-
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and ustring.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER 0x8000
-#endif
-
-/**
* Compares two strings for canonical equivalence.
* Further options include case-insensitive comparison and
* code point order (as opposed to code unit order).
diff --git a/deps/icu-small/source/common/unicode/urename.h b/deps/icu-small/source/common/unicode/urename.h
index 21c839abbf..982655c442 100644
--- a/deps/icu-small/source/common/unicode/urename.h
+++ b/deps/icu-small/source/common/unicode/urename.h
@@ -107,6 +107,7 @@
#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
+#define checkImpl U_ICU_ENTRY_POINT_RENAME(checkImpl)
#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
@@ -944,6 +945,7 @@
#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
+#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
@@ -1654,6 +1656,7 @@
#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale)
+#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator)
#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
diff --git a/deps/icu-small/source/common/unicode/uscript.h b/deps/icu-small/source/common/unicode/uscript.h
index 1420578f02..3ec235d50c 100644
--- a/deps/icu-small/source/common/unicode/uscript.h
+++ b/deps/icu-small/source/common/unicode/uscript.h
@@ -444,6 +444,13 @@ typedef enum UScriptCode {
/** @stable ICU 58 */
USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
+ /** @stable ICU 60 */
+ USCRIPT_MASARAM_GONDI = 175,/* Gonm */
+ /** @stable ICU 60 */
+ USCRIPT_SOYOMBO = 176,/* Soyo */
+ /** @stable ICU 60 */
+ USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
+
#ifndef U_HIDE_DEPRECATED_API
/**
* One more than the highest normal UScriptCode value.
@@ -451,7 +458,7 @@ typedef enum UScriptCode {
*
* @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
*/
- USCRIPT_CODE_LIMIT = 175
+ USCRIPT_CODE_LIMIT = 178
#endif // U_HIDE_DEPRECATED_API
} UScriptCode;
diff --git a/deps/icu-small/source/common/unicode/ustring.h b/deps/icu-small/source/common/unicode/ustring.h
index 2099ab5913..1ea27126cc 100644
--- a/deps/icu-small/source/common/unicode/ustring.h
+++ b/deps/icu-small/source/common/unicode/ustring.h
@@ -497,16 +497,6 @@ u_strCompare(const UChar *s1, int32_t length1,
U_STABLE int32_t U_EXPORT2
u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
-#ifndef U_COMPARE_CODE_POINT_ORDER
-/* see also unistr.h and unorm.h */
-/**
- * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
- * Compare strings in code point order instead of code unit order.
- * @stable ICU 2.2
- */
-#define U_COMPARE_CODE_POINT_ORDER 0x8000
-#endif
-
/**
* Compare two strings case-insensitively using full case folding.
* This is equivalent to
diff --git a/deps/icu-small/source/common/unicode/utext.h b/deps/icu-small/source/common/unicode/utext.h
index edcb267597..55709d403a 100644
--- a/deps/icu-small/source/common/unicode/utext.h
+++ b/deps/icu-small/source/common/unicode/utext.h
@@ -768,7 +768,7 @@ utext_extract(UText *ut,
*/
#define UTEXT_SETNATIVEINDEX(ut, ix) \
{ int64_t __offset = (ix) - (ut)->chunkNativeStart; \
- if (__offset>=0 && __offset<=(int64_t)(ut)->nativeIndexingLimit) { \
+ if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
(ut)->chunkOffset=(int32_t)__offset; \
} else { \
utext_setNativeIndex((ut), (ix)); } }
diff --git a/deps/icu-small/source/common/unicode/utf.h b/deps/icu-small/source/common/unicode/utf.h
index ab7e9ac96a..aa56980691 100644
--- a/deps/icu-small/source/common/unicode/utf.h
+++ b/deps/icu-small/source/common/unicode/utf.h
@@ -23,9 +23,6 @@
* This file defines macros for checking whether a code point is
* a surrogate or a non-character etc.
*
- * The UChar and UChar32 data types for Unicode code units and code points
- * are defined in umachine.h because they can be machine-dependent.
- *
* If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
* and itself includes utf8.h and utf16.h after some
* common definitions.
@@ -50,11 +47,11 @@
* but are optimized for the much more frequently occurring BMP code points.
*
* umachine.h defines UChar to be an unsigned 16-bit integer.
- * Where available, UChar is defined to be a char16_t
- * or a wchar_t (if that is an unsigned 16-bit type), otherwise uint16_t.
+ * Since ICU 59, ICU uses char16_t in C++, UChar only in C,
+ * and defines UChar=char16_t by default. See the UChar API docs for details.
*
* UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
- * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1).
* Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
* the definition of UChar. For details see the documentation for UChar32 itself.
*
@@ -63,11 +60,20 @@
* For actual Unicode character properties see uchar.h.
*
* By default, string operations must be done with error checking in case
- * a string is not well-formed UTF-16.
- * The macros will detect if a surrogate code unit is unpaired
+ * a string is not well-formed UTF-16 or UTF-8.
+ *
+ * The U16_ macros detect if a surrogate code unit is unpaired
* (lead unit without trail unit or vice versa) and just return the unit itself
* as the code point.
*
+ * The U8_ macros detect illegal byte sequences and return a negative value.
+ * Starting with ICU 60, the observable length of a single illegal byte sequence
+ * skipped by one of these macros follows the Unicode 6+ recommendation
+ * which is consistent with the W3C Encoding Standard.
+ *
+ * There are ..._OR_FFFD versions of both U16_ and U8_ macros
+ * that return U+FFFD for illegal code unit sequences.
+ *
* The regular "safe" macros require that the initial, passed-in string index
* is within bounds. They only check the index when they read more than one
* code unit. This is usually done with code similar to the following loop:
@@ -91,10 +97,7 @@
* The performance differences are much larger here because UTF-8 provides so
* many opportunities for malformed sequences.
* The unsafe UTF-8 macros are entirely implemented inside the macro definitions
- * and are fast, while the safe UTF-8 macros call functions for all but the
- * trivial (ASCII) cases.
- * (ICU 3.6 optimizes U8_NEXT() and U8_APPEND() to handle most other common
- * characters inline as well.)
+ * and are fast, while the safe UTF-8 macros call functions for some complicated cases.
*
* Unlike with UTF-16, malformed sequences cannot be expressed with distinct
* code point values (0..U+10ffff). They are indicated with negative values instead.
@@ -126,8 +129,7 @@
*/
#define U_IS_UNICODE_NONCHAR(c) \
((c)>=0xfdd0 && \
- ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
- (uint32_t)(c)<=0x10ffff)
+ ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff)
/**
* Is c a Unicode code point value (0..U+10ffff)
@@ -148,9 +150,7 @@
*/
#define U_IS_UNICODE_CHAR(c) \
((uint32_t)(c)<0xd800 || \
- ((uint32_t)(c)>0xdfff && \
- (uint32_t)(c)<=0x10ffff && \
- !U_IS_UNICODE_NONCHAR(c)))
+ (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
/**
* Is this code point a BMP code point (U+0000..U+ffff)?
diff --git a/deps/icu-small/source/common/unicode/utf16.h b/deps/icu-small/source/common/unicode/utf16.h
index 0665381612..b9b9c59d3c 100644
--- a/deps/icu-small/source/common/unicode/utf16.h
+++ b/deps/icu-small/source/common/unicode/utf16.h
@@ -185,8 +185,8 @@
*
* The length can be negative for a NUL-terminated string.
*
- * If the offset points to a single, unpaired surrogate, then that itself
- * will be returned as the code point.
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
@@ -213,6 +213,53 @@
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/* definitions with forward iteration --------------------------------------- */
/**
@@ -253,8 +300,7 @@
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
- * to a single, unpaired lead surrogate, then that itself
- * will be returned as the code point.
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param i string offset, must be i<length
@@ -274,6 +320,44 @@
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
@@ -481,8 +565,7 @@
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
- * trail surrogate, then that itself
- * will be returned as the code point.
+ * trail surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
@@ -502,6 +585,43 @@
} \
}
+#ifndef U_HIDE_DRAFT_API
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @draft ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+}
+
+#endif // U_HIDE_DRAFT_API
+
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h
index 9e56b50474..59b4b25570 100644
--- a/deps/icu-small/source/common/unicode/utf8.h
+++ b/deps/icu-small/source/common/unicode/utf8.h
@@ -41,34 +41,24 @@
/* internal definitions ----------------------------------------------------- */
-
-
/**
* Counts the trail bytes for a UTF-8 lead byte.
- * Returns 0 for 0..0xbf as well as for 0xfe and 0xff.
+ * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
- * Note: Beginning with ICU 50, the implementation uses a multi-condition expression
- * which was shown in 2012 (on x86-64) to compile to fast, branch-free code.
- * leadByte is evaluated multiple times.
- *
- * The pre-ICU 50 implementation used the exported array utf8_countTrailBytes:
- * #define U8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[leadByte])
- * leadByte was evaluated exactly once.
- *
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) \
- ((uint8_t)(leadByte)<0xf0 ? \
- ((uint8_t)(leadByte)>=0xc0)+((uint8_t)(leadByte)>=0xe0) : \
- (uint8_t)(leadByte)<0xfe ? 3+((uint8_t)(leadByte)>=0xf8)+((uint8_t)(leadByte)>=0xfc) : 0)
+ (U8_IS_LEAD(leadByte) ? \
+ ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
/**
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
- * The maximum supported lead byte is 0xf4 corresponding to U+10FFFF.
+ * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
@@ -78,7 +68,7 @@
* @internal
*/
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
- (((leadByte)>=0xc0)+((leadByte)>=0xe0)+((leadByte)>=0xf0))
+ (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
@@ -90,6 +80,40 @@
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
+/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
@@ -148,20 +172,21 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
#define U8_IS_SINGLE(c) (((c)&0x80)==0)
/**
- * Is this code unit (byte) a UTF-8 lead byte?
+ * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
-#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc0)<0x3e)
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
+// 0x32=0xf4-0xc2
/**
- * Is this code unit (byte) a UTF-8 trail byte?
+ * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
-#define U8_IS_TRAIL(c) (((c)&0xc0)==0x80)
+#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
/**
* How many code units (bytes) are used for the UTF-8 encoding
@@ -289,7 +314,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_NEXT_UNSAFE(s, i, c) { \
(c)=(uint8_t)(s)[(i)++]; \
- if((c)>=0x80) { \
+ if(!U8_IS_SINGLE(c)) { \
if((c)<0xe0) { \
(c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
} else if((c)<0xf0) { \
@@ -325,22 +350,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_NEXT(s, i, length, c) { \
(c)=(uint8_t)(s)[(i)++]; \
- if((c)>=0x80) { \
+ if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
- if( /* handle U+1000..U+CFFF inline */ \
- (0xe0<(c) && (c)<=0xec) && \
- (((i)+1)<(length) || (length)<0) && \
- (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
- (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
- ) { \
- /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
- (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+ if( /* handle U+0800..U+FFFF inline */ \
+ (0xe0<=(c) && (c)<0xf0) && \
+ (((i)+1)<(length) || (length)<0) && \
+ U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
+ (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
+ (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
- ((c)<0xe0 && (c)>=0xc2) && \
- ((i)!=(length)) && \
- (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
- ) { \
+ ((c)<0xe0 && (c)>=0xc2) && \
+ ((i)!=(length)) && \
+ (__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
} else { \
@@ -376,22 +398,19 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_NEXT_OR_FFFD(s, i, length, c) { \
(c)=(uint8_t)(s)[(i)++]; \
- if((c)>=0x80) { \
+ if(!U8_IS_SINGLE(c)) { \
uint8_t __t1, __t2; \
- if( /* handle U+1000..U+CFFF inline */ \
- (0xe0<(c) && (c)<=0xec) && \
- (((i)+1)<(length) || (length)<0) && \
- (__t1=(uint8_t)((s)[i]-0x80))<=0x3f && \
- (__t2=(uint8_t)((s)[(i)+1]-0x80))<= 0x3f \
- ) { \
- /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
- (c)=(UChar)(((c)<<12)|(__t1<<6)|__t2); \
+ if( /* handle U+0800..U+FFFF inline */ \
+ (0xe0<=(c) && (c)<0xf0) && \
+ (((i)+1)<(length) || (length)<0) && \
+ U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
+ (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
+ (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
(i)+=2; \
} else if( /* handle U+0080..U+07FF inline */ \
- ((c)<0xe0 && (c)>=0xc2) && \
- ((i)!=(length)) && \
- (__t1=(uint8_t)((s)[i]-0x80))<=0x3f \
- ) { \
+ ((c)<0xe0 && (c)>=0xc2) && \
+ ((i)!=(length)) && \
+ (__t1=(s)[i]-0x80)<=0x3f) { \
(c)=(((c)&0x1f)<<6)|__t1; \
++(i); \
} else { \
@@ -476,7 +495,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) { \
- (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((uint8_t)(s)[i]); \
+ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
}
/**
@@ -493,15 +512,24 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
* @stable ICU 2.4
*/
#define U8_FWD_1(s, i, length) { \
- uint8_t __b=(uint8_t)(s)[(i)++]; \
- if(U8_IS_LEAD(__b)) { \
- uint8_t __count=U8_COUNT_TRAIL_BYTES(__b); \
- if((i)+__count>(length) && (length)>=0) { \
- __count=(uint8_t)((length)-(i)); \
- } \
- while(__count>0 && U8_IS_TRAIL((s)[i])) { \
- ++(i); \
- --__count; \
+ uint8_t __b=(s)[(i)++]; \
+ if(U8_IS_LEAD(__b) && (i)!=(length)) { \
+ uint8_t __t1=(s)[i]; \
+ if((0xe0<=__b && __b<0xf0)) { \
+ if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+ } else if(__b<0xe0) { \
+ if(U8_IS_TRAIL(__t1)) { \
+ ++(i); \
+ } \
+ } else /* __b>=0xf0 */ { \
+ if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
} \
} \
}
@@ -615,7 +643,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
/* c is a trail byte */ \
(c)&=0x3f; \
for(;;) { \
- __b=(uint8_t)(s)[--(i)]; \
+ __b=(s)[--(i)]; \
if(__b>=0xc0) { \
U8_MASK_LEAD_BYTE(__b, __count); \
(c)|=(UChar32)__b<<__shift; \
@@ -651,7 +679,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_PREV(s, start, i, c) { \
(c)=(uint8_t)(s)[--(i)]; \
- if((c)>=0x80) { \
+ if(!U8_IS_SINGLE(c)) { \
(c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
} \
}
@@ -682,7 +710,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
*/
#define U8_PREV_OR_FFFD(s, start, i, c) { \
(c)=(uint8_t)(s)[--(i)]; \
- if((c)>=0x80) { \
+ if(!U8_IS_SINGLE(c)) { \
(c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
} \
}
diff --git a/deps/icu-small/source/common/unicode/utf_old.h b/deps/icu-small/source/common/unicode/utf_old.h
index cb229cb301..55c17c01df 100644
--- a/deps/icu-small/source/common/unicode/utf_old.h
+++ b/deps/icu-small/source/common/unicode/utf_old.h
@@ -145,7 +145,22 @@
#ifndef __UTF_OLD_H__
#define __UTF_OLD_H__
-#ifndef U_HIDE_DEPRECATED_API
+/**
+ * \def U_HIDE_OBSOLETE_UTF_OLD_H
+ *
+ * Hides the obsolete definitions in unicode/utf_old.h.
+ * Recommended to be set to 1 at compile time to make sure
+ * the long-deprecated macros are no longer used.
+ *
+ * For reasons for the deprecation see the utf_old.h file comments.
+ *
+ * @internal
+ */
+#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
+# define U_HIDE_OBSOLETE_UTF_OLD_H 0
+#endif
+
+#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
#include "unicode/utf.h"
#include "unicode/utf8.h"
@@ -1184,6 +1199,6 @@ U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[]; /* U_IMPORT2? */ /*U_I
*/
#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
-#endif /* U_HIDE_DEPRECATED_API */
+#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
#endif
diff --git a/deps/icu-small/source/common/unicode/uvernum.h b/deps/icu-small/source/common/unicode/uvernum.h
index cae59ad880..ce7dec1553 100644
--- a/deps/icu-small/source/common/unicode/uvernum.h
+++ b/deps/icu-small/source/common/unicode/uvernum.h
@@ -58,7 +58,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION_MAJOR_NUM 59
+#define U_ICU_VERSION_MAJOR_NUM 60
/** The current ICU minor version as an integer.
* This value will change in the subsequent releases of ICU
@@ -84,7 +84,7 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#define U_ICU_VERSION_SUFFIX _59
+#define U_ICU_VERSION_SUFFIX _60
/**
* \def U_DEF2_ICU_ENTRY_POINT_RENAME
@@ -119,24 +119,19 @@
* This value will change in the subsequent releases of ICU
* @stable ICU 2.4
*/
-#define U_ICU_VERSION "59.1"
+#define U_ICU_VERSION "60.1"
/** The current ICU library major/minor version as a string without dots, for library name suffixes.
* This value will change in the subsequent releases of ICU
* @stable ICU 2.6
*/
-#if U_PLATFORM_HAS_WINUWP_API == 0
-#define U_ICU_VERSION_SHORT "59"
-#else
-// U_DISABLE_RENAMING does not impact dat file name
-#define U_ICU_VERSION_SHORT
-#endif /* U_PLATFORM_HAS_WINUWP_API == 0 */
+#define U_ICU_VERSION_SHORT "60"
#ifndef U_HIDE_INTERNAL_API
/** Data version in ICU4C.
* @internal ICU 4.4 Internal Use Only
**/
-#define U_ICU_DATA_VERSION "59.1"
+#define U_ICU_DATA_VERSION "60.1"
#endif /* U_HIDE_INTERNAL_API */
/*===========================================================================