deps: ICU 61.1 bump

- Update to released ICU 61.1, including: - CLDR 33 (many new languages and data improvements) - Many small API additions, improvements, and bug fixes - note: 'icu::' namespace is no longer used by default (Necessated https://github.com/nodejs/node/pull/18667 ) PR-URL: https://github.com/nodejs/node/pull/19621 Reviewed-By: Colin Ihrig <cjihrig@gmail.com> Reviewed-By: James M Snell <jasnell@gmail.com>
author: Steven R. Loomis <srloomis@us.ibm.com> 2018-03-26 15:29:02 -0700
committer: Steven R. Loomis <srloomis@us.ibm.com> 2018-04-02 18:18:28 -0700
commit: 64211405dab824a570e52d000891c49415cc42b8 (patch)
tree: 4c196d0e2c19e083db1e124139dd4ba6272fd049 /deps/icu-small/source/common/unicode/utf8.h
parent: 88773af540a36b23a47af0d6c4ce03b8cc3ef9aa (diff)
download: android-node-v8-64211405dab824a570e52d000891c49415cc42b8.tar.gz
android-node-v8-64211405dab824a570e52d000891c49415cc42b8.tar.bz2
android-node-v8-64211405dab824a570e52d000891c49415cc42b8.zip
1 files changed, 106 insertions, 60 deletions
diff --git a/deps/icu-small/source/common/unicode/utf8.h b/deps/icu-small/source/common/unicode/utf8.h
index 59b4b25570..1f07634359 100644
--- a/deps/icu-small/source/common/unicode/utf8.h
+++ b/deps/icu-small/source/common/unicode/utf8.h
@@ -348,29 +348,7 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * @see U8_NEXT_UNSAFE
  * @stable ICU 2.4
  */
-#define U8_NEXT(s, i, length, c) { \
-    (c)=(uint8_t)(s)[(i)++]; \
-    if(!U8_IS_SINGLE(c)) { \
-        uint8_t __t1, __t2; \
-        if( /* handle U+0800..U+FFFF inline */ \
-                (0xe0<=(c) && (c)<0xf0) && \
-                (((i)+1)<(length) || (length)<0) && \
-                U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
-                (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
-            (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
-            (i)+=2; \
-        } else if( /* handle U+0080..U+07FF inline */ \
-                ((c)<0xe0 && (c)>=0xc2) && \
-                ((i)!=(length)) && \
-                (__t1=(s)[i]-0x80)<=0x3f) { \
-            (c)=(((c)&0x1f)<<6)|__t1; \
-            ++(i); \
-        } else { \
-            /* function call for "complicated" and error cases */ \
-            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -1); \
-        } \
-    } \
-}
+#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
 
 /**
  * Get a code point from a string at a code point boundary offset,
@@ -396,26 +374,33 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * @see U8_NEXT
  * @stable ICU 51
  */
-#define U8_NEXT_OR_FFFD(s, i, length, c) { \
+#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
+
+/** @internal */
+#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) { \
     (c)=(uint8_t)(s)[(i)++]; \
     if(!U8_IS_SINGLE(c)) { \
-        uint8_t __t1, __t2; \
-        if( /* handle U+0800..U+FFFF inline */ \
-                (0xe0<=(c) && (c)<0xf0) && \
-                (((i)+1)<(length) || (length)<0) && \
-                U8_IS_VALID_LEAD3_AND_T1((c), __t1=(s)[i]) && \
-                (__t2=(s)[(i)+1]-0x80)<=0x3f) { \
-            (c)=(((c)&0xf)<<12)|((__t1&0x3f)<<6)|__t2; \
-            (i)+=2; \
-        } else if( /* handle U+0080..U+07FF inline */ \
-                ((c)<0xe0 && (c)>=0xc2) && \
-                ((i)!=(length)) && \
-                (__t1=(s)[i]-0x80)<=0x3f) { \
-            (c)=(((c)&0x1f)<<6)|__t1; \
-            ++(i); \
+        uint8_t __t = 0; \
+        if((i)!=(length) && \
+            /* fetch/validate/assemble all but last trail byte */ \
+            ((c)>=0xe0 ? \
+                ((c)<0xf0 ?  /* U+0800..U+FFFF except surrogates */ \
+                    U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
+                    (__t&=0x3f, 1) \
+                :  /* U+10000..U+10FFFF */ \
+                    ((c)-=0xf0)<=4 && \
+                    U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
+                    ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
+                    (__t=(s)[i]-0x80)<=0x3f) && \
+                /* valid second-to-last trail byte */ \
+                ((c)=((c)<<6)|__t, ++(i)!=(length)) \
+            :  /* U+0080..U+07FF */ \
+                (c)>=0xc2 && ((c)&=0x1f, 1)) && \
+            /* last trail byte */ \
+            (__t=(s)[i]-0x80)<=0x3f && \
+            ((c)=((c)<<6)|__t, ++(i), 1)) { \
         } else { \
-            /* function call for "complicated" and error cases */ \
-            (c)=utf8_nextCharSafeBody((const uint8_t *)s, &(i), (length), c, -3); \
+            (c)=(sub);  /* ill-formed*/ \
         } \
     } \
 }
@@ -434,21 +419,22 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * @stable ICU 2.4
  */
 #define U8_APPEND_UNSAFE(s, i, c) { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
+    uint32_t __uc=(c); \
+    if(__uc<=0x7f) { \
+        (s)[(i)++]=(uint8_t)__uc; \
     } else { \
-        if((uint32_t)(c)<=0x7ff) { \
-            (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+        if(__uc<=0x7ff) { \
+            (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
         } else { \
-            if((uint32_t)(c)<=0xffff) { \
-                (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+            if(__uc<=0xffff) { \
+                (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
             } else { \
-                (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
-                (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+                (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+                (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
             } \
-            (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+            (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
         } \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
     } \
 }
 
@@ -470,17 +456,23 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * @stable ICU 2.4
  */
 #define U8_APPEND(s, i, capacity, c, isError) { \
-    if((uint32_t)(c)<=0x7f) { \
-        (s)[(i)++]=(uint8_t)(c); \
-    } else if((uint32_t)(c)<=0x7ff && (i)+1<(capacity)) { \
-        (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
-    } else if((uint32_t)(c)<=0xd7ff && (i)+2<(capacity)) { \
-        (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
-        (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
-        (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+    uint32_t __uc=(c); \
+    if(__uc<=0x7f) { \
+        (s)[(i)++]=(uint8_t)__uc; \
+    } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
+        (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
+        (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+    } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
+        (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+        (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+        (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
     } else { \
-        (i)=utf8_appendCharSafeBody(s, (i), (capacity), c, &(isError)); \
+        (isError)=TRUE; \
     } \
 }
 
@@ -600,12 +592,15 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
  * If the offset points to a UTF-8 trail byte,
  * then the offset is moved backward to the corresponding lead byte.
  * Otherwise, it is not modified.
+ *
  * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
  *
  * @param s const uint8_t * string
  * @param start int32_t starting string offset (usually 0)
  * @param i int32_t string offset, must be start<=i
  * @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
  * @stable ICU 2.4
  */
 #define U8_SET_CP_START(s, start, i) { \
@@ -614,6 +609,57 @@ utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
     } \
 }
 
+#ifndef U_HIDE_DRAFT_API
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * In all other cases (the string ends with a complete sequence, or it is not
+ * possible for any further trail byte to extend the trailing sequence)
+ * the length remains unchanged.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length (usually start<=length)
+ * @see U8_SET_CP_START
+ * @draft ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) \
+    if((length)>(start)) { \
+        uint8_t __b1=s[(length)-1]; \
+        if(U8_IS_SINGLE(__b1)) { \
+            /* common ASCII character */ \
+        } else if(U8_IS_LEAD(__b1)) { \
+            --(length); \
+        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+            uint8_t __b2=s[(length)-2]; \
+            if(0xe0<=__b2 && __b2<=0xf4) { \
+                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+                    (length)-=2; \
+                } \
+            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+                uint8_t __b3=s[(length)-3]; \
+                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+                    (length)-=3; \
+                } \
+            } \
+        } \
+    }
+#endif  // U_HIDE_DRAFT_API
+
 /* definitions with backward iteration -------------------------------------- */
 
 /**
author	Steven R. Loomis <srloomis@us.ibm.com>	2018-03-26 15:29:02 -0700
committer	Steven R. Loomis <srloomis@us.ibm.com>	2018-04-02 18:18:28 -0700
commit	64211405dab824a570e52d000891c49415cc42b8 (patch)
tree	4c196d0e2c19e083db1e124139dd4ba6272fd049 /deps/icu-small/source/common/unicode/utf8.h
parent	88773af540a36b23a47af0d6c4ce03b8cc3ef9aa (diff)
download	android-node-v8-64211405dab824a570e52d000891c49415cc42b8.tar.gz android-node-v8-64211405dab824a570e52d000891c49415cc42b8.tar.bz2 android-node-v8-64211405dab824a570e52d000891c49415cc42b8.zip