src: revise character width calculation

- Categorize all nonspacing marks (Mn) and enclosing marks (Me) as 0-width.
- Categorize all spacing marks (Mc) as non-0-width.
- Treat soft hyphens (a format character, Cf) as non-0-width.
- Do not treat all unassigned code points as 0-width; instead, let ICU select the default for that character per UAX #11.
- Avoid getting the General_Category of a character multiple times, as it is an intensive operation.

Refs: http://unicode.org/reports/tr11/
PR-URL: https://github.com/nodejs/node/pull/13918
Reviewed-By: James M Snell <jasnell@gmail.com>
author: Timothy Gu <timothygu99@gmail.com> 2017-06-26 15:19:03 +0800
committer: James M Snell <jasnell@gmail.com> 2017-06-28 21:50:55 -0700
commit: f4b5b704447821dda56069a601595322344248ab (patch)
tree: 47eb0f762c5ee824774d81a3ff3fa937b22ee27e /test/parallel/test-icu-stringwidth.js
parent: 01aeb388000b250ce82036b2a6b8e4a676bc5b5d (diff)
download: android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.gz
android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.bz2
android-node-v8-f4b5b704447821dda56069a601595322344248ab.zip
1 files changed, 31 insertions, 1 deletions
diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js
index 80e798b13a..7c8c2e948e 100644
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@@ -11,13 +11,43 @@ const assert = require('assert');
 const readline = require('internal/readline');
 
 // Test column width
+
+// Ll (Lowercase Letter): LATIN SMALL LETTER A
 assert.strictEqual(readline.getStringWidth('a'), 1);
+assert.strictEqual(readline.getStringWidth(0x0061), 1);
+// Lo (Other Letter)
 assert.strictEqual(readline.getStringWidth('丁'), 2);
+assert.strictEqual(readline.getStringWidth(0x4E01), 2);
+// Surrogate pairs
 assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2);
 assert.strictEqual(readline.getStringWidth('👅'), 2);
+// Cs (Surrogate): High Surrogate
+assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
+// Cs (Surrogate): Low Surrogate
+assert.strictEqual(readline.getStringWidth('\udc78'), 1);
+// Cc (Control): NULL
+assert.strictEqual(readline.getStringWidth(0), 0);
+// Cc (Control): BELL
+assert.strictEqual(readline.getStringWidth(0x0007), 0);
+// Cc (Control): LINE FEED
 assert.strictEqual(readline.getStringWidth('\n'), 0);
+// Cf (Format): SOFT HYPHEN
+assert.strictEqual(readline.getStringWidth(0x00AD), 1);
+// Cf (Format): LEFT-TO-RIGHT MARK
+// Cf (Format): RIGHT-TO-LEFT MARK
 assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
-assert.strictEqual(readline.getStringWidth(97), 1);
+// Cn (Unassigned): Not a character
+assert.strictEqual(readline.getStringWidth(0x10FFEF), 1);
+// Cn (Unassigned): Not a character (but in a CJK range)
+assert.strictEqual(readline.getStringWidth(0x3FFEF), 2);
+// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
+assert.strictEqual(readline.getStringWidth(0x0301), 0);
+// Mc (Spacing Mark): BALINESE ADEG ADEG
+// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
+// character.
+assert.strictEqual(readline.getStringWidth(0x1B44), 1);
+// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
+assert.strictEqual(readline.getStringWidth(0x20DD), 0);
 
 // The following is an emoji sequence. In some implementations, it is
 // represented as a single glyph, in other implementations as a sequence
author	Timothy Gu <timothygu99@gmail.com>	2017-06-26 15:19:03 +0800
committer	James M Snell <jasnell@gmail.com>	2017-06-28 21:50:55 -0700
commit	f4b5b704447821dda56069a601595322344248ab (patch)
tree	47eb0f762c5ee824774d81a3ff3fa937b22ee27e /test/parallel/test-icu-stringwidth.js
parent	01aeb388000b250ce82036b2a6b8e4a676bc5b5d (diff)
download	android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.gz android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.bz2 android-node-v8-f4b5b704447821dda56069a601595322344248ab.zip