diff options
author | Timothy Gu <timothygu99@gmail.com> | 2017-06-26 15:19:03 +0800 |
---|---|---|
committer | James M Snell <jasnell@gmail.com> | 2017-06-28 21:50:55 -0700 |
commit | f4b5b704447821dda56069a601595322344248ab (patch) | |
tree | 47eb0f762c5ee824774d81a3ff3fa937b22ee27e /test/parallel/test-icu-stringwidth.js | |
parent | 01aeb388000b250ce82036b2a6b8e4a676bc5b5d (diff) | |
download | android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.gz android-node-v8-f4b5b704447821dda56069a601595322344248ab.tar.bz2 android-node-v8-f4b5b704447821dda56069a601595322344248ab.zip |
src: revise character width calculation
- Categorize all nonspacing marks (Mn) and enclosing marks (Me) as
  0-width
- Categorize all spacing marks (Mc) as non-0-width.
- Treat soft hyphens (a format character Cf) as non-0-width.
- Do not treat all unassigned code points as 0-width; instead, let ICU
  select the default for that character per UAX #11.
- Avoid getting the General_Category of a character multiple times as it
  is an intensive operation.
Refs: http://unicode.org/reports/tr11/
PR-URL: https://github.com/nodejs/node/pull/13918
Reviewed-By: James M Snell <jasnell@gmail.com>
Diffstat (limited to 'test/parallel/test-icu-stringwidth.js')
-rw-r--r-- | test/parallel/test-icu-stringwidth.js | 32 |
1 files changed, 31 insertions, 1 deletions
```diff
diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js
index 80e798b13a..7c8c2e948e 100644
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@@ -11,13 +11,43 @@ const assert = require('assert');
 const readline = require('internal/readline');
 
 // Test column width
+
+// Ll (Lowercase Letter): LATIN SMALL LETTER A
 assert.strictEqual(readline.getStringWidth('a'), 1);
+assert.strictEqual(readline.getStringWidth(0x0061), 1);
+// Lo (Other Letter)
 assert.strictEqual(readline.getStringWidth('丁'), 2);
+assert.strictEqual(readline.getStringWidth(0x4E01), 2);
+// Surrogate pairs
 assert.strictEqual(readline.getStringWidth('\ud83d\udc78\ud83c\udfff'), 2);
 assert.strictEqual(readline.getStringWidth('👅'), 2);
+// Cs (Surrogate): High Surrogate
+assert.strictEqual(readline.getStringWidth('\ud83d'), 1);
+// Cs (Surrogate): Low Surrogate
+assert.strictEqual(readline.getStringWidth('\udc78'), 1);
+// Cc (Control): NULL
+assert.strictEqual(readline.getStringWidth(0), 0);
+// Cc (Control): BELL
+assert.strictEqual(readline.getStringWidth(0x0007), 0);
+// Cc (Control): LINE FEED
 assert.strictEqual(readline.getStringWidth('\n'), 0);
+// Cf (Format): SOFT HYPHEN
+assert.strictEqual(readline.getStringWidth(0x00AD), 1);
+// Cf (Format): LEFT-TO-RIGHT MARK
+// Cf (Format): RIGHT-TO-LEFT MARK
 assert.strictEqual(readline.getStringWidth('\u200Ef\u200F'), 1);
-assert.strictEqual(readline.getStringWidth(97), 1);
+// Cn (Unassigned): Not a character
+assert.strictEqual(readline.getStringWidth(0x10FFEF), 1);
+// Cn (Unassigned): Not a character (but in a CJK range)
+assert.strictEqual(readline.getStringWidth(0x3FFEF), 2);
+// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
+assert.strictEqual(readline.getStringWidth(0x0301), 0);
+// Mc (Spacing Mark): BALINESE ADEG ADEG
+// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
+// character.
+assert.strictEqual(readline.getStringWidth(0x1B44), 1);
+// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
+assert.strictEqual(readline.getStringWidth(0x20DD), 0);
 
 // The following is an emoji sequence. In some implementations, it is
 // represented as a single glyph, in other implementations as a sequence
```