summaryrefslogtreecommitdiff
path: root/test/parallel/test-icu-stringwidth.js
blob: 66142a8d6811c004854d14f713f1fcf81c459b2d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
// Flags: --expose-internals
'use strict';
const common = require('../common');

const assert = require('assert');
const { getStringWidth } = require('internal/util/inspect');

// Test column width: one representative string per Unicode general category,
// each checked against the number of terminal columns getStringWidth()
// reports for it.

// Ll (Lowercase Letter): LATIN SMALL LETTER A
assert.strictEqual(getStringWidth('a'), 1);
assert.strictEqual(getStringWidth(String.fromCharCode(0x0061)), 1);
// Lo (Other Letter)
assert.strictEqual(getStringWidth('丁'), 2);
assert.strictEqual(getStringWidth(String.fromCharCode(0x4E01)), 2);
// Surrogate pairs
assert.strictEqual(getStringWidth('\ud83d\udc78\ud83c\udfff'), 4);
// U+1F445 TONGUE, written as a literal astral character.
assert.strictEqual(getStringWidth('👅'), 2);
// Cs (Surrogate): High Surrogate
assert.strictEqual(getStringWidth('\ud83d'), 1);
// Cs (Surrogate): Low Surrogate
assert.strictEqual(getStringWidth('\udc78'), 1);
// Cc (Control): NULL
assert.strictEqual(getStringWidth('\u0000'), 0);
// Cc (Control): BELL
assert.strictEqual(getStringWidth(String.fromCharCode(0x0007)), 0);
// Cc (Control): LINE FEED
assert.strictEqual(getStringWidth('\n'), 0);
// Cf (Format): SOFT HYPHEN
assert.strictEqual(getStringWidth(String.fromCharCode(0x00AD)), 1);
// Cf (Format): LEFT-TO-RIGHT MARK
// Cf (Format): RIGHT-TO-LEFT MARK
assert.strictEqual(getStringWidth('\u200Ef\u200F'), 1);
// Cn (Unassigned): Not a character
// NOTE(review): String.fromCharCode() truncates its argument to 16 bits, so
// this call (and the one below) actually produces U+FFEF. Reaching the code
// point written here would need String.fromCodePoint(), which may change the
// expected width - confirm before changing.
assert.strictEqual(getStringWidth(String.fromCharCode(0x10FFEF)), 1);
// Cn (Unassigned): Not a character (but in a CJK range)
assert.strictEqual(getStringWidth(String.fromCharCode(0x3FFEF)), 1);
// Mn (Nonspacing Mark): COMBINING ACUTE ACCENT
assert.strictEqual(getStringWidth(String.fromCharCode(0x0301)), 0);
// Mc (Spacing Mark): BALINESE ADEG ADEG
// Chosen as its Canonical_Combining_Class is not 0, but is not a 0-width
// character.
assert.strictEqual(getStringWidth(String.fromCharCode(0x1B44)), 1);
// Me (Enclosing Mark): COMBINING ENCLOSING CIRCLE
assert.strictEqual(getStringWidth(String.fromCharCode(0x20DD)), 0);

// The following is an emoji sequence with ZWJ (zero-width-joiner). In some
// implementations, it is represented as a single glyph, in other
// implementations as a sequence of individual glyphs. By default, each
// component will be counted individually, since not a lot of systems support
// these fully.
// See https://www.unicode.org/reports/tr51/tr51-16.html#Emoji_ZWJ_Sequences
// The invisible joiners (U+200D) are written as escapes so that they stay
// visible in the source: four emoji at two columns each, three joiners at
// zero columns.
assert.strictEqual(getStringWidth('👩\u200D👩\u200D👧\u200D👧'), 8);
// TODO(BridgeAR): This should have a width of two and six. The heart contains
// the \uFE0F variation selector that indicates that it should be displayed as
// emoji instead of as text. Emojis are all full width characters when not being
// rendered as text.
// https://en.wikipedia.org/wiki/Variation_Selectors_(Unicode_block)
assert.strictEqual(getStringWidth('❤\uFE0F'), 1);
assert.strictEqual(getStringWidth('👩\u200D❤\uFE0F\u200D👩'), 5);
// The length of one is correct. It is an emoji treated as text (U+2764
// without a variation selector).
assert.strictEqual(getStringWidth('❤'), 1);

// Table of [input, expected column count] pairs that are each checked with
// getStringWidth().
for (const [input, expectedWidth] of [
  // Unicode characters whose width is considered ambiguous are treated as
  // half-width by default, so getStringWidth reports 1 for them, even though
  // some contexts would prefer to treat them as full width.
  ['\u01d4', 1],
  // Control chars and combining chars are zero, so only one character of
  // this string occupies a column.
  ['\u200E\n\u220A\u20D2', 1],
]) {
  assert.strictEqual(getStringWidth(input), expectedWidth);
}

// Test that the fast path for ASCII characters yields results consistent
// with the 'slow' path. Every code unit from 0 through 255 is measured on its
// own and again with a trailing non-ASCII emoji (U+1F389 PARTY POPPER, two
// columns) appended; the second measurement must be exactly two columns wider
// than the first.
for (let i = 0; i < 256; i++) {
  const char = String.fromCharCode(i);
  const width = getStringWidth(char);
  assert.strictEqual(getStringWidth(`${char}🎉`), width + 2);

  if (i < 32 || (i >= 127 && i < 160)) {  // Control character (C0, DEL, C1)
    assert.strictEqual(width, 0);
  } else {  // Printable ASCII or Latin-1 character
    assert.strictEqual(width, 1);
  }
}

// Only checked when common.hasIntl is set. The decomposed and precomposed
// spellings of the same Hangul text must report the same column width even
// though they differ in UTF-16 length.
if (common.hasIntl) {
  // NFD: six conjoining jamo
  // (U+1112 U+1161 U+11AB U+1100 U+1173 U+11AF).
  const decomposed = '한글'.normalize('NFD');
  // NFC: two precomposed syllables (U+D55C U+AE00).
  const composed = '한글'.normalize('NFC');
  assert.strictEqual(decomposed.length, 6);
  assert.strictEqual(composed.length, 2);
  assert.strictEqual(getStringWidth(decomposed), 4);
  assert.strictEqual(getStringWidth(composed), 4);
}