summaryrefslogtreecommitdiff
path: root/lib/internal/readline.js
diff options
context:
space:
mode:
author: James M Snell <jasnell@gmail.com> 2016-10-11 15:20:03 -0700
committer: James M Snell <jasnell@gmail.com> 2016-10-25 09:00:45 -0700
commit: 72547fe28de95be435789716e0fd970e66c58477 (patch)
tree: 4408622ce8904373ed8c257b39baa91dd83edc12 /lib/internal/readline.js
parent: 52670fc09e49eab6db76787c160a18c16b645018 (diff)
download: android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.gz
android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.bz2
android-node-v8-72547fe28de95be435789716e0fd970e66c58477.zip
readline: use icu based string width calculation
Rather than the pseudo-wcwidth implementation used currently, use the ICU character properties database to calculate string width and determine whether a character is full width. This allows the algorithm to correctly identify emoji as full width, ensures the algorithm will continue to function properly as new Unicode code points are added, and is faster. This was originally part of a proposal to add a new unicode module, but has been split out. Refs: https://github.com/nodejs/node/pull/8075 PR-URL: https://github.com/nodejs/node/pull/9040 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Steven R Loomis <srloomis@us.ibm.com>
Diffstat (limited to 'lib/internal/readline.js')
-rw-r--r--lib/internal/readline.js160
1 files changed, 87 insertions, 73 deletions
diff --git a/lib/internal/readline.js b/lib/internal/readline.js
index dbe8775dba..60fe946560 100644
--- a/lib/internal/readline.js
+++ b/lib/internal/readline.js
@@ -1,103 +1,117 @@
'use strict';
-// Regexes used for ansi escape code splitting
+// Regex used for ansi escape code splitting
// eslint-disable-next-line no-control-regex
-const metaKeyCodeReAnywhere = /(?:\x1b)([a-zA-Z0-9])/;
-const functionKeyCodeReAnywhere = new RegExp('(?:\x1b+)(O|N|\\[|\\[\\[)(?:' + [
- '(\\d+)(?:;(\\d+))?([~^$])',
- '(?:M([@ #!a`])(.)(.))', // mouse
- '(?:1;)?(\\d+)?([a-zA-Z])'
-].join('|') + ')');
+// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
+// License: MIT, authors: @sindresorhus, Qix-, and arjunmehta
+// Matches all ansi escape code sequences in a string
+const ansi =
+ /[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g;
module.exports = {
emitKeys,
- getStringWidth,
- isFullWidthCodePoint,
stripVTControlCharacters
};
+if (process.binding('config').hasIntl) {
+ const icu = process.binding('icu');
+ module.exports.getStringWidth = function getStringWidth(str, options) {
+ options = options || {};
+ if (!Number.isInteger(str))
+ str = stripVTControlCharacters(String(str));
+ return icu.getStringWidth(str,
+ Boolean(options.ambiguousAsFullWidth),
+ Boolean(options.expandEmojiSequence));
+ };
+ module.exports.isFullWidthCodePoint =
+ function isFullWidthCodePoint(code, options) {
+ if (typeof code !== 'number')
+ return false;
+ return icu.getStringWidth(code, options) === 2;
+ };
+} else {
+ /**
+ * Returns the number of columns required to display the given string.
+ */
+ module.exports.getStringWidth = function getStringWidth(str) {
+ if (Number.isInteger(str))
+ return module.exports.isFullWidthCodePoint(str) ? 2 : 1;
-/**
- * Returns the number of columns required to display the given string.
- */
-function getStringWidth(str) {
- let width = 0;
+ let width = 0;
- str = stripVTControlCharacters(str);
+ str = stripVTControlCharacters(String(str));
- for (var i = 0; i < str.length; i++) {
- const code = str.codePointAt(i);
+ for (var i = 0; i < str.length; i++) {
+ const code = str.codePointAt(i);
- if (code >= 0x10000) { // surrogates
- i++;
- }
+ if (code >= 0x10000) { // surrogates
+ i++;
+ }
- if (isFullWidthCodePoint(code)) {
- width += 2;
- } else {
- width++;
+ if (module.exports.isFullWidthCodePoint(code)) {
+ width += 2;
+ } else {
+ width++;
+ }
}
- }
-
- return width;
-}
+ return width;
+ };
-/**
- * Returns true if the character represented by a given
- * Unicode code point is full-width. Otherwise returns false.
- */
-function isFullWidthCodePoint(code) {
- if (isNaN(code)) {
- return false;
- }
+ /**
+ * Returns true if the character represented by a given
+ * Unicode code point is full-width. Otherwise returns false.
+ */
+ module.exports.isFullWidthCodePoint = function isFullWidthCodePoint(code) {
+ if (!Number.isInteger(code)) {
+ return false;
+ }
- // Code points are derived from:
- // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
- if (code >= 0x1100 && (
- code <= 0x115f || // Hangul Jamo
- 0x2329 === code || // LEFT-POINTING ANGLE BRACKET
- 0x232a === code || // RIGHT-POINTING ANGLE BRACKET
- // CJK Radicals Supplement .. Enclosed CJK Letters and Months
- (0x2e80 <= code && code <= 0x3247 && code !== 0x303f) ||
- // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
- 0x3250 <= code && code <= 0x4dbf ||
- // CJK Unified Ideographs .. Yi Radicals
- 0x4e00 <= code && code <= 0xa4c6 ||
- // Hangul Jamo Extended-A
- 0xa960 <= code && code <= 0xa97c ||
- // Hangul Syllables
- 0xac00 <= code && code <= 0xd7a3 ||
- // CJK Compatibility Ideographs
- 0xf900 <= code && code <= 0xfaff ||
- // Vertical Forms
- 0xfe10 <= code && code <= 0xfe19 ||
- // CJK Compatibility Forms .. Small Form Variants
- 0xfe30 <= code && code <= 0xfe6b ||
- // Halfwidth and Fullwidth Forms
- 0xff01 <= code && code <= 0xff60 ||
- 0xffe0 <= code && code <= 0xffe6 ||
- // Kana Supplement
- 0x1b000 <= code && code <= 0x1b001 ||
- // Enclosed Ideographic Supplement
- 0x1f200 <= code && code <= 0x1f251 ||
- // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
- 0x20000 <= code && code <= 0x3fffd)) {
- return true;
- }
+ // Code points are derived from:
+ // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+ if (code >= 0x1100 && (
+ code <= 0x115f || // Hangul Jamo
+ 0x2329 === code || // LEFT-POINTING ANGLE BRACKET
+ 0x232a === code || // RIGHT-POINTING ANGLE BRACKET
+ // CJK Radicals Supplement .. Enclosed CJK Letters and Months
+ (0x2e80 <= code && code <= 0x3247 && code !== 0x303f) ||
+ // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
+ 0x3250 <= code && code <= 0x4dbf ||
+ // CJK Unified Ideographs .. Yi Radicals
+ 0x4e00 <= code && code <= 0xa4c6 ||
+ // Hangul Jamo Extended-A
+ 0xa960 <= code && code <= 0xa97c ||
+ // Hangul Syllables
+ 0xac00 <= code && code <= 0xd7a3 ||
+ // CJK Compatibility Ideographs
+ 0xf900 <= code && code <= 0xfaff ||
+ // Vertical Forms
+ 0xfe10 <= code && code <= 0xfe19 ||
+ // CJK Compatibility Forms .. Small Form Variants
+ 0xfe30 <= code && code <= 0xfe6b ||
+ // Halfwidth and Fullwidth Forms
+ 0xff01 <= code && code <= 0xff60 ||
+ 0xffe0 <= code && code <= 0xffe6 ||
+ // Kana Supplement
+ 0x1b000 <= code && code <= 0x1b001 ||
+ // Enclosed Ideographic Supplement
+ 0x1f200 <= code && code <= 0x1f251 ||
+ // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
+ 0x20000 <= code && code <= 0x3fffd)) {
+ return true;
+ }
- return false;
+ return false;
+ };
}
-
/**
* Tries to remove all VT control characters. Use to estimate displayed
* string width. May be buggy due to not running a real state machine
*/
function stripVTControlCharacters(str) {
- str = str.replace(new RegExp(functionKeyCodeReAnywhere.source, 'g'), '');
- return str.replace(new RegExp(metaKeyCodeReAnywhere.source, 'g'), '');
+ return str.replace(ansi, '');
}