readline: use icu based string width calculation

Rather than the pseudo-wcwidth impl used currently, use the ICU character properties database to calculate string width and determine if a character is full width or not. This allows the algorithm to correctly identify emoji as full width, ensures the algorithm will continue to function properly as new Unicode code points are added, and it's faster. This was originally part of a proposal to add a new Unicode module, but has been split out. Refs: https://github.com/nodejs/node/pull/8075 PR-URL: https://github.com/nodejs/node/pull/9040 Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl> Reviewed-By: Steven R Loomis <srloomis@us.ibm.com>
author: James M Snell <jasnell@gmail.com> 2016-10-11 15:20:03 -0700
committer: James M Snell <jasnell@gmail.com> 2016-10-25 09:00:45 -0700
commit: 72547fe28de95be435789716e0fd970e66c58477 (patch)
tree: 4408622ce8904373ed8c257b39baa91dd83edc12 /lib/internal/readline.js
parent: 52670fc09e49eab6db76787c160a18c16b645018 (diff)
download: android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.gz
android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.bz2
android-node-v8-72547fe28de95be435789716e0fd970e66c58477.zip
1 files changed, 87 insertions, 73 deletions
diff --git a/lib/internal/readline.js b/lib/internal/readline.js
index dbe8775dba..60fe946560 100644
--- a/lib/internal/readline.js
+++ b/lib/internal/readline.js
@@ -1,103 +1,117 @@
 'use strict';
 
-// Regexes used for ansi escape code splitting
+// Regex used for ansi escape code splitting
 // eslint-disable-next-line no-control-regex
-const metaKeyCodeReAnywhere = /(?:\x1b)([a-zA-Z0-9])/;
-const functionKeyCodeReAnywhere = new RegExp('(?:\x1b+)(O|N|\\[|\\[\\[)(?:' + [
-  '(\\d+)(?:;(\\d+))?([~^$])',
-  '(?:M([@ #!a`])(.)(.))', // mouse
-  '(?:1;)?(\\d+)?([a-zA-Z])'
-].join('|') + ')');
+// Adopted from https://github.com/chalk/ansi-regex/blob/master/index.js
+// License: MIT, authors: @sindresorhus, Qix-, and arjunmehta
+// Matches all ansi escape code sequences in a string
+const ansi =
+  /[\u001b\u009b][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g;
 
 
 module.exports = {
   emitKeys,
-  getStringWidth,
-  isFullWidthCodePoint,
   stripVTControlCharacters
 };
 
+if (process.binding('config').hasIntl) {
+  const icu = process.binding('icu');
+  module.exports.getStringWidth = function getStringWidth(str, options) {
+    options = options || {};
+    if (!Number.isInteger(str))
+      str = stripVTControlCharacters(String(str));
+    return icu.getStringWidth(str,
+                              Boolean(options.ambiguousAsFullWidth),
+                              Boolean(options.expandEmojiSequence));
+  };
+  module.exports.isFullWidthCodePoint =
+    function isFullWidthCodePoint(code, options) {
+      if (typeof code !== 'number')
+        return false;
+      return icu.getStringWidth(code, options) === 2;
+    };
+} else {
+  /**
+   * Returns the number of columns required to display the given string.
+   */
+  module.exports.getStringWidth = function getStringWidth(str) {
+    if (Number.isInteger(str))
+      return module.exports.isFullWidthCodePoint(str) ? 2 : 1;
 
-/**
- * Returns the number of columns required to display the given string.
- */
-function getStringWidth(str) {
-  let width = 0;
+    let width = 0;
 
-  str = stripVTControlCharacters(str);
+    str = stripVTControlCharacters(String(str));
 
-  for (var i = 0; i < str.length; i++) {
-    const code = str.codePointAt(i);
+    for (var i = 0; i < str.length; i++) {
+      const code = str.codePointAt(i);
 
-    if (code >= 0x10000) { // surrogates
-      i++;
-    }
+      if (code >= 0x10000) { // surrogates
+        i++;
+      }
 
-    if (isFullWidthCodePoint(code)) {
-      width += 2;
-    } else {
-      width++;
+      if (module.exports.isFullWidthCodePoint(code)) {
+        width += 2;
+      } else {
+        width++;
+      }
     }
-  }
-
-  return width;
-}
 
+    return width;
+  };
 
-/**
- * Returns true if the character represented by a given
- * Unicode code point is full-width. Otherwise returns false.
- */
-function isFullWidthCodePoint(code) {
-  if (isNaN(code)) {
-    return false;
-  }
+  /**
+   * Returns true if the character represented by a given
+   * Unicode code point is full-width. Otherwise returns false.
+   */
+  module.exports.isFullWidthCodePoint = function isFullWidthCodePoint(code) {
+    if (!Number.isInteger(code)) {
+      return false;
+    }
 
-  // Code points are derived from:
-  // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
-  if (code >= 0x1100 && (
-      code <= 0x115f ||  // Hangul Jamo
-      0x2329 === code || // LEFT-POINTING ANGLE BRACKET
-      0x232a === code || // RIGHT-POINTING ANGLE BRACKET
-      // CJK Radicals Supplement .. Enclosed CJK Letters and Months
-      (0x2e80 <= code && code <= 0x3247 && code !== 0x303f) ||
-      // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
-      0x3250 <= code && code <= 0x4dbf ||
-      // CJK Unified Ideographs .. Yi Radicals
-      0x4e00 <= code && code <= 0xa4c6 ||
-      // Hangul Jamo Extended-A
-      0xa960 <= code && code <= 0xa97c ||
-      // Hangul Syllables
-      0xac00 <= code && code <= 0xd7a3 ||
-      // CJK Compatibility Ideographs
-      0xf900 <= code && code <= 0xfaff ||
-      // Vertical Forms
-      0xfe10 <= code && code <= 0xfe19 ||
-      // CJK Compatibility Forms .. Small Form Variants
-      0xfe30 <= code && code <= 0xfe6b ||
-      // Halfwidth and Fullwidth Forms
-      0xff01 <= code && code <= 0xff60 ||
-      0xffe0 <= code && code <= 0xffe6 ||
-      // Kana Supplement
-      0x1b000 <= code && code <= 0x1b001 ||
-      // Enclosed Ideographic Supplement
-      0x1f200 <= code && code <= 0x1f251 ||
-      // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
-      0x20000 <= code && code <= 0x3fffd)) {
-    return true;
-  }
+    // Code points are derived from:
+    // http://www.unicode.org/Public/UNIDATA/EastAsianWidth.txt
+    if (code >= 0x1100 && (
+        code <= 0x115f ||  // Hangul Jamo
+        0x2329 === code || // LEFT-POINTING ANGLE BRACKET
+        0x232a === code || // RIGHT-POINTING ANGLE BRACKET
+        // CJK Radicals Supplement .. Enclosed CJK Letters and Months
+        (0x2e80 <= code && code <= 0x3247 && code !== 0x303f) ||
+        // Enclosed CJK Letters and Months .. CJK Unified Ideographs Extension A
+        0x3250 <= code && code <= 0x4dbf ||
+        // CJK Unified Ideographs .. Yi Radicals
+        0x4e00 <= code && code <= 0xa4c6 ||
+        // Hangul Jamo Extended-A
+        0xa960 <= code && code <= 0xa97c ||
+        // Hangul Syllables
+        0xac00 <= code && code <= 0xd7a3 ||
+        // CJK Compatibility Ideographs
+        0xf900 <= code && code <= 0xfaff ||
+        // Vertical Forms
+        0xfe10 <= code && code <= 0xfe19 ||
+        // CJK Compatibility Forms .. Small Form Variants
+        0xfe30 <= code && code <= 0xfe6b ||
+        // Halfwidth and Fullwidth Forms
+        0xff01 <= code && code <= 0xff60 ||
+        0xffe0 <= code && code <= 0xffe6 ||
+        // Kana Supplement
+        0x1b000 <= code && code <= 0x1b001 ||
+        // Enclosed Ideographic Supplement
+        0x1f200 <= code && code <= 0x1f251 ||
+        // CJK Unified Ideographs Extension B .. Tertiary Ideographic Plane
+        0x20000 <= code && code <= 0x3fffd)) {
+      return true;
+    }
 
-  return false;
+    return false;
+  };
 }
 
-
 /**
  * Tries to remove all VT control characters. Use to estimate displayed
  * string width. May be buggy due to not running a real state machine
  */
 function stripVTControlCharacters(str) {
-  str = str.replace(new RegExp(functionKeyCodeReAnywhere.source, 'g'), '');
-  return str.replace(new RegExp(metaKeyCodeReAnywhere.source, 'g'), '');
+  return str.replace(ansi, '');
 }
author	James M Snell <jasnell@gmail.com>	2016-10-11 15:20:03 -0700
committer	James M Snell <jasnell@gmail.com>	2016-10-25 09:00:45 -0700
commit	72547fe28de95be435789716e0fd970e66c58477 (patch)
tree	4408622ce8904373ed8c257b39baa91dd83edc12 /lib/internal/readline.js
parent	52670fc09e49eab6db76787c160a18c16b645018 (diff)
download	android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.gz android-node-v8-72547fe28de95be435789716e0fd970e66c58477.tar.bz2 android-node-v8-72547fe28de95be435789716e0fd970e66c58477.zip