diff options
author | Anna Henningsen <anna@addaleax.net> | 2019-08-25 03:07:09 +0200 |
---|---|---|
committer | Rich Trott <rtrott@gmail.com> | 2019-09-02 21:02:34 -0700 |
commit | ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1 (patch) | |
tree | 2c43b17c57c7c2c9a92828f11ee6be4f0a457ab4 | |
parent | 020c2eaf4bf630652c1e9d3b238579c37036a4aa (diff) | |
download | android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.tar.gz android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.tar.bz2 android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.zip |
lib: add ASCII fast path to getStringWidth()
A lot of strings that are going to be passed to `getStringWidth()`
are ASCII strings, for which the calculation is rather easy and
calling into C++ can be skipped.
| benchmark | confidence | improvement | accuracy (*) | (**) | (***) |
|---|---|---|---|---|---|
| misc/getstringwidth.js n=100000 type='ascii' | *** | 328.99 % | ±21.73% | ±29.25% | ±38.77% |
| misc/getstringwidth.js n=100000 type='emojiseq' | | 2.94 % | ±7.66% | ±10.19% | ±13.26% |
| misc/getstringwidth.js n=100000 type='fullwidth' | | 4.70 % | ±5.64% | ±7.50% | ±9.76% |
PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
-rw-r--r-- | benchmark/misc/getstringwidth.js | 26 | ||||
-rw-r--r-- | lib/internal/readline/utils.js | 33 | ||||
-rw-r--r-- | test/parallel/test-icu-stringwidth.js | 22 |
3 files changed, 74 insertions, 7 deletions
diff --git a/benchmark/misc/getstringwidth.js b/benchmark/misc/getstringwidth.js new file mode 100644 index 0000000000..12f071c60d --- /dev/null +++ b/benchmark/misc/getstringwidth.js @@ -0,0 +1,26 @@ +'use strict'; + +const common = require('../common.js'); + +const bench = common.createBenchmark(main, { + type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'], + n: [10e4] +}, { + flags: ['--expose-internals'] +}); + +function main({ n, type }) { + const { getStringWidth } = require('internal/readline/utils'); + + const str = ({ + ascii: 'foobar'.repeat(100), + mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100), + emojiseq: '👨👨👧👦👨👩👦👦👨👩👧👧👩👩👧👦'.repeat(10), + fullwidth: '你好'.repeat(150) + })[type]; + + bench.start(); + for (let j = 0; j < n; j += 1) + getStringWidth(str); + bench.end(n); +} diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js index c6cd13a6bd..f72a03bb39 100644 --- a/lib/internal/readline/utils.js +++ b/lib/internal/readline/utils.js @@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) { const icu = internalBinding('icu'); getStringWidth = function getStringWidth(str, options) { options = options || {}; - if (!Number.isInteger(str)) - str = stripVTControlCharacters(String(str)); - return icu.getStringWidth( - str, - Boolean(options.ambiguousAsFullWidth), - Boolean(options.expandEmojiSequence) - ); + if (Number.isInteger(str)) { + // Provide information about the character with code point 'str'. + return icu.getStringWidth( + str, + Boolean(options.ambiguousAsFullWidth), + false + ); + } + str = stripVTControlCharacters(String(str)); + let width = 0; + for (let i = 0; i < str.length; i++) { + // Try to avoid calling into C++ by first handling the ASCII portion of + // the string. If it is fully ASCII, we skip the C++ part. 
+ const code = str.charCodeAt(i); + if (code < 127) { + width += code >= 32; + continue; + } + width += icu.getStringWidth( + str.slice(i), + Boolean(options.ambiguousAsFullWidth), + Boolean(options.expandEmojiSequence) + ); + break; + } + return width; }; isFullWidthCodePoint = function isFullWidthCodePoint(code, options) { diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js index 0620d3af39..48384f916d 100644 --- a/test/parallel/test-icu-stringwidth.js +++ b/test/parallel/test-icu-stringwidth.js @@ -69,3 +69,25 @@ assert.strictEqual( // Control chars and combining chars are zero assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1); + +// Test that the fast path for ASCII characters yields results consistent +// with the 'slow' path. +for (const ambiguousAsFullWidth of [ false, true ]) { + for (let i = 0; i < 256; i++) { + const char = String.fromCharCode(i); + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), + readline.getStringWidth(char, { ambiguousAsFullWidth })); + assert.strictEqual( + readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }), + readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2); + + if (i < 32 || (i >= 127 && i < 160)) { // Control character + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), 0); + } else if (i < 127) { // Regular ASCII character + assert.strictEqual( + readline.getStringWidth(i, { ambiguousAsFullWidth }), 1); + } + } +} |