summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Anna Henningsen <anna@addaleax.net> 2019-08-25 03:07:09 +0200
committer: Rich Trott <rtrott@gmail.com> 2019-09-02 21:02:34 -0700
commit: ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1 (patch)
tree: 2c43b17c57c7c2c9a92828f11ee6be4f0a457ab4
parent: 020c2eaf4bf630652c1e9d3b238579c37036a4aa (diff)
download: android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.tar.gz
android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.tar.bz2
android-node-v8-ab841d5fbab1a9d2f8323d1ae3f71f37c6f636a1.zip
lib: add ASCII fast path to getStringWidth()
A lot of strings that are going to be passed to `getStringWidth()` are ASCII strings, for which the calculation is rather easy and calling into C++ can be skipped.

                                                  confidence improvement accuracy (*)    (**)   (***)
misc/getstringwidth.js n=100000 type='ascii'             ***    328.99 %      ±21.73% ±29.25% ±38.77%
misc/getstringwidth.js n=100000 type='emojiseq'                   2.94 %       ±7.66% ±10.19% ±13.26%
misc/getstringwidth.js n=100000 type='fullwidth'                  4.70 %       ±5.64%  ±7.50%  ±9.76%

PR-URL: https://github.com/nodejs/node/pull/29301
Reviewed-By: Gus Caplan <me@gus.host>
Reviewed-By: Trivikram Kamat <trivikr.dev@gmail.com>
Reviewed-By: Ben Noordhuis <info@bnoordhuis.nl>
Reviewed-By: Colin Ihrig <cjihrig@gmail.com>
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Luigi Pinca <luigipinca@gmail.com>
Reviewed-By: Minwoo Jung <minwoo@nodesource.com>
Reviewed-By: Rich Trott <rtrott@gmail.com>
-rw-r--r-- benchmark/misc/getstringwidth.js | 26
-rw-r--r-- lib/internal/readline/utils.js | 33
-rw-r--r-- test/parallel/test-icu-stringwidth.js | 22
3 files changed, 74 insertions, 7 deletions
diff --git a/benchmark/misc/getstringwidth.js b/benchmark/misc/getstringwidth.js
new file mode 100644
index 0000000000..12f071c60d
--- /dev/null
+++ b/benchmark/misc/getstringwidth.js
@@ -0,0 +1,26 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+ type: ['ascii', 'mixed', 'emojiseq', 'fullwidth'],
+ n: [10e4]
+}, {
+ flags: ['--expose-internals']
+});
+
+function main({ n, type }) {
+ const { getStringWidth } = require('internal/readline/utils');
+
+ const str = ({
+ ascii: 'foobar'.repeat(100),
+ mixed: 'foo'.repeat(100) + '😀' + 'bar'.repeat(100),
+ emojiseq: '👨‍👨‍👧‍👦👨‍👩‍👦‍👦👨‍👩‍👧‍👧👩‍👩‍👧‍👦'.repeat(10),
+ fullwidth: '你好'.repeat(150)
+ })[type];
+
+ bench.start();
+ for (let j = 0; j < n; j += 1)
+ getStringWidth(str);
+ bench.end(n);
+}
diff --git a/lib/internal/readline/utils.js b/lib/internal/readline/utils.js
index c6cd13a6bd..f72a03bb39 100644
--- a/lib/internal/readline/utils.js
+++ b/lib/internal/readline/utils.js
@@ -34,13 +34,32 @@ if (internalBinding('config').hasIntl) {
const icu = internalBinding('icu');
getStringWidth = function getStringWidth(str, options) {
options = options || {};
- if (!Number.isInteger(str))
- str = stripVTControlCharacters(String(str));
- return icu.getStringWidth(
- str,
- Boolean(options.ambiguousAsFullWidth),
- Boolean(options.expandEmojiSequence)
- );
+ if (Number.isInteger(str)) {
+ // Provide information about the character with code point 'str'.
+ return icu.getStringWidth(
+ str,
+ Boolean(options.ambiguousAsFullWidth),
+ false
+ );
+ }
+ str = stripVTControlCharacters(String(str));
+ let width = 0;
+ for (let i = 0; i < str.length; i++) {
+ // Try to avoid calling into C++ by first handling the ASCII portion of
+ // the string. If it is fully ASCII, we skip the C++ part.
+ const code = str.charCodeAt(i);
+ if (code < 127) {
+ width += code >= 32;
+ continue;
+ }
+ width += icu.getStringWidth(
+ str.slice(i),
+ Boolean(options.ambiguousAsFullWidth),
+ Boolean(options.expandEmojiSequence)
+ );
+ break;
+ }
+ return width;
};
isFullWidthCodePoint =
function isFullWidthCodePoint(code, options) {
diff --git a/test/parallel/test-icu-stringwidth.js b/test/parallel/test-icu-stringwidth.js
index 0620d3af39..48384f916d 100644
--- a/test/parallel/test-icu-stringwidth.js
+++ b/test/parallel/test-icu-stringwidth.js
@@ -69,3 +69,25 @@ assert.strictEqual(
// Control chars and combining chars are zero
assert.strictEqual(readline.getStringWidth('\u200E\n\u220A\u20D2'), 1);
+
+// Test that the fast path for ASCII characters yields results consistent
+// with the 'slow' path.
+for (const ambiguousAsFullWidth of [ false, true ]) {
+ for (let i = 0; i < 256; i++) {
+ const char = String.fromCharCode(i);
+ assert.strictEqual(
+ readline.getStringWidth(i, { ambiguousAsFullWidth }),
+ readline.getStringWidth(char, { ambiguousAsFullWidth }));
+ assert.strictEqual(
+ readline.getStringWidth(char + '🎉', { ambiguousAsFullWidth }),
+ readline.getStringWidth(char, { ambiguousAsFullWidth }) + 2);
+
+ if (i < 32 || (i >= 127 && i < 160)) { // Control character
+ assert.strictEqual(
+ readline.getStringWidth(i, { ambiguousAsFullWidth }), 0);
+ } else if (i < 127) { // Regular ASCII character
+ assert.strictEqual(
+ readline.getStringWidth(i, { ambiguousAsFullWidth }), 1);
+ }
+ }
+}