summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ruben Bridgewater <ruben@bridgewater.de> 2018-02-14 23:48:35 +0100
committer Ruben Bridgewater <ruben@bridgewater.de> 2018-03-02 02:09:25 +0000
commit 341770fedf77ff5b8e0c646070029152b58fc746 (patch)
tree 3b44a68a906b482d100d439c92166a733c9f8254
parent 876836b13526fbfc638178f119773aee27c744af (diff)
download android-node-v8-341770fedf77ff5b8e0c646070029152b58fc746.tar.gz
android-node-v8-341770fedf77ff5b8e0c646070029152b58fc746.tar.bz2
android-node-v8-341770fedf77ff5b8e0c646070029152b58fc746.zip
lib: improve normalize encoding performance
This focuses on the common case by making sure they are prioritized.
It also changes some typeof checks to test for undefined since that is
faster and it adds a benchmark.

PR-URL: https://github.com/nodejs/node/pull/18790
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Benjamin Gruenbaum <benjamingr@gmail.com>
Reviewed-By: Matteo Collina <matteo.collina@gmail.com>
-rw-r--r-- benchmark/buffers/buffer-normalize-encoding.js 43
-rw-r--r-- lib/buffer.js 4
-rw-r--r-- lib/internal/util.js 77
-rw-r--r-- lib/string_decoder.js 10
4 files changed, 101 insertions, 33 deletions
diff --git a/benchmark/buffers/buffer-normalize-encoding.js b/benchmark/buffers/buffer-normalize-encoding.js
new file mode 100644
index 0000000000..7a820465bd
--- /dev/null
+++ b/benchmark/buffers/buffer-normalize-encoding.js
@@ -0,0 +1,43 @@
+'use strict';
+
+const common = require('../common.js');
+
+const bench = common.createBenchmark(main, {
+ encoding: [
+ 'ascii',
+ 'ASCII',
+ 'base64',
+ 'BASE64',
+ 'binary',
+ 'BINARY',
+ 'hex',
+ 'HEX',
+ 'latin1',
+ 'LATIN1',
+ 'ucs-2',
+ 'UCS-2',
+ 'ucs2',
+ 'UCS2',
+ 'utf-16le',
+ 'UTF-16LE',
+ 'utf-8',
+ 'UTF-8',
+ 'utf16le',
+ 'UTF16LE',
+ 'utf8',
+ 'UTF8'
+ ],
+ n: [1e6]
+}, {
+ flags: ['--expose-internals']
+});
+
+function main({ encoding, n }) {
+ const { normalizeEncoding } = require('internal/util');
+
+ bench.start();
+ for (var i = 0; i < n; i++) {
+ normalizeEncoding(encoding);
+ }
+ bench.end(n);
+}
diff --git a/lib/buffer.js b/lib/buffer.js
index 07bd63c0ae..68cebedcc9 100644
--- a/lib/buffer.js
+++ b/lib/buffer.js
@@ -242,7 +242,7 @@ function assertSize(size) {
err = new errors.RangeError('ERR_INVALID_OPT_VALUE', 'size', size);
}
- if (err) {
+ if (err !== null) {
Error.captureStackTrace(err, assertSize);
throw err;
}
@@ -428,7 +428,7 @@ Buffer.compare = function compare(a, b) {
Buffer.isEncoding = function isEncoding(encoding) {
return typeof encoding === 'string' &&
- typeof normalizeEncoding(encoding) === 'string';
+ normalizeEncoding(encoding) !== undefined;
};
Buffer[kIsEncodingSymbol] = Buffer.isEncoding;
diff --git a/lib/internal/util.js b/lib/internal/util.js
index 2516b84f34..b144063ee5 100644
--- a/lib/internal/util.js
+++ b/lib/internal/util.js
@@ -96,36 +96,59 @@ function assertCrypto() {
throw new errors.Error('ERR_NO_CRYPTO');
}
-// The loop should only run at most twice, retrying with lowercased enc
-// if there is no match in the first pass.
-// We use a loop instead of branching to retry with a helper
-// function in order to avoid the performance hit.
// Return undefined if there is no match.
+// Move the "slow cases" to a separate function to make sure this function gets
+// inlined properly. That prioritizes the common case.
function normalizeEncoding(enc) {
- if (enc == null || enc === '') return 'utf8';
- let retried;
- while (true) {
- switch (enc) {
- case 'utf8':
- case 'utf-8':
- return 'utf8';
- case 'ucs2':
- case 'ucs-2':
- case 'utf16le':
- case 'utf-16le':
+ if (enc == null || enc === 'utf8' || enc === 'utf-8') return 'utf8';
+ return slowCases(enc);
+}
+
+function slowCases(enc) {
+ switch (enc.length) {
+ case 4:
+ if (enc === 'UTF8') return 'utf8';
+ if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
+ enc = `${enc}`.toLowerCase();
+ if (enc === 'utf8') return 'utf8';
+ if (enc === 'ucs2' || enc === 'UCS2') return 'utf16le';
+ break;
+ case 3:
+ if (enc === 'hex' || enc === 'HEX' || `${enc}`.toLowerCase() === 'hex')
+ return 'hex';
+ break;
+ case 5:
+ if (enc === 'ascii') return 'ascii';
+ if (enc === 'ucs-2') return 'utf16le';
+ if (enc === 'UTF-8') return 'utf8';
+ if (enc === 'ASCII') return 'ascii';
+ if (enc === 'UCS-2') return 'utf16le';
+ enc = `${enc}`.toLowerCase();
+ if (enc === 'utf-8') return 'utf8';
+ if (enc === 'ascii') return 'ascii';
+ if (enc === 'ucs-2') return 'utf16le';
+ break;
+ case 6:
+ if (enc === 'base64') return 'base64';
+ if (enc === 'latin1' || enc === 'binary') return 'latin1';
+ if (enc === 'BASE64') return 'base64';
+ if (enc === 'LATIN1' || enc === 'BINARY') return 'latin1';
+ enc = `${enc}`.toLowerCase();
+ if (enc === 'base64') return 'base64';
+ if (enc === 'latin1' || enc === 'binary') return 'latin1';
+ break;
+ case 7:
+ if (enc === 'utf16le' || enc === 'UTF16LE' ||
+ `${enc}`.toLowerCase() === 'utf16le')
return 'utf16le';
- case 'latin1':
- case 'binary':
- return 'latin1';
- case 'base64':
- case 'ascii':
- case 'hex':
- return enc;
- default:
- if (retried) return; // undefined
- enc = ('' + enc).toLowerCase();
- retried = true;
- }
+ break;
+ case 8:
+ if (enc === 'utf-16le' || enc === 'UTF-16LE' ||
+ `${enc}`.toLowerCase() === 'utf-16le')
+ return 'utf16le';
+ break;
+ default:
+ if (enc === '') return 'utf8';
}
}
diff --git a/lib/string_decoder.js b/lib/string_decoder.js
index 04d31b2607..18097be0e6 100644
--- a/lib/string_decoder.js
+++ b/lib/string_decoder.js
@@ -43,10 +43,12 @@ const kNativeDecoder = Symbol('kNativeDecoder');
// modules monkey-patch it to support additional encodings
function normalizeEncoding(enc) {
const nenc = internalUtil.normalizeEncoding(enc);
- if (typeof nenc !== 'string' &&
- (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc)))
- throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
- return nenc || enc;
+ if (nenc === undefined) {
+ if (Buffer.isEncoding === isEncoding || !Buffer.isEncoding(enc))
+ throw new errors.TypeError('ERR_UNKNOWN_ENCODING', enc);
+ return enc;
+ }
+ return nenc;
}
const encodingsMap = {};