From a365bb9cb100514efc3f57f7fac03d0679f73ff5 Mon Sep 17 00:00:00 2001 From: Joyee Cheung Date: Sun, 11 Nov 2018 16:44:19 +0800 Subject: benchmark: pre-generate data set for URL benchmarks This patch: - Introduces `common.bakeUrlData` which can be used to pre-generate the data set for the URL benchmarks to loop through instead of looping over a constant. - Adds the option to use WPT data in benchmarks for better diversity in the input - Adds the option to benchmark URL parsing with base URLs (whatwg only) - Moves the data in `benchmark/fixtures/url-inputs.js` to `benchmark/common.js` PR-URL: https://github.com/nodejs/node/pull/24302 Reviewed-By: Matteo Collina Reviewed-By: James M Snell --- benchmark/common.js | 92 ++++++++++++++++++++++ benchmark/fixtures/url-inputs.js | 30 ------- benchmark/querystring/querystring-parse.js | 2 +- benchmark/url/legacy-vs-whatwg-url-get-prop.js | 44 +++++------ benchmark/url/legacy-vs-whatwg-url-parse.js | 58 ++++++++------ .../url/legacy-vs-whatwg-url-searchparams-parse.js | 2 +- .../legacy-vs-whatwg-url-searchparams-serialize.js | 2 +- benchmark/url/legacy-vs-whatwg-url-serialize.js | 39 +++++---- benchmark/url/url-resolve.js | 2 +- benchmark/url/url-searchparams-sort.js | 1 + benchmark/url/whatwg-url-properties.js | 63 ++++++--------- 11 files changed, 198 insertions(+), 137 deletions(-) delete mode 100644 benchmark/fixtures/url-inputs.js (limited to 'benchmark') diff --git a/benchmark/common.js b/benchmark/common.js index c76831b573..d5e0494c16 100644 --- a/benchmark/common.js +++ b/benchmark/common.js @@ -254,3 +254,95 @@ exports.binding = function(bindingName) { return process.binding(bindingName); } }; + +const urls = { + long: 'http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/' + + '/5d49/b3020/url.html#test?payload1=true&payload2=false&test=1' + + '&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&' + + 'key=f5c65e1e98fe07e648249ad41e1cfdb0', + short: 'https://nodejs.org/en/blog/', + idn: 
'http://你好你好.在线', + auth: 'https://user:pass@example.com/path?search=1', + file: 'file:///foo/bar/test/node.js', + ws: 'ws://localhost:9229/f46db715-70df-43ad-a359-7f9949f39868', + javascript: 'javascript:alert("node is awesome");', + percent: 'https://%E4%BD%A0/foo', + dot: 'https://example.org/./a/../b/./c' +}; +exports.urls = urls; + +const searchParams = { + noencode: 'foo=bar&baz=quux&xyzzy=thud', + multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', + encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', + encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', + encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', + multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', + multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + + 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', + manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z', + manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&', + altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi' +}; +exports.searchParams = searchParams; + +function getUrlData(withBase) { + const data = require('../test/fixtures/wpt/url/resources/urltestdata.json'); + const result = []; + for (const item of data) { + if (item.failure || !item.input) continue; + if (withBase) { + result.push([item.input, item.base]); + } else if (item.base !== 'about:blank') { + result.push(item.base); + } + } + return result; +} + +exports.urlDataTypes = Object.keys(urls).concat(['wpt']); + +/** + * Generate an array of data for URL benchmarks to use. + * The size of the resulting data set is the original data size * 2 ** `e`. + * The 'wpt' type contains about 400 data points when `withBase` is true, + * and 200 data points when `withBase` is false. + * Other types contain 200 data points with or without base. 
+ * + * @param {string} type Type of the data, 'wpt' or a key of `urls` + * @param {number} e The repetition of the data, as exponent of 2 + * @param {boolean} withBase Whether to include a base URL + * @param {boolean} asUrl Whether to return the results as URL objects + * @return {string[] | string[][] | URL[]} + */ +function bakeUrlData(type, e = 0, withBase = false, asUrl = false) { + let result = []; + if (type === 'wpt') { + result = getUrlData(withBase); + } else if (urls[type]) { + const input = urls[type]; + const item = withBase ? [input, 'about:blank'] : input; + // Roughly the size of WPT URL test data + result = new Array(200).fill(item); + } else { + throw new Error(`Unknown url data type ${type}`); + } + + if (typeof e !== 'number') { + throw new Error(`e must be a number, received ${e}`); + } + + for (let i = 0; i < e; ++i) { + result = result.concat(result); + } + + if (asUrl) { + if (withBase) { + result = result.map(([input, base]) => new URL(input, base)); + } else { + result = result.map((input) => new URL(input)); + } + } + return result; +} +exports.bakeUrlData = bakeUrlData; diff --git a/benchmark/fixtures/url-inputs.js b/benchmark/fixtures/url-inputs.js deleted file mode 100644 index 7b1983f6fa..0000000000 --- a/benchmark/fixtures/url-inputs.js +++ /dev/null @@ -1,30 +0,0 @@ -'use strict'; - -exports.urls = { - long: 'http://nodejs.org:89/docs/latest/api/foo/bar/qua/13949281/0f28b/' + - '/5d49/b3020/url.html#test?payload1=true&payload2=false&test=1' + - '&benchmark=3&foo=38.38.011.293&bar=1234834910480&test=19299&3992&' + - 'key=f5c65e1e98fe07e648249ad41e1cfdb0', - short: 'https://nodejs.org/en/blog/', - idn: 'http://你好你好.在线', - auth: 'https://user:pass@example.com/path?search=1', - file: 'file:///foo/bar/test/node.js', - ws: 'ws://localhost:9229/f46db715-70df-43ad-a359-7f9949f39868', - javascript: 'javascript:alert("node is awesome");', - percent: 'https://%E4%BD%A0/foo', - dot: 'https://example.org/./a/../b/./c' -}; - 
-exports.searchParams = { - noencode: 'foo=bar&baz=quux&xyzzy=thud', - multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', - encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', - encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', - encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', - multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', - multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + - 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', - manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z', - manyblankpairs: '&&&&&&&&&&&&&&&&&&&&&&&&', - altspaces: 'foo+bar=baz+quux&xyzzy+thud=quuy+quuz&abc=def+ghi' -}; diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js index db650165eb..ca4dca13bc 100644 --- a/benchmark/querystring/querystring-parse.js +++ b/benchmark/querystring/querystring-parse.js @@ -1,7 +1,7 @@ 'use strict'; const common = require('../common.js'); const querystring = require('querystring'); -const inputs = require('../fixtures/url-inputs.js').searchParams; +const inputs = common.searchParams; const bench = common.createBenchmark(main, { type: Object.keys(inputs), diff --git a/benchmark/url/legacy-vs-whatwg-url-get-prop.js b/benchmark/url/legacy-vs-whatwg-url-get-prop.js index 2cc3ab8c75..59bb4724f4 100644 --- a/benchmark/url/legacy-vs-whatwg-url-get-prop.js +++ b/benchmark/url/legacy-vs-whatwg-url-get-prop.js @@ -3,20 +3,15 @@ const common = require('../common.js'); const url = require('url'); const URL = url.URL; const assert = require('assert'); -const inputs = require('../fixtures/url-inputs.js').urls; const bench = common.createBenchmark(main, { - type: Object.keys(inputs), + type: common.urlDataTypes, method: ['legacy', 'whatwg'], - n: [1e5] + e: [1] }); -// At the time of writing, when using a passed property name to index -// the object, Crankshaft would generate a LoadKeyedGeneric even when it -// remains a constant in the function, so here we must use the 
literal -// instead to get a LoadNamedField. -function useLegacy(n, input) { - const obj = url.parse(input); +function useLegacy(data) { + const obj = url.parse(data[0]); const noDead = { protocol: obj.protocol, auth: obj.auth, @@ -27,10 +22,12 @@ function useLegacy(n, input) { search: obj.search, hash: obj.hash }; + const len = data.length; // It's necessary to assign the values to an object // to avoid loop invariant code motion. bench.start(); - for (var i = 0; i < n; i += 1) { + for (var i = 0; i < len; i++) { + const obj = data[i]; noDead.protocol = obj.protocol; noDead.auth = obj.auth; noDead.host = obj.host; @@ -40,12 +37,12 @@ function useLegacy(n, input) { noDead.search = obj.search; noDead.hash = obj.hash; } - bench.end(n); + bench.end(len); return noDead; } -function useWHATWG(n, input) { - const obj = new URL(input); +function useWHATWG(data) { + const obj = new URL(data[0]); const noDead = { protocol: obj.protocol, auth: `${obj.username}:${obj.password}`, @@ -56,8 +53,10 @@ function useWHATWG(n, input) { search: obj.search, hash: obj.hash }; + const len = data.length; bench.start(); - for (var i = 0; i < n; i += 1) { + for (var i = 0; i < len; i++) { + const obj = data[i]; noDead.protocol = obj.protocol; noDead.auth = `${obj.username}:${obj.password}`; noDead.host = obj.host; @@ -67,23 +66,22 @@ function useWHATWG(n, input) { noDead.search = obj.search; noDead.hash = obj.hash; } - bench.end(n); + bench.end(len); return noDead; } -function main({ type, n, method }) { - const input = inputs[type]; - if (!input) { - throw new Error(`Unknown input type "${type}"`); - } - +function main({ type, method, e }) { + e = +e; + var data; var noDead; // Avoid dead code elimination. 
switch (method) { case 'legacy': - noDead = useLegacy(n, input); + data = common.bakeUrlData(type, e, false, false); + noDead = useLegacy(data.map((i) => url.parse(i))); break; case 'whatwg': - noDead = useWHATWG(n, input); + data = common.bakeUrlData(type, e, false, true); + noDead = useWHATWG(data); break; default: throw new Error(`Unknown method "${method}"`); diff --git a/benchmark/url/legacy-vs-whatwg-url-parse.js b/benchmark/url/legacy-vs-whatwg-url-parse.js index 2be55e17cc..e4af2e0b7c 100644 --- a/benchmark/url/legacy-vs-whatwg-url-parse.js +++ b/benchmark/url/legacy-vs-whatwg-url-parse.js @@ -3,47 +3,61 @@ const common = require('../common.js'); const url = require('url'); const URL = url.URL; const assert = require('assert'); -const inputs = require('../fixtures/url-inputs.js').urls; const bench = common.createBenchmark(main, { - type: Object.keys(inputs), - method: ['legacy', 'whatwg'], - n: [1e5] + withBase: ['true', 'false'], + type: common.urlDataTypes, + e: [1], + method: ['legacy', 'whatwg'] }); -function useLegacy(n, input) { - var noDead = url.parse(input); +function useLegacy(data) { + const len = data.length; + var result = url.parse(data[0]); // avoid dead code elimination bench.start(); - for (var i = 0; i < n; i += 1) { - noDead = url.parse(input); + for (var i = 0; i < len; ++i) { + result = url.parse(data[i]); } - bench.end(n); - return noDead; + bench.end(len); + return result; } -function useWHATWG(n, input) { - var noDead = new URL(input); +function useWHATWGWithBase(data) { + const len = data.length; + var result = new URL(data[0][0], data[0][1]); // avoid dead code elimination bench.start(); - for (var i = 0; i < n; i += 1) { - noDead = new URL(input); + for (var i = 0; i < len; ++i) { + const item = data[i]; + result = new URL(item[0], item[1]); } - bench.end(n); - return noDead; + bench.end(len); + return result; } -function main({ type, n, method }) { - const input = inputs[type]; - if (!input) { - throw new Error(`Unknown input 
type "${type}"`); +function useWHATWGWithoutBase(data) { + const len = data.length; + var result = new URL(data[0]); // avoid dead code elimination + bench.start(); + for (var i = 0; i < len; ++i) { + result = new URL(data[i]); } + bench.end(len); + return result; +} +function main({ e, method, type, withBase }) { + e = +e; + withBase = withBase === 'true'; var noDead; // Avoid dead code elimination. + var data; switch (method) { case 'legacy': - noDead = useLegacy(n, input); + data = common.bakeUrlData(type, e, false, false); + noDead = useLegacy(data); break; case 'whatwg': - noDead = useWHATWG(n, input); + data = common.bakeUrlData(type, e, withBase, false); + noDead = withBase ? useWHATWGWithBase(data) : useWHATWGWithoutBase(data); break; default: throw new Error(`Unknown method ${method}`); diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js index 6b054d0b2a..81b5b6dc16 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-parse.js @@ -2,7 +2,7 @@ const common = require('../common.js'); const { URLSearchParams } = require('url'); const querystring = require('querystring'); -const searchParams = require('../fixtures/url-inputs.js').searchParams; +const searchParams = common.searchParams; const bench = common.createBenchmark(main, { searchParam: Object.keys(searchParams), diff --git a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js index 54fdd95654..f97961decf 100644 --- a/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js +++ b/benchmark/url/legacy-vs-whatwg-url-searchparams-serialize.js @@ -2,7 +2,7 @@ const common = require('../common.js'); const { URLSearchParams } = require('url'); const querystring = require('querystring'); -const searchParams = require('../fixtures/url-inputs.js').searchParams; +const searchParams = 
common.searchParams; const bench = common.createBenchmark(main, { searchParam: Object.keys(searchParams), diff --git a/benchmark/url/legacy-vs-whatwg-url-serialize.js b/benchmark/url/legacy-vs-whatwg-url-serialize.js index 017ec4328c..e4c821cb2e 100644 --- a/benchmark/url/legacy-vs-whatwg-url-serialize.js +++ b/benchmark/url/legacy-vs-whatwg-url-serialize.js @@ -3,49 +3,48 @@ const common = require('../common.js'); const url = require('url'); const URL = url.URL; const assert = require('assert'); -const inputs = require('../fixtures/url-inputs.js').urls; const bench = common.createBenchmark(main, { - type: Object.keys(inputs), + type: common.urlDataTypes, method: ['legacy', 'whatwg'], - n: [1e5] + e: [1] }); -function useLegacy(n, input, prop) { - const obj = url.parse(input); +function useLegacy(data) { + const obj = url.parse(data[0]); + const len = data.length; var noDead = url.format(obj); bench.start(); - for (var i = 0; i < n; i += 1) { - noDead = url.format(obj); + for (var i = 0; i < len; i++) { + noDead = data[i].toString(); } - bench.end(n); + bench.end(len); return noDead; } -function useWHATWG(n, input, prop) { - const obj = new URL(input); +function useWHATWG(data) { + const obj = new URL(data[0]); + const len = data.length; var noDead = obj.toString(); bench.start(); - for (var i = 0; i < n; i += 1) { - noDead = obj.toString(); + for (var i = 0; i < len; i++) { + noDead = data[i].toString(); } - bench.end(n); + bench.end(len); return noDead; } -function main({ type, n, method }) { - const input = inputs[type]; - if (!input) { - throw new Error(`Unknown input type "${type}"`); - } +function main({ type, e, method }) { + e = +e; + const data = common.bakeUrlData(type, e, false, false); var noDead; // Avoid dead code elimination. 
switch (method) { case 'legacy': - noDead = useLegacy(n, input); + noDead = useLegacy(data); break; case 'whatwg': - noDead = useWHATWG(n, input); + noDead = useWHATWG(data); break; default: throw new Error(`Unknown method ${method}`); diff --git a/benchmark/url/url-resolve.js b/benchmark/url/url-resolve.js index 48978574ea..bd584c6f60 100644 --- a/benchmark/url/url-resolve.js +++ b/benchmark/url/url-resolve.js @@ -1,7 +1,7 @@ 'use strict'; const common = require('../common.js'); const url = require('url'); -const hrefs = require('../fixtures/url-inputs.js').urls; +const hrefs = common.urls; hrefs.noscheme = 'some.ran/dom/url.thing?oh=yes#whoo'; const paths = { diff --git a/benchmark/url/url-searchparams-sort.js b/benchmark/url/url-searchparams-sort.js index fe152bf823..6720b66dca 100644 --- a/benchmark/url/url-searchparams-sort.js +++ b/benchmark/url/url-searchparams-sort.js @@ -3,6 +3,7 @@ const common = require('../common.js'); const URLSearchParams = require('url').URLSearchParams; const inputs = { + wpt: 'wpt', // to work around tests empty: '', sorted: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z', almostsorted: 'a&b&c&d&e&f&g&i&h&j&k&l&m&n&o&p&q&r&s&t&u&w&v&x&y&z', diff --git a/benchmark/url/whatwg-url-properties.js b/benchmark/url/whatwg-url-properties.js index f526c07f13..6961fec49e 100644 --- a/benchmark/url/whatwg-url-properties.js +++ b/benchmark/url/whatwg-url-properties.js @@ -1,55 +1,42 @@ 'use strict'; const common = require('../common.js'); -const URL = require('url').URL; -const inputs = require('../fixtures/url-inputs.js').urls; const bench = common.createBenchmark(main, { - input: Object.keys(inputs), + withBase: ['true', 'false'], + type: ['wpt'], // Too many combinations - just use WPT by default + e: [1], prop: ['href', 'origin', 'protocol', 'username', 'password', 'host', 'hostname', 'port', - 'pathname', 'search', 'searchParams', 'hash'], - n: [3e5] + 'pathname', 'search', 'searchParams', 'hash'] }); -function setAndGet(n, url, 
prop, alternative) { - const old = url[prop]; +function setAndGet(data, prop) { + const len = data.length; + var result = data[0][prop]; bench.start(); - for (var i = 0; i < n; i += 1) { - url[prop] = n % 2 === 0 ? alternative : old; // set - url[prop]; // get + for (var i = 0; i < len; ++i) { + result = data[i][prop]; + data[i][prop] = result; } - bench.end(n); + bench.end(len); + return result; } -function get(n, url, prop) { +function get(data, prop) { + const len = data.length; + var result = data[0][prop]; bench.start(); - for (var i = 0; i < n; i += 1) { - url[prop]; // get + for (var i = 0; i < len; ++i) { + result = data[i][prop]; // get } - bench.end(n); + bench.end(len); + return result; } -const alternatives = { - href: 'http://user:pass@foo.bar.com:21/aaa/zzz?l=25#test', - protocol: 'https:', - username: 'user2', - password: 'pass2', - host: 'foo.bar.net:22', - hostname: 'foo.bar.org', - port: '23', - pathname: '/aaa/bbb', - search: '?k=99', - hash: '#abcd' -}; - -function getAlternative(prop) { - return alternatives[prop]; -} - -function main({ n, input, prop }) { - const value = inputs[input]; - const url = new URL(value); - +function main({ e, type, prop, withBase }) { + e = +e; + withBase = withBase === 'true'; + const data = common.bakeUrlData(type, e, withBase, true); switch (prop) { case 'protocol': case 'username': @@ -61,11 +48,11 @@ function main({ n, input, prop }) { case 'search': case 'hash': case 'href': - setAndGet(n, url, prop, getAlternative(prop)); + setAndGet(data, prop); break; case 'origin': case 'searchParams': - get(n, url, prop); + get(data, prop); break; default: throw new Error('Unknown prop'); -- cgit v1.2.3