From 4422c35c5e7e79b2443517e51a9975f55c0ea4e9 Mon Sep 17 00:00:00 2001 From: joyeecheung Date: Thu, 1 Dec 2016 19:11:43 -0600 Subject: test,url: improve escaping in url.parse - rename variables in autoEscapeStr so they are easier to understand - comment the escaping algorithm - increase coverage for autoEscapeStr PR-URL: https://github.com/nodejs/node/pull/10083 Reviewed-By: Anna Henningsen --- lib/url.js | 136 ++++++++++++++++++++++++++++++++----------------------------- 1 file changed, 71 insertions(+), 65 deletions(-) (limited to 'lib/url.js') diff --git a/lib/url.js b/lib/url.js index 5f87c0c36c..03a9fb03df 100644 --- a/lib/url.js +++ b/lib/url.js @@ -437,105 +437,111 @@ function validateHostname(self, rest, hostname) { } } +// Automatically escape all delimiters and unwise characters from RFC 2396. +// Also escape single quotes in case of an XSS attack. +// Return undefined if the string doesn't need escaping, +// otherwise return the escaped string. function autoEscapeStr(rest) { - var newRest = ''; - var lastPos = 0; + var escaped = ''; + var lastEscapedPos = 0; for (var i = 0; i < rest.length; ++i) { - // Automatically escape all delimiters and unwise characters from RFC 2396 - // Also escape single quotes in case of an XSS attack + // Manual switching is faster than using a Map/Object. + // `escaped` contains substring up to the last escaped cahracter. switch (rest.charCodeAt(i)) { case 9: // '\t' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%09'; - lastPos = i + 1; + // Concat if there are ordinary characters in the middle. + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%09'; + lastEscapedPos = i + 1; break; case 10: // '\n' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%0A'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%0A'; + lastEscapedPos = i + 1; break; case 13: // '\r' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%0D'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%0D'; + lastEscapedPos = i + 1; break; case 32: // ' ' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%20'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%20'; + lastEscapedPos = i + 1; break; case 34: // '"' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%22'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%22'; + lastEscapedPos = i + 1; break; case 39: // '\'' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%27'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%27'; + lastEscapedPos = i + 1; break; case 60: // '<' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%3C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%3C'; + lastEscapedPos = i + 1; break; case 62: // '>' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%3E'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%3E'; + lastEscapedPos = i + 1; break; case 92: // '\\' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%5C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%5C'; + lastEscapedPos = i + 1; break; case 94: // '^' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%5E'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%5E'; + lastEscapedPos = i + 1; break; case 96: // '`' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%60'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%60'; + lastEscapedPos = i + 1; break; case 123: // '{' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7B'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7B'; + lastEscapedPos = i + 1; break; case 124: // '|' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7C'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7C'; + lastEscapedPos = i + 1; break; case 125: // '}' - if (i - lastPos > 0) - newRest += rest.slice(lastPos, i); - newRest += '%7D'; - lastPos = i + 1; + if (i > lastEscapedPos) + escaped += rest.slice(lastEscapedPos, i); + escaped += '%7D'; + lastEscapedPos = i + 1; break; } } - if (lastPos === 0) + if (lastEscapedPos === 0) // Nothing has been escaped. return; - if (lastPos < rest.length) - return newRest + rest.slice(lastPos); - else - return newRest; + // There are ordinary characters at the end. + if (lastEscapedPos < rest.length) + return escaped + rest.slice(lastEscapedPos); + else // The last character is escaped. + return escaped; } // format a parsed object into a url string -- cgit v1.2.3