http: disallow two-byte characters in URL path

This commit changes node's handling of two-byte characters in the path component of an http URL. Previously, node would just strip the higher byte when generating the request. So this code:

```
http.request({host: "example.com", port: "80", path: "/Ｎ"})
```

would request `http://example.com/.` (`.` is the character for the byte `0x2e`, the low byte of `Ｎ`, U+FF2E). This is not useful and can in some cases lead to filter evasion. With this change, the code throws `ERR_UNESCAPED_CHARACTERS`, just as it already did for space and control characters.

PR-URL: https://github.com/nodejs/node/pull/16237
Reviewed-By: James M Snell <jasnell@gmail.com>
Reviewed-By: Anna Henningsen <anna@addaleax.net>
Reviewed-By: Anatoli Papirovski <apapirovski@mac.com>
Reviewed-By: Ruben Bridgewater <ruben@bridgewater.de>
Reviewed-By: Timothy Gu <timothygu99@gmail.com>
author: Benno Fünfstück <benno.fuenfstueck@gmail.com> 2017-10-16 15:36:32 +0200
committer: Ruben Bridgewater <ruben@bridgewater.de> 2017-12-12 12:30:12 -0200
commit: b961d9fd83c963657c2305ed13ff447573eac852 (patch)
tree: 2bc5cb7212f20bd15a41d5638aa366fa47781321 /lib/_http_client.js
parent: ac25cee2e22ac4c64e4a92b33fe3784648b97072 (diff)
download: android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.tar.gz
android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.tar.bz2
android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.zip
1 file changed, 2 insertions, 34 deletions
diff --git a/lib/_http_client.js b/lib/_http_client.js
index 5b56862800..bdda708493 100644
--- a/lib/_http_client.js
+++ b/lib/_http_client.js
@@ -41,33 +41,7 @@ const { outHeadersKey } = require('internal/http');
 const { nextTick } = require('internal/process/next_tick');
 const errors = require('internal/errors');
 
-// The actual list of disallowed characters in regexp form is more like:
-//    /[^A-Za-z0-9\-._~!$&'()*+,;=/:@]/
-// with an additional rule for ignoring percentage-escaped characters, but
-// that's a) hard to capture in a regular expression that performs well, and
-// b) possibly too restrictive for real-world usage. So instead we restrict the
-// filter to just control characters and spaces.
-//
-// This function is used in the case of small paths, where manual character code
-// checks can greatly outperform the equivalent regexp (tested in V8 5.4).
-function isInvalidPath(s) {
-  var i = 0;
-  if (s.charCodeAt(0) <= 32) return true;
-  if (++i >= s.length) return false;
-  if (s.charCodeAt(1) <= 32) return true;
-  if (++i >= s.length) return false;
-  if (s.charCodeAt(2) <= 32) return true;
-  if (++i >= s.length) return false;
-  if (s.charCodeAt(3) <= 32) return true;
-  if (++i >= s.length) return false;
-  if (s.charCodeAt(4) <= 32) return true;
-  if (++i >= s.length) return false;
-  if (s.charCodeAt(5) <= 32) return true;
-  ++i;
-  for (; i < s.length; ++i)
-    if (s.charCodeAt(i) <= 32) return true;
-  return false;
-}
+const INVALID_PATH_REGEX = /[^\u0021-\u00ff]/;
 
 function validateHost(host, name) {
   if (host != null && typeof host !== 'string') {
@@ -117,13 +91,7 @@ function ClientRequest(options, cb) {
   var path;
   if (options.path) {
     path = String(options.path);
-    var invalidPath;
-    if (path.length <= 39) { // Determined experimentally in V8 5.4
-      invalidPath = isInvalidPath(path);
-    } else {
-      invalidPath = /[\u0000-\u0020]/.test(path);
-    }
-    if (invalidPath)
+    if (INVALID_PATH_REGEX.test(path))
       throw new errors.TypeError('ERR_UNESCAPED_CHARACTERS', 'Request path');
   }
author	Benno Fünfstück <benno.fuenfstueck@gmail.com>	2017-10-16 15:36:32 +0200
committer	Ruben Bridgewater <ruben@bridgewater.de>	2017-12-12 12:30:12 -0200
commit	b961d9fd83c963657c2305ed13ff447573eac852 (patch)
tree	2bc5cb7212f20bd15a41d5638aa366fa47781321 /lib/_http_client.js
parent	ac25cee2e22ac4c64e4a92b33fe3784648b97072 (diff)
download	android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.tar.gz android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.tar.bz2 android-node-v8-b961d9fd83c963657c2305ed13ff447573eac852.zip