diff options
author | Jeremy Selier <jeremy@jolicloud.com> | 2011-06-13 14:43:16 +0200 |
---|---|---|
committer | isaacs <i@izs.me> | 2011-07-06 13:17:50 -0700 |
commit | 2a848fa7279002259a3e651223ce6f3230ca22d7 (patch) | |
tree | 8e6ff6fbea3a22091537d966d3057dbc05a00632 /lib/url.js | |
parent | 08a334fa45e8e303b718226f7384b5c0cef43c19 (diff) | |
download | android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.gz android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.bz2 android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.zip |
Close #1149 IDNA and Punycode support in url.parse
Using @bnoordhuis's punycode lib.
Close #1174 also
Diffstat (limited to 'lib/url.js')
-rw-r--r-- | lib/url.js | 54 |
1 files changed, 44 insertions, 10 deletions
diff --git a/lib/url.js b/lib/url.js index 8b01c8548f..ed90e5cad1 100644 --- a/lib/url.js +++ b/lib/url.js @@ -19,6 +19,8 @@ // OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE // USE OR OTHER DEALINGS IN THE SOFTWARE. +var punycode = require('punycode'); + exports.parse = urlParse; exports.resolve = urlResolve; exports.resolveObject = urlResolveObject; @@ -183,24 +185,56 @@ function urlParse(url, parseQueryString, slashesDenoteHost) { var part = hostparts[i]; if (!part) continue; if (!part.match(hostnamePartPattern)) { - var validParts = hostparts.slice(0, i); - var notHost = hostparts.slice(i + 1); - var bit = part.match(hostnamePartStart); - if (bit) { - validParts.push(bit[1]); - notHost.unshift(bit[2]); + var newpart = ''; + for (var j = 0, k = part.length; j < k; j++) { + if (part.charCodeAt(j) > 127) { + // we replace non-ASCII char with a temporary placeholder + // we need this to make sure size of hostname is not + // broken by replacing non-ASCII by nothing + newpart += 'x'; + } else { + newpart += part[j]; + } } - if (notHost.length) { - rest = '/' + notHost.join('.') + rest + // we test again with ASCII char only + if (!newpart.match(hostnamePartPattern)) { + var validParts = hostparts.slice(0, i); + var notHost = hostparts.slice(i + 1); + var bit = part.match(hostnamePartStart); + if (bit) { + validParts.push(bit[1]); + notHost.unshift(bit[2]); + } + if (notHost.length) { + rest = '/' + notHost.join('.') + rest; + } + out.hostname = validParts.join('.'); + break; } - out.hostname = validParts.join('.'); - break; } } } + // hostnames are always lower case. out.hostname = out.hostname.toLowerCase(); + // IDNA Support: Returns a puny coded representation of "domain". + // It only converts the part of the domain name that + // has non ASCII characters. I.e. it dosent matter if + // you call it with a domain that already is in ASCII. + try { + var domainArray = out.hostname.split('.'); + var newOut = []; + for (var i = 0; i < domainArray.length; ++i) { + var s = domainArray[i]; + newOut.push(s.match(/[^A-Za-z0-9-]/) ? + 'xn--' + punycode.encode(s) : s); + } + out.hostname = newOut.join('.'); + } catch (e) { + // if encode fail for some reason, we just do the classic behavior. + } + out.host = ((out.auth) ? out.auth + '@' : '') + (out.hostname || '') + ((out.port) ? ':' + out.port : ''); |