aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Jeremy Selier <jeremy@jolicloud.com> 2011-06-13 14:43:16 +0200
committer isaacs <i@izs.me> 2011-07-06 13:17:50 -0700
commit 2a848fa7279002259a3e651223ce6f3230ca22d7 (patch)
tree 8e6ff6fbea3a22091537d966d3057dbc05a00632 /lib
parent 08a334fa45e8e303b718226f7384b5c0cef43c19 (diff)
download android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.gz
android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.bz2
android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.zip
Close #1149 IDNA and Punycode support in url.parse
Using @bnoordhuis's punycode lib. Close #1174 also
Diffstat (limited to 'lib')
-rw-r--r-- lib/punycode.js 218
-rw-r--r-- lib/url.js 54
2 files changed, 262 insertions, 10 deletions
diff --git a/lib/punycode.js b/lib/punycode.js
new file mode 100644
index 0000000000..a7c07a6634
--- /dev/null
+++ b/lib/punycode.js
@@ -0,0 +1,218 @@
+// Copyright (C) 2011 by Ben Noordhuis <info@bnoordhuis.nl>
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in
+// all copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+// THE SOFTWARE.
+
+// Tuning parameters shared by the helpers, the encoder and the decoder.
+var BASE = 36,          // step of k in the digit loops; digits are 0..35
+    TMIN = 1,           // smallest value threshold() returns
+    TMAX = 26,          // largest value threshold() returns
+    SKEW = 38,          // added to delta in the divisor of adapt_bias()
+    DAMP = 700,         // initial bias scaler
+    INITIAL_N = 128,    // first code point that is not copied verbatim
+    INITIAL_BIAS = 72;  // bias encode() and decode() start with
+
+// Public API. encode and decode are function declarations, so they are
+// hoisted and already defined at this point.
+exports.encode = encode;
+exports.decode = decode;
+
+// Recomputes the bias after a delta has been encoded or decoded.
+//
+//   delta    - the delta that was just processed
+//   n_points - number of code points handled so far, including this one
+//   is_first - true for the very first delta, which is divided by DAMP
+//              instead of being halved
+//
+// Returns the new bias as an integer. Every division here is an integer
+// division. The initial scaling in particular has to be truncated: a
+// fractional delta would leak into the final quotient and can produce a
+// bias that is off by one (delta=55, n_points=100 must yield 14, not 15).
+function adapt_bias(delta, n_points, is_first) {
+  // scale back (integer division), then increase delta
+  delta = ~~(delta / (is_first ? DAMP : 2));
+  delta += ~~(delta / n_points);
+
+  var s = (BASE - TMIN);
+  var t = ~~((s * TMAX) / 2); // threshold=455
+
+  // k counts how many times delta had to be scaled down, in steps of BASE
+  for (var k = 0; delta > t; k += BASE) {
+    delta = ~~(delta / s);
+  }
+
+  var a = (BASE - TMIN + 1) * delta;
+  var b = (delta + SKEW);
+
+  return k + ~~(a / b);
+}
+
+// Returns the smallest entry of `codepoints` that is not below `n`.
+// Throws when no entry qualifies.
+function next_smallest_codepoint(codepoints, n) {
+  var LIMIT = 0x110000; // unicode upper bound + 1
+
+  var lowest = codepoints.reduce(function(best, candidate) {
+    return (candidate >= n && candidate < best) ? candidate : best;
+  }, LIMIT);
+
+  // sanity check - should not happen
+  if (lowest >= LIMIT) {
+    throw new Error('Next smallest code point not found.');
+  }
+
+  return lowest;
+}
+
+// Maps a digit value to a character code: 0..25 become 'a'..'z' (97..122),
+// anything else is shifted by 22 so that 26..35 become '0'..'9' (48..57).
+function encode_digit(d) {
+  if (d < 26) {
+    return d + 97;
+  }
+  return d + 22;
+}
+
+// Maps a character code back to its digit value; the inverse of
+// encode_digit() that additionally accepts upper case letters.
+// Throws for any code outside 0-9, A-Z and a-z.
+function decode_digit(d) {
+  // [first char code, last char code, amount to subtract]
+  var ranges = [
+    [48, 57, 22], // 0..9 -> 26..35
+    [65, 90, 65], // A..Z -> 0..25
+    [97, 122, 97] // a..z -> 0..25
+  ];
+
+  for (var r = 0; r < ranges.length; ++r) {
+    var range = ranges[r];
+    if (d >= range[0] && d <= range[1]) {
+      return d - range[2];
+    }
+  }
+
+  throw new Error('Illegal digit #' + d);
+}
+
+// Returns k - bias, limited to the range TMIN..TMAX.
+function threshold(k, bias) {
+  var at_or_below_min = k <= bias + TMIN;
+  var at_or_above_max = k >= bias + TMAX;
+
+  return at_or_below_min ? TMIN : (at_or_above_max ? TMAX : k - bias);
+}
+
+// Encodes `delta` as a variable-length sequence of digits whose per-position
+// thresholds are derived from `bias`. Returns an array of character codes.
+function encode_int(bias, delta) {
+  var digits = [];
+  var k = BASE;
+  var q = delta;
+  var t = threshold(k, bias);
+
+  // emit one digit per position until the remainder fits below the threshold
+  while (!(q < t)) {
+    var span = BASE - t;
+    digits.push(encode_digit(t + ((q - t) % span)));
+    q = ~~((q - t) / span);
+
+    k += BASE;
+    t = threshold(k, bias);
+  }
+
+  // the final (most significant) digit
+  digits.push(encode_digit(q));
+
+  return digits;
+}
+
+// Encodes a string as Punycode.
+//
+//   input - the string to encode
+//
+// Returns the encoded string: the code points below 128 in their original
+// order, a '-' delimiter if there were any, then the encoded deltas for the
+// remaining code points.
+// Throws an Error if `input` is not a string or if a delta exceeds the
+// 32-bit range that the integer arithmetic of the helpers can represent.
+function encode(input) {
+  if (typeof input != 'string') {
+    throw new Error('Argument must be a string.');
+  }
+
+  // encode_int() and adapt_bias() truncate with ~~, which wraps above this
+  var MAX_DELTA = 0x7FFFFFFF;
+
+  // Punycode works on code points, not on UTF-16 code units: combine each
+  // surrogate pair into the single code point it represents. Unpaired
+  // surrogates are passed through unchanged.
+  var codepoints = [];
+  for (var j = 0, units = input.length; j < units; ++j) {
+    var cp = input.charCodeAt(j);
+    if (cp >= 0xD800 && cp <= 0xDBFF && j + 1 < units) {
+      var low = input.charCodeAt(j + 1);
+      if (low >= 0xDC00 && low <= 0xDFFF) {
+        cp = 0x10000 + ((cp - 0xD800) << 10) + (low - 0xDC00);
+        ++j;
+      }
+    }
+    codepoints.push(cp);
+  }
+
+  var output = [];
+  var non_basic = [];
+  var len = codepoints.length;
+
+  // basic code points are copied as-is, the others are encoded below
+  for (var i = 0; i < len; ++i) {
+    var c = codepoints[i];
+    if (c < 128) {
+      output.push(c);
+    }
+    else {
+      non_basic.push(c);
+    }
+  }
+
+  // b = number of basic code points, h = number of code points handled
+  var b, h;
+  b = h = output.length;
+
+  if (b) {
+    output.push(45); // delimiter '-'
+  }
+
+  var n = INITIAL_N;
+  var bias = INITIAL_BIAS;
+  var delta = 0;
+
+  for (; h < len; ++n, ++delta) {
+    // advance n to the next code point that still has to be encoded
+    var m = next_smallest_codepoint(non_basic, n);
+    delta += (m - n) * (h + 1);
+    if (delta > MAX_DELTA) {
+      throw new Error('Delta overflow.');
+    }
+    n = m;
+
+    for (i = 0; i < len; ++i) {
+      c = codepoints[i];
+      if (c < n) {
+        if (++delta > MAX_DELTA) {
+          throw new Error('Delta overflow.');
+        }
+      }
+      else if (c == n) {
+        output.push.apply(output, encode_int(bias, delta));
+        bias = adapt_bias(delta, h + 1, b == h);
+        delta = 0;
+        h++;
+      }
+    }
+  }
+
+  return String.fromCharCode.apply(String, output);
+}
+
+// Decodes a Punycode string.
+//
+//   input - the Punycode string to decode
+//
+// Returns the decoded string. Code points above 0xFFFF are emitted as
+// UTF-16 surrogate pairs, because String.fromCharCode() would otherwise
+// truncate them to 16 bits.
+// Throws an Error if `input` is not a string, contains an illegal digit,
+// ends in the middle of an encoded integer, or decodes to a value outside
+// the range the integer arithmetic or Unicode can represent.
+function decode(input) {
+  if (typeof input != 'string') {
+    throw new Error('Argument must be a string.');
+  }
+
+  // ~~ truncates to 32 bits, larger values would silently wrap
+  var MAX_INT = 0x7FFFFFFF;
+
+  // find basic code points/delta separator
+  var b = 1 + input.lastIndexOf('-');
+
+  var codes = input.split('').map(function(c) {
+    return c.charCodeAt(0);
+  });
+
+  // start with a copy of the basic code points
+  var output = codes.slice(0, b ? (b - 1) : 0);
+
+  var n = INITIAL_N;
+  var bias = INITIAL_BIAS;
+
+  for (var i = 0, len = codes.length; b < len; ++i) {
+    var org_i = i;
+
+    // decode one variable-length integer into i
+    for (var k = BASE, w = 1;; k += BASE) {
+      // running past the end yields undefined, which decode_digit rejects
+      var d = decode_digit(codes[b++]);
+
+      i += d * w;
+
+      var t = threshold(k, bias);
+      if (d < t) {
+        break;
+      }
+
+      w *= BASE - t;
+    }
+
+    if (i > MAX_INT) {
+      throw new Error('Integer overflow.');
+    }
+
+    var x = 1 + output.length;
+    bias = adapt_bias(i - org_i, x, org_i == 0);
+
+    // i encodes both the code point increment and the insert position
+    n += ~~(i / x);
+    if (n > 0x10FFFF) {
+      throw new Error('Code point out of range.');
+    }
+    i %= x;
+
+    output.splice(i, 0, n);
+  }
+
+  // convert code points to UTF-16 code units
+  var result = [];
+  for (var j = 0; j < output.length; ++j) {
+    var cp = output[j];
+    if (cp > 0xFFFF) {
+      cp -= 0x10000;
+      result.push(0xD800 + (cp >> 10), 0xDC00 + (cp & 0x3FF));
+    }
+    else {
+      result.push(cp);
+    }
+  }
+
+  return String.fromCharCode.apply(String, result);
+}
diff --git a/lib/url.js b/lib/url.js
index 8b01c8548f..ed90e5cad1 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -19,6 +19,8 @@
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
+var punycode = require('punycode');
+
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
@@ -183,24 +185,56 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var part = hostparts[i];
if (!part) continue;
if (!part.match(hostnamePartPattern)) {
- var validParts = hostparts.slice(0, i);
- var notHost = hostparts.slice(i + 1);
- var bit = part.match(hostnamePartStart);
- if (bit) {
- validParts.push(bit[1]);
- notHost.unshift(bit[2]);
+ var newpart = '';
+ for (var j = 0, k = part.length; j < k; j++) {
+ if (part.charCodeAt(j) > 127) {
+ // Replace each non-ASCII character with a temporary ASCII
+ // placeholder so that the length of the hostname part is
+ // preserved when it is validated again below.
+ newpart += 'x';
+ } else {
+ newpart += part[j];
+ }
}
- if (notHost.length) {
- rest = '/' + notHost.join('.') + rest
+ // we test again with ASCII char only
+ if (!newpart.match(hostnamePartPattern)) {
+ var validParts = hostparts.slice(0, i);
+ var notHost = hostparts.slice(i + 1);
+ var bit = part.match(hostnamePartStart);
+ if (bit) {
+ validParts.push(bit[1]);
+ notHost.unshift(bit[2]);
+ }
+ if (notHost.length) {
+ rest = '/' + notHost.join('.') + rest;
+ }
+ out.hostname = validParts.join('.');
+ break;
}
- out.hostname = validParts.join('.');
- break;
}
}
}
+
// hostnames are always lower case.
out.hostname = out.hostname.toLowerCase();
+ // IDNA support: convert the hostname to its Punycode representation.
+ // Only the labels of the domain name that contain a character
+ // outside [A-Za-z0-9-] are converted, i.e. it doesn't matter
+ // if the hostname is already plain ASCII.
+ try {
+ var domainArray = out.hostname.split('.');
+ var newOut = [];
+ for (var i = 0; i < domainArray.length; ++i) {
+ var s = domainArray[i];
+ newOut.push(s.match(/[^A-Za-z0-9-]/) ?
+ 'xn--' + punycode.encode(s) : s);
+ }
+ out.hostname = newOut.join('.');
+ } catch (e) {
+ // if encoding fails for some reason, we just keep the classic behavior.
+ }
+
out.host = ((out.auth) ? out.auth + '@' : '') +
(out.hostname || '') +
((out.port) ? ':' + out.port : '');