summaryrefslogtreecommitdiff
path: root/lib/url.js
diff options
context:
space:
mode:
authorJeremy Selier <jeremy@jolicloud.com>2011-06-13 14:43:16 +0200
committerisaacs <i@izs.me>2011-07-06 13:17:50 -0700
commit2a848fa7279002259a3e651223ce6f3230ca22d7 (patch)
tree8e6ff6fbea3a22091537d966d3057dbc05a00632 /lib/url.js
parent08a334fa45e8e303b718226f7384b5c0cef43c19 (diff)
downloadandroid-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.gz
android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.tar.bz2
android-node-v8-2a848fa7279002259a3e651223ce6f3230ca22d7.zip
Close #1149 IDNA and Punycode support in url.parse
Using @bnoordhuis's punycode lib. Close #1174 also
Diffstat (limited to 'lib/url.js')
-rw-r--r--lib/url.js54
1 files changed, 44 insertions, 10 deletions
diff --git a/lib/url.js b/lib/url.js
index 8b01c8548f..ed90e5cad1 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -19,6 +19,8 @@
// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
// USE OR OTHER DEALINGS IN THE SOFTWARE.
+var punycode = require('punycode');
+
exports.parse = urlParse;
exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
@@ -183,24 +185,56 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var part = hostparts[i];
if (!part) continue;
if (!part.match(hostnamePartPattern)) {
- var validParts = hostparts.slice(0, i);
- var notHost = hostparts.slice(i + 1);
- var bit = part.match(hostnamePartStart);
- if (bit) {
- validParts.push(bit[1]);
- notHost.unshift(bit[2]);
+ var newpart = '';
+ for (var j = 0, k = part.length; j < k; j++) {
+ if (part.charCodeAt(j) > 127) {
+ // we replace each non-ASCII char with a temporary placeholder;
+ // we need this to make sure the size of the hostname is not
+ // broken by replacing non-ASCII chars with nothing
+ newpart += 'x';
+ } else {
+ newpart += part[j];
+ }
}
- if (notHost.length) {
- rest = '/' + notHost.join('.') + rest
+ // we test again with ASCII chars only
+ if (!newpart.match(hostnamePartPattern)) {
+ var validParts = hostparts.slice(0, i);
+ var notHost = hostparts.slice(i + 1);
+ var bit = part.match(hostnamePartStart);
+ if (bit) {
+ validParts.push(bit[1]);
+ notHost.unshift(bit[2]);
+ }
+ if (notHost.length) {
+ rest = '/' + notHost.join('.') + rest;
+ }
+ out.hostname = validParts.join('.');
+ break;
}
- out.hostname = validParts.join('.');
- break;
}
}
}
+
// hostnames are always lower case.
out.hostname = out.hostname.toLowerCase();
+ // IDNA Support: converts out.hostname to its punycoded representation.
+ // It only converts the parts of the domain name that
+ // have non-ASCII characters, i.e. it doesn't matter if
+ // the domain is already in ASCII.
+ try {
+ var domainArray = out.hostname.split('.');
+ var newOut = [];
+ for (var i = 0; i < domainArray.length; ++i) {
+ var s = domainArray[i];
+ newOut.push(s.match(/[^A-Za-z0-9-]/) ?
+ 'xn--' + punycode.encode(s) : s);
+ }
+ out.hostname = newOut.join('.');
+ } catch (e) {
+ // if encoding fails for some reason, we just keep the classic behavior.
+ }
+
out.host = ((out.auth) ? out.auth + '@' : '') +
(out.hostname || '') +
((out.port) ? ':' + out.port : '');