summaryrefslogtreecommitdiff
path: root/lib/url.js
diff options
context:
space:
mode:
author: isaacs <i@izs.me> 2012-09-13 11:09:54 -0700
committer: isaacs <i@izs.me> 2012-09-17 10:44:23 -0700
commit: 7144be70db2e6ce337d0f6ec47a28a06802d1c7a (patch)
tree: f7cd7b778843d55f293e307e31c8ee4299972f68 /lib/url.js
parent: 3806cf0d640da3024d50208452d2dd9e3cb015d2 (diff)
download: android-node-v8-7144be70db2e6ce337d0f6ec47a28a06802d1c7a.tar.gz
android-node-v8-7144be70db2e6ce337d0f6ec47a28a06802d1c7a.tar.bz2
android-node-v8-7144be70db2e6ce337d0f6ec47a28a06802d1c7a.zip
url: Go much faster by using Url class
V8 loves it when JavaScript pretends to be a Classic inheritance type of language.

Before:

$ ./node benchmark/url.js
benchmarking parse() ... 1.868 sec
benchmarking format() ... 1.906 sec
benchmarking resolve("../foo/bar?baz=boom") ... 7.800 sec
benchmarking resolve("foo/bar") ... 7.099 sec
benchmarking resolve("http://nodejs.org") ... 8.403 sec
benchmarking resolve("./foo/bar?baz") ... 7.974 sec

After:

$ ./node benchmark/url.js
benchmarking parse() ... 1.769 sec
benchmarking format() ... 1.793 sec
benchmarking resolve("../foo/bar?baz=boom") ... 4.254 sec
benchmarking resolve("foo/bar") ... 3.932 sec
benchmarking resolve("http://nodejs.org") ... 4.382 sec
benchmarking resolve("./foo/bar?baz") ... 4.293 sec
Diffstat (limited to 'lib/url.js')
-rw-r--r-- lib/url.js 381
1 files changed, 220 insertions, 161 deletions
diff --git a/lib/url.js b/lib/url.js
index 50eb8b20f6..980a9bb84a 100644
--- a/lib/url.js
+++ b/lib/url.js
@@ -26,6 +26,22 @@ exports.resolve = urlResolve;
exports.resolveObject = urlResolveObject;
exports.format = urlFormat;
+exports.Url = Url;
+
+function Url() {
+ this.protocol = null;
+ this.slashes = null;
+ this.auth = null;
+ this.host = null;
+ this.port = null;
+ this.hostname = null;
+ this.hash = null;
+ this.search = null;
+ this.query = null;
+ this.pathname = null;
+ this.path = null;
+}
+
// Reference: RFC 3986, RFC 1808, RFC 2396
// define these here so at least they only have to be
@@ -90,14 +106,19 @@ var protocolPattern = /^([a-z0-9.+-]+:)/i,
querystring = require('querystring');
function urlParse(url, parseQueryString, slashesDenoteHost) {
- if (url && typeof(url) === 'object' && url.href) return url;
+ if (url && typeof(url) === 'object' && url instanceof Url) return url;
+
+ var u = new Url;
+ u.parse(url, parseQueryString, slashesDenoteHost);
+ return u;
+}
+Url.prototype.parse = function(url, parseQueryString, slashesDenoteHost) {
if (typeof url !== 'string') {
throw new TypeError("Parameter 'url' must be a string, not " + typeof url);
}
- var out = {},
- rest = url;
+ var rest = url;
// trim before proceeding.
// This is to support parse stuff like " http://foo.com \n"
@@ -107,7 +128,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
if (proto) {
proto = proto[0];
var lowerProto = proto.toLowerCase();
- out.protocol = lowerProto;
+ this.protocol = lowerProto;
rest = rest.substr(proto.length);
}
@@ -119,7 +140,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var slashes = rest.substr(0, 2) === '//';
if (slashes && !(proto && hostlessProtocol[proto])) {
rest = rest.substr(2);
- out.slashes = true;
+ this.slashes = true;
}
}
@@ -149,7 +170,7 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
if (hasAuth) {
// pluck off the auth portion.
- out.auth = decodeURIComponent(auth);
+ this.auth = decodeURIComponent(auth);
rest = rest.substr(atSign + 1);
}
}
@@ -162,35 +183,28 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
}
if (firstNonHost !== -1) {
- out.host = rest.substr(0, firstNonHost);
+ this.host = rest.substr(0, firstNonHost);
rest = rest.substr(firstNonHost);
} else {
- out.host = rest;
+ this.host = rest;
rest = '';
}
// pull out port.
- var p = parseHost(out.host);
- var keys = Object.keys(p);
- for (var i = 0, l = keys.length; i < l; i++) {
- var key = keys[i];
- out[key] = p[key];
- }
+ this.parseHost();
// we've indicated that there is a hostname,
// so even if it's empty, it has to be present.
- out.hostname = out.hostname || '';
+ this.hostname = this.hostname || '';
// if hostname begins with [ and ends with ]
// assume that it's an IPv6 address.
- var ipv6Hostname = out.hostname[0] === '[' &&
- out.hostname[out.hostname.length - 1] === ']';
+ var ipv6Hostname = this.hostname[0] === '[' &&
+ this.hostname[this.hostname.length - 1] === ']';
// validate a little.
- if (out.hostname.length > hostnameMaxLen) {
- out.hostname = '';
- } else if (!ipv6Hostname) {
- var hostparts = out.hostname.split(/\./);
+ if (!ipv6Hostname) {
+ var hostparts = this.hostname.split(/\./);
for (var i = 0, l = hostparts.length; i < l; i++) {
var part = hostparts[i];
if (!part) continue;
@@ -218,38 +232,44 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
if (notHost.length) {
rest = '/' + notHost.join('.') + rest;
}
- out.hostname = validParts.join('.');
+ this.hostname = validParts.join('.');
break;
}
}
}
}
- // hostnames are always lower case.
- out.hostname = out.hostname.toLowerCase();
+ if (this.hostname.length > hostnameMaxLen) {
+ this.hostname = '';
+ } else {
+ // hostnames are always lower case.
+ this.hostname = this.hostname.toLowerCase();
+ }
if (!ipv6Hostname) {
// IDNA Support: Returns a puny coded representation of "domain".
// It only converts the part of the domain name that
// has non ASCII characters. I.e. it dosent matter if
// you call it with a domain that already is in ASCII.
- var domainArray = out.hostname.split('.');
+ var domainArray = this.hostname.split('.');
var newOut = [];
for (var i = 0; i < domainArray.length; ++i) {
var s = domainArray[i];
newOut.push(s.match(/[^A-Za-z0-9_-]/) ?
'xn--' + punycode.encode(s) : s);
}
- out.hostname = newOut.join('.');
+ this.hostname = newOut.join('.');
}
- out.host = (out.hostname || '') +
- ((out.port) ? ':' + out.port : '');
- out.href += out.host;
+ var p = this.port ? ':' + this.port : '';
+ var h = this.hostname || '';
+ this.host = h + p;
+ this.href += this.host;
// strip [ and ] from the hostname
+ // the host field still retains them, though
if (ipv6Hostname) {
- out.hostname = out.hostname.substr(1, out.hostname.length - 2);
+ this.hostname = this.hostname.substr(1, this.hostname.length - 2);
if (rest[0] !== '/') {
rest = '/' + rest;
}
@@ -278,38 +298,39 @@ function urlParse(url, parseQueryString, slashesDenoteHost) {
var hash = rest.indexOf('#');
if (hash !== -1) {
// got a fragment string.
- out.hash = rest.substr(hash);
+ this.hash = rest.substr(hash);
rest = rest.slice(0, hash);
}
var qm = rest.indexOf('?');
if (qm !== -1) {
- out.search = rest.substr(qm);
- out.query = rest.substr(qm + 1);
+ this.search = rest.substr(qm);
+ this.query = rest.substr(qm + 1);
if (parseQueryString) {
- out.query = querystring.parse(out.query);
+ this.query = querystring.parse(this.query);
}
rest = rest.slice(0, qm);
} else if (parseQueryString) {
// no query string, but parseQueryString still requested
- out.search = '';
- out.query = {};
+ this.search = '';
+ this.query = {};
}
- if (rest) out.pathname = rest;
+ if (rest) this.pathname = rest;
if (slashedProtocol[proto] &&
- out.hostname && !out.pathname) {
- out.pathname = '/';
+ this.hostname && !this.pathname) {
+ this.pathname = '/';
}
//to support http.request
- if (out.pathname || out.search) {
- out.path = (out.pathname ? out.pathname : '') +
- (out.search ? out.search : '');
+ if (this.pathname || this.search) {
+ var p = this.pathname || '';
+ var s = this.search || '';
+ this.path = p + s;
}
// finally, reconstruct the href based on what has been validated.
- out.href = urlFormat(out);
- return out;
-}
+ this.href = this.format();
+ return this;
+};
// format a parsed object into a url string
function urlFormat(obj) {
@@ -318,43 +339,47 @@ function urlFormat(obj) {
// this way, you can call url_format() on strings
// to clean up potentially wonky urls.
if (typeof(obj) === 'string') obj = urlParse(obj);
+ if (!(obj instanceof Url)) return Url.prototype.format.call(obj);
+ return obj.format();
+}
- var auth = obj.auth || '';
+Url.prototype.format = function() {
+ var auth = this.auth || '';
if (auth) {
auth = encodeURIComponent(auth);
auth = auth.replace(/%3A/i, ':');
auth += '@';
}
- var protocol = obj.protocol || '',
- pathname = obj.pathname || '',
- hash = obj.hash || '',
+ var protocol = this.protocol || '',
+ pathname = this.pathname || '',
+ hash = this.hash || '',
host = false,
query = '';
- if (obj.host !== undefined) {
- host = auth + obj.host;
- } else if (obj.hostname !== undefined) {
- host = auth + (obj.hostname.indexOf(':') === -1 ?
- obj.hostname :
- '[' + obj.hostname + ']');
- if (obj.port) {
- host += ':' + obj.port;
+ if (this.host) {
+ host = auth + this.host;
+ } else if (this.hostname) {
+ host = auth + (this.hostname.indexOf(':') === -1 ?
+ this.hostname :
+ '[' + this.hostname + ']');
+ if (this.port) {
+ host += ':' + this.port;
}
}
- if (obj.query && typeof obj.query === 'object' &&
- Object.keys(obj.query).length) {
- query = querystring.stringify(obj.query);
+ if (this.query && typeof this.query === 'object' &&
+ Object.keys(this.query).length) {
+ query = querystring.stringify(this.query);
}
- var search = obj.search || (query && ('?' + query)) || '';
+ var search = this.search || (query && ('?' + query)) || '';
if (protocol && protocol.substr(-1) !== ':') protocol += ':';
// only the slashedProtocols get the //. Not mailto:, xmpp:, etc.
// unless they had them to begin with.
- if (obj.slashes ||
+ if (this.slashes ||
(!protocol || slashedProtocol[protocol]) && host !== false) {
host = '//' + (host || '');
if (pathname && pathname.charAt(0) !== '/') pathname = '/' + pathname;
@@ -366,39 +391,62 @@ function urlFormat(obj) {
if (search && search.charAt(0) !== '?') search = '?' + search;
return protocol + host + pathname + search + hash;
-}
+};
function urlResolve(source, relative) {
- return urlFormat(urlResolveObject(source, relative));
+ return urlParse(source, false, true).resolve(relative);
}
+Url.prototype.resolve = function(relative) {
+ return this.resolveObject(urlParse(relative, false, true)).format();
+};
+
function urlResolveObject(source, relative) {
if (!source) return relative;
+ return urlParse(source, false, true).resolveObject(relative);
+}
+
+Url.prototype.resolveObject = function(relative) {
+ if (typeof relative === 'string') {
+ var rel = new Url();
+ rel.parse(relative, false, true);
+ relative = rel;
+ }
- source = urlParse(urlFormat(source), false, true);
- relative = urlParse(urlFormat(relative), false, true);
+ var result = new Url();
+ Object.keys(this).forEach(function(k) {
+ result[k] = this[k];
+ }, this);
// hash is always overridden, no matter what.
- source.hash = relative.hash;
+ // even href="" will remove it.
+ result.hash = relative.hash;
+ // if the relative url is empty, then there's nothing left to do here.
if (relative.href === '') {
- source.href = urlFormat(source);
- return source;
+ result.href = result.format();
+ return result;
}
// hrefs like //foo/bar always cut to the protocol.
if (relative.slashes && !relative.protocol) {
- relative.protocol = source.protocol;
+ // take everything except the protocol from relative
+ Object.keys(relative).forEach(function(k) {
+ if (k !== 'protocol')
+ result[k] = relative[k];
+ });
+
//urlParse appends trailing / to urls like http://www.example.com
- if (slashedProtocol[relative.protocol] &&
- relative.hostname && !relative.pathname) {
- relative.path = relative.pathname = '/';
+ if (slashedProtocol[result.protocol] &&
+ result.hostname && !result.pathname) {
+ result.path = result.pathname = '/';
}
- relative.href = urlFormat(relative);
- return relative;
+
+ result.href = result.format();
+ return result;
}
- if (relative.protocol && relative.protocol !== source.protocol) {
+ if (relative.protocol && relative.protocol !== result.protocol) {
// if it's a known url protocol, then changing
// the protocol does weird things
// first, if it's not file:, then we MUST have a host,
@@ -408,10 +456,14 @@ function urlResolveObject(source, relative) {
// because that's known to be hostless.
// anything else is assumed to be absolute.
if (!slashedProtocol[relative.protocol]) {
- relative.href = urlFormat(relative);
- return relative;
+ Object.keys(relative).forEach(function(k) {
+ result[k] = relative[k];
+ });
+ result.href = result.format();
+ return result;
}
- source.protocol = relative.protocol;
+
+ result.protocol = relative.protocol;
if (!relative.host && !hostlessProtocol[relative.protocol]) {
var relPath = (relative.pathname || '').split('/');
while (relPath.length && !(relative.host = relPath.shift()));
@@ -419,72 +471,72 @@ function urlResolveObject(source, relative) {
if (!relative.hostname) relative.hostname = '';
if (relPath[0] !== '') relPath.unshift('');
if (relPath.length < 2) relPath.unshift('');
- relative.pathname = relPath.join('/');
+ result.pathname = relPath.join('/');
+ } else {
+ result.pathname = relative.pathname;
}
- source.pathname = relative.pathname;
- source.search = relative.search;
- source.query = relative.query;
- source.host = relative.host || '';
- source.auth = relative.auth;
- source.hostname = relative.hostname || relative.host;
- source.port = relative.port;
- //to support http.request
- if (source.pathname !== undefined || source.search !== undefined) {
- source.path = (source.pathname ? source.pathname : '') +
- (source.search ? source.search : '');
+ result.search = relative.search;
+ result.query = relative.query;
+ result.host = relative.host || '';
+ result.auth = relative.auth;
+ result.hostname = relative.hostname || relative.host;
+ result.port = relative.port;
+ // to support http.request
+ if (result.pathname || result.search) {
+ var p = result.pathname || '';
+ var s = result.search || '';
+ result.path = p + s;
}
- source.slashes = source.slashes || relative.slashes;
- source.href = urlFormat(source);
- return source;
+ result.slashes = result.slashes || relative.slashes;
+ result.href = result.format();
+ return result;
}
- var isSourceAbs = (source.pathname && source.pathname.charAt(0) === '/'),
+ var isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/'),
isRelAbs = (
- relative.host !== undefined ||
+ relative.host ||
relative.pathname && relative.pathname.charAt(0) === '/'
),
mustEndAbs = (isRelAbs || isSourceAbs ||
- (source.host && relative.pathname)),
+ (result.host && relative.pathname)),
removeAllDots = mustEndAbs,
- srcPath = source.pathname && source.pathname.split('/') || [],
+ srcPath = result.pathname && result.pathname.split('/') || [],
relPath = relative.pathname && relative.pathname.split('/') || [],
- psychotic = source.protocol &&
- !slashedProtocol[source.protocol];
+ psychotic = result.protocol && !slashedProtocol[result.protocol];
// if the url is a non-slashed url, then relative
// links like ../.. should be able
// to crawl up to the hostname, as well. This is strange.
- // source.protocol has already been set by now.
+ // result.protocol has already been set by now.
// Later on, put the first path part into the host field.
if (psychotic) {
-
- delete source.hostname;
- delete source.port;
- if (source.host) {
- if (srcPath[0] === '') srcPath[0] = source.host;
- else srcPath.unshift(source.host);
+ result.hostname = '';
+ result.port = null;
+ if (result.host) {
+ if (srcPath[0] === '') srcPath[0] = result.host;
+ else srcPath.unshift(result.host);
}
- delete source.host;
+ result.host = '';
if (relative.protocol) {
- delete relative.hostname;
- delete relative.port;
+ relative.hostname = null;
+ relative.port = null;
if (relative.host) {
if (relPath[0] === '') relPath[0] = relative.host;
else relPath.unshift(relative.host);
}
- delete relative.host;
+ relative.host = null;
}
mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === '');
}
if (isRelAbs) {
// it's absolute.
- source.host = (relative.host || relative.host === '') ?
- relative.host : source.host;
- source.hostname = (relative.hostname || relative.hostname === '') ?
- relative.hostname : source.hostname;
- source.search = relative.search;
- source.query = relative.query;
+ result.host = (relative.host || relative.host === '') ?
+ relative.host : result.host;
+ result.hostname = (relative.hostname || relative.hostname === '') ?
+ relative.hostname : result.hostname;
+ result.search = relative.search;
+ result.query = relative.query;
srcPath = relPath;
// fall through to the dot-handling below.
} else if (relPath.length) {
@@ -493,53 +545,55 @@ function urlResolveObject(source, relative) {
if (!srcPath) srcPath = [];
srcPath.pop();
srcPath = srcPath.concat(relPath);
- source.search = relative.search;
- source.query = relative.query;
- } else if ('search' in relative) {
+ result.search = relative.search;
+ result.query = relative.query;
+ } else if (relative.search !== null && relative.search !== undefined) {
// just pull out the search.
// like href='?foo'.
// Put this after the other two cases because it simplifies the booleans
if (psychotic) {
- source.hostname = source.host = srcPath.shift();
+ result.hostname = result.host = srcPath.shift();
//occationaly the auth can get stuck only in host
//this especialy happens in cases like
//url.resolveObject('mailto:local1@domain1', 'local2@domain2')
- var authInHost = source.host && source.host.indexOf('@') > 0 ?
- source.host.split('@') : false;
+ var authInHost = result.host && result.host.indexOf('@') > 0 ?
+ result.host.split('@') : false;
if (authInHost) {
- source.auth = authInHost.shift();
- source.host = source.hostname = authInHost.shift();
+ result.auth = authInHost.shift();
+ result.host = result.hostname = authInHost.shift();
}
}
- source.search = relative.search;
- source.query = relative.query;
+ result.search = relative.search;
+ result.query = relative.query;
//to support http.request
- if (source.pathname !== undefined || source.search !== undefined) {
- source.path = (source.pathname ? source.pathname : '') +
- (source.search ? source.search : '');
+ if (result.pathname !== null || result.search !== null) {
+ result.path = (result.pathname ? result.pathname : '') +
+ (result.search ? result.search : '');
}
- source.href = urlFormat(source);
- return source;
+ result.href = result.format();
+ return result;
}
+
if (!srcPath.length) {
// no path at all. easy.
// we've already handled the other stuff above.
- delete source.pathname;
+ result.pathname = null;
//to support http.request
- if (!source.search) {
- source.path = '/' + source.search;
+ if (result.search) {
+ result.path = '/' + result.search;
} else {
- delete source.path;
+ result.path = null;
}
- source.href = urlFormat(source);
- return source;
+ result.href = result.format();
+ return result;
}
+
// if a url ENDs in . or .., then it must get a trailing slash.
// however, if it ends in anything else non-slashy,
// then it must NOT get a trailing slash.
var last = srcPath.slice(-1)[0];
var hasTrailingSlash = (
- (source.host || relative.host) && (last === '.' || last === '..') ||
+ (result.host || relative.host) && (last === '.' || last === '..') ||
last === '');
// strip single dots, resolve double dots to parent dir
@@ -579,47 +633,52 @@ function urlResolveObject(source, relative) {
// put the host back
if (psychotic) {
- source.hostname = source.host = isAbsolute ? '' :
+ result.hostname = result.host = isAbsolute ? '' :
srcPath.length ? srcPath.shift() : '';
//occationaly the auth can get stuck only in host
//this especialy happens in cases like
//url.resolveObject('mailto:local1@domain1', 'local2@domain2')
- var authInHost = source.host && source.host.indexOf('@') > 0 ?
- source.host.split('@') : false;
+ var authInHost = result.host && result.host.indexOf('@') > 0 ?
+ result.host.split('@') : false;
if (authInHost) {
- source.auth = authInHost.shift();
- source.host = source.hostname = authInHost.shift();
+ result.auth = authInHost.shift();
+ result.host = result.hostname = authInHost.shift();
}
}
- mustEndAbs = mustEndAbs || (source.host && srcPath.length);
+ mustEndAbs = mustEndAbs || (result.host && srcPath.length);
if (mustEndAbs && !isAbsolute) {
srcPath.unshift('');
}
- source.pathname = srcPath.join('/');
- //to support request.http
- if (source.pathname !== undefined || source.search !== undefined) {
- source.path = (source.pathname ? source.pathname : '') +
- (source.search ? source.search : '');
+ if (!srcPath.length) {
+ result.pathname = null;
+ result.path = null;
+ } else {
+ result.pathname = srcPath.join('/');
}
- source.auth = relative.auth || source.auth;
- source.slashes = source.slashes || relative.slashes;
- source.href = urlFormat(source);
- return source;
-}
-function parseHost(host) {
- var out = {};
+ //to support request.http
+ if (result.pathname !== null || result.search !== null) {
+ result.path = (result.pathname ? result.pathname : '') +
+ (result.search ? result.search : '');
+ }
+ result.auth = relative.auth || result.auth;
+ result.slashes = result.slashes || relative.slashes;
+ result.href = result.format();
+ return result;
+};
+
+Url.prototype.parseHost = function() {
+ var host = this.host;
var port = portPattern.exec(host);
if (port) {
port = port[0];
if (port !== ':') {
- out.port = port.substr(1);
+ this.port = port.substr(1);
}
host = host.substr(0, host.length - port.length);
}
- if (host) out.hostname = host;
- return out;
-}
+ if (host) this.hostname = host;
+};