summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorDaniel Stenberg <daniel@haxx.se>2016-05-08 15:11:10 +0200
committerDaniel Stenberg <daniel@haxx.se>2016-05-30 23:13:55 +0200
commit5409e1d793de755c7433336b80b0c8370a359d45 (patch)
tree45a9791d3bf2aa623e0999c5b13128b9d967f810 /lib
parented8b8f2456fc485fa81fb3d3eaef684121bb1aef (diff)
downloadgnurl-5409e1d793de755c7433336b80b0c8370a359d45.tar.gz
gnurl-5409e1d793de755c7433336b80b0c8370a359d45.tar.bz2
gnurl-5409e1d793de755c7433336b80b0c8370a359d45.zip
URL parser: allow URLs to use one, two or three slashes
Mostly in order to support broken web sites that redirect to broken URLs that are accepted by browsers. Browsers are typically even more lenient than this, since according to the WHATWG URL spec they should allow an _infinite_ number of slashes. I tested 8000 slashes with Firefox and it just worked. Added test cases 1141, 1142 and 1143 to verify the new parser. Closes #791
Diffstat (limited to 'lib')
-rw-r--r--lib/url.c30
1 files changed, 25 insertions, 5 deletions
diff --git a/lib/url.c b/lib/url.c
index 2a3026650..3f0bde258 100644
--- a/lib/url.c
+++ b/lib/url.c
@@ -4141,12 +4141,17 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
}
else {
/* clear path */
+ char slashbuf[4];
path[0]=0;
- if(2 > sscanf(data->change.url,
- "%15[^\n:]://%[^\n/?]%[^\n]",
- protobuf,
- conn->host.name, path)) {
+ rc = sscanf(data->change.url,
+ "%15[^\n:]:%3[/]%[^\n/?]%[^\n]",
+ protobuf, slashbuf, conn->host.name, path);
+ if(2 == rc) {
+ failf(data, "Bad URL");
+ return CURLE_URL_MALFORMAT;
+ }
+ if(3 > rc) {
/*
* The URL was badly formatted, let's try the browser-style _without_
@@ -4197,8 +4202,23 @@ static CURLcode parseurlandfillconn(struct SessionHandle *data,
*prot_missing = TRUE; /* not given in URL */
}
- else
+ else {
+ size_t s = strlen(slashbuf);
protop = protobuf;
+ if(s != 2) {
+ infof(data, "Unwillingly accepted illegal URL using %d slash%s!\n",
+ s, s>1?"es":"");
+
+ if(data->change.url_alloc)
+ free(data->change.url);
+ /* repair the URL to use two slashes */
+ data->change.url = aprintf("%s://%s%s",
+ protobuf, conn->host.name, path);
+ if(!data->change.url)
+ return CURLE_OUT_OF_MEMORY;
+ data->change.url_alloc = TRUE;
+ }
+ }
}
/* We search for '?' in the host name (but only on the right side of a