summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Daniel Stenberg <daniel@haxx.se> 2014-01-17 00:34:36 +0100
committer: Daniel Stenberg <daniel@haxx.se> 2014-01-17 08:37:44 +0100
commit: eb02a99c6112b185485d7bb43a0837ad60040bd5 (patch)
tree: 640625a4d19c3ec402e67c319b79cbcc40eb8d49
parent: 3f5546b2bee5c592bcb6a2cb5ddd1a2e5160eb4f (diff)
download: gnurl-eb02a99c6112b185485d7bb43a0837ad60040bd5.tar.gz
gnurl-eb02a99c6112b185485d7bb43a0837ad60040bd5.tar.bz2
gnurl-eb02a99c6112b185485d7bb43a0837ad60040bd5.zip
chunked parsing: relax the CR strictness
Allow chunked-encoded data to be parsed when lines end with only LF instead of CRLF. Browsers accept this as well.
-rw-r--r-- lib/http_chunks.c 87
-rw-r--r-- lib/http_chunks.h 40
2 files changed, 35 insertions, 92 deletions
diff --git a/lib/http_chunks.c b/lib/http_chunks.c
index 236543211..eaefa05d1 100644
--- a/lib/http_chunks.c
+++ b/lib/http_chunks.c
@@ -88,8 +88,8 @@ static bool Curl_isxdigit(char digit)
void Curl_httpchunk_init(struct connectdata *conn)
{
struct Curl_chunker *chunk = &conn->chunk;
- chunk->hexindex=0; /* start at 0 */
- chunk->dataleft=0; /* no data left yet! */
+ chunk->hexindex=0; /* start at 0 */
+ chunk->dataleft=0; /* no data left yet! */
chunk->state = CHUNK_HEX; /* we get hex first! */
}
@@ -143,11 +143,11 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
else {
char *endptr;
- if(0 == ch->hexindex) {
+ if(0 == ch->hexindex)
/* This is illegal data, we received junk where we expected
a hexadecimal digit. */
return CHUNKE_ILLEGAL_HEX;
- }
+
/* length and datap are unmodified */
ch->hexbuffer[ch->hexindex]=0;
@@ -164,44 +164,29 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(errno == ERANGE)
/* over or underflow is an error */
return CHUNKE_ILLEGAL_HEX;
- ch->state = CHUNK_POSTHEX;
+ ch->state = CHUNK_LF; /* now wait for the CRLF */
}
break;
- case CHUNK_POSTHEX:
- /* In this state, we're waiting for CRLF to arrive. We support
- this to allow so called chunk-extensions to show up here
- before the CRLF comes. */
- if(*datap == 0x0d)
- ch->state = CHUNK_CR;
- length--;
- datap++;
- break;
-
- case CHUNK_CR:
- /* waiting for the LF */
+ case CHUNK_LF:
+ /* waiting for the LF after a chunk size */
if(*datap == 0x0a) {
/* we're now expecting data to come, unless size was zero! */
if(0 == ch->datasize) {
ch->state = CHUNK_TRAILER; /* now check for trailers */
conn->trlPos=0;
}
- else {
+ else
ch->state = CHUNK_DATA;
- }
}
- else
- /* previously we got a fake CR, go back to CR waiting! */
- ch->state = CHUNK_CR;
+
datap++;
length--;
break;
case CHUNK_DATA:
- /* we get pure and fine data
-
- We expect another 'datasize' of data. We have 'length' right now,
- it can be more or less than 'datasize'. Get the smallest piece.
+ /* We expect 'datasize' of data. We have 'length' right now, it can be
+ more or less than 'datasize'. Get the smallest piece.
*/
piece = (ch->datasize >= length)?length:ch->datasize;
@@ -256,37 +241,22 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
if(0 == ch->datasize)
/* end of data this round, we now expect a trailing CRLF */
- ch->state = CHUNK_POSTCR;
- break;
-
- case CHUNK_POSTCR:
- if(*datap == 0x0d) {
ch->state = CHUNK_POSTLF;
- datap++;
- length--;
- }
- else
- return CHUNKE_BAD_CHUNK;
-
break;
case CHUNK_POSTLF:
if(*datap == 0x0a) {
- /*
- * The last one before we go back to hex state and start all
- * over.
- */
- Curl_httpchunk_init(conn);
- datap++;
- length--;
+ /* The last one before we go back to hex state and start all over. */
+ Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
}
- else
+ else if(*datap != 0x0d)
return CHUNKE_BAD_CHUNK;
-
+ datap++;
+ length--;
break;
case CHUNK_TRAILER:
- if(*datap == 0x0d) {
+ if((*datap == 0x0d) || (*datap == 0x0a)) {
/* this is the end of a trailer, but if the trailer was zero bytes
there was no trailer and we move on */
@@ -312,6 +282,9 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
conn->trlPos=0;
ch->state = CHUNK_TRAILER_CR;
+ if(*datap == 0x0a)
+ /* already on the LF */
+ break;
}
else {
/* no trailer, we're on the final CRLF pair */
@@ -357,27 +330,18 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
case CHUNK_TRAILER_POSTCR:
/* We enter this state when a CR should arrive so we expect to
have to first pass a CR before we wait for LF */
- if(*datap != 0x0d) {
+ if((*datap != 0x0d) && (*datap != 0x0a)) {
/* not a CR then it must be another header in the trailer */
ch->state = CHUNK_TRAILER;
break;
}
- datap++;
- length--;
- /* now wait for the final LF */
- ch->state = CHUNK_STOP;
- break;
-
- case CHUNK_STOPCR:
- /* Read the final CRLF that ends all chunk bodies */
-
if(*datap == 0x0d) {
- ch->state = CHUNK_STOP;
+ /* skip if CR */
datap++;
length--;
}
- else
- return CHUNKE_BAD_CHUNK;
+ /* now wait for the final LF */
+ ch->state = CHUNK_STOP;
break;
case CHUNK_STOP:
@@ -392,9 +356,6 @@ CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
}
else
return CHUNKE_BAD_CHUNK;
-
- default:
- return CHUNKE_STATE_ERROR;
}
}
return CHUNKE_OK;
diff --git a/lib/http_chunks.h b/lib/http_chunks.h
index 493d8a426..fc652d51e 100644
--- a/lib/http_chunks.h
+++ b/lib/http_chunks.h
@@ -29,40 +29,25 @@
#define MAXNUM_SIZE 16
typedef enum {
- CHUNK_FIRST, /* never use */
-
- /* In this we await and buffer all hexadecimal digits until we get one
- that isn't a hexadecimal digit. When done, we go POSTHEX */
+ /* await and buffer all hexadecimal digits until we get one that isn't a
+ hexadecimal digit. When done, we go to CHUNK_LF */
CHUNK_HEX,
- /* We have received the hexadecimal digit and we eat all characters until
- we get a CRLF pair. When we see a CR we go to the CR state. */
- CHUNK_POSTHEX,
-
- /* A single CR has been found and we should get a LF right away in this
- state or we go back to POSTHEX. When LF is received, we go to DATA.
- If the size given was zero, we set state to STOP and return. */
- CHUNK_CR,
+ /* wait for LF, ignore all else */
+ CHUNK_LF,
/* We eat the amount of data specified. When done, we move on to the
POST_CR state. */
CHUNK_DATA,
- /* POSTCR should get a CR and nothing else, then move to POSTLF */
- CHUNK_POSTCR,
-
- /* POSTLF should get a LF and nothing else, then move back to HEX as the
- CRLF combination marks the end of a chunk */
+ /* POSTLF should get a CR and then a LF and nothing else, then move back to
+ HEX as the CRLF combination marks the end of a chunk. A missing CR is no
+ big deal. */
CHUNK_POSTLF,
- /* Each Chunk body should end with a CRLF. Read a CR and nothing else,
- then move to CHUNK_STOP */
- CHUNK_STOPCR,
-
- /* This is mainly used to really mark that we're out of the game.
- NOTE: that there's a 'dataleft' field in the struct that will tell how
- many bytes that were not passed to the client in the end of the last
- buffer! */
+ /* Used to mark that we're out of the game. NOTE: there's a 'dataleft'
+ field in the struct that tells how many bytes were not passed to the
+ client at the end of the last buffer! */
CHUNK_STOP,
/* At this point optional trailer headers can be found, unless the next line
@@ -77,10 +62,7 @@ typedef enum {
signalled If this is an empty trailer CHUNKE_STOP will be signalled.
Otherwise the trailer will be broadcasted via Curl_client_write() and the
next state will be CHUNK_TRAILER */
- CHUNK_TRAILER_POSTCR,
-
- CHUNK_LAST /* never use */
-
+ CHUNK_TRAILER_POSTCR
} ChunkyState;
typedef enum {