summaryrefslogtreecommitdiff
path: root/deps/http_parser
diff options
context:
space:
mode:
authorRyan Dahl <ry@tinyclouds.org>2010-06-06 17:41:03 -0700
committerRyan Dahl <ry@tinyclouds.org>2010-06-06 17:41:03 -0700
commit9be6c501ecf3d9432bdfca18a1b007ef4868ef5f (patch)
treeecf5f30be6766b65d1d6ff5ca60628f558b86d0b /deps/http_parser
parent935f8437fd375270a0c12a106ad5b0f64da93e01 (diff)
downloadandroid-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.gz
android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.bz2
android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.zip
Upgrade http-parser
Diffstat (limited to 'deps/http_parser')
-rw-r--r--deps/http_parser/Makefile6
-rw-r--r--deps/http_parser/README.md56
-rw-r--r--deps/http_parser/http_parser.c102
-rw-r--r--deps/http_parser/http_parser.h39
-rw-r--r--deps/http_parser/test.c177
5 files changed, 241 insertions, 139 deletions
diff --git a/deps/http_parser/Makefile b/deps/http_parser/Makefile
index 72e9d02c2c..dee994ea50 100644
--- a/deps/http_parser/Makefile
+++ b/deps/http_parser/Makefile
@@ -8,10 +8,10 @@ test: test_g
test_g: http_parser_g.o test_g.o
gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
-test_g.o: test.c Makefile
+test_g.o: test.c http_parser.h Makefile
gcc $(OPT_DEBUG) -c test.c -o $@
-test.o: test.c Makefile
+test.o: test.c http_parser.h Makefile
gcc $(OPT_FAST) -c test.c -o $@
http_parser_g.o: http_parser.c http_parser.h Makefile
@@ -23,7 +23,7 @@ test-valgrind: test_g
http_parser.o: http_parser.c http_parser.h Makefile
gcc $(OPT_FAST) -c http_parser.c
-test_fast: http_parser.o test.c
+test_fast: http_parser.o test.c http_parser.h
gcc $(OPT_FAST) http_parser.o test.c -o $@
test-run-timed: test_fast
diff --git a/deps/http_parser/README.md b/deps/http_parser/README.md
index 6666482ab8..79aef0efb4 100644
--- a/deps/http_parser/README.md
+++ b/deps/http_parser/README.md
@@ -1,30 +1,32 @@
HTTP Parser
===========
-This is a parser for HTTP messages written in C. It parses both requests
-and responses. The parser is designed to be used in performance HTTP
-applications. It does not make any allocations, it does not buffer data, and
-it can be interrupted at anytime. Depending on your architecture, it only
-requires between 100 and 200 bytes of data per message stream (in a web
-server that is per connection).
+This is a parser for HTTP messages written in C. It parses both requests and
+responses. The parser is designed to be used in high-performance HTTP
+applications. It makes no syscalls or allocations, it does not buffer data,
+and it can be interrupted at any time. Depending on your architecture, it
+only requires between 100 and 200 bytes of data per message stream (in a web
+server that is per connection).
Features:
* No dependencies
- * Parses both requests and responses.
- * Handles persistent streams.
+ * Handles persistent streams (keep-alive).
* Decodes chunked encoding.
- * Extracts the following data from a message
- * header fields and values
- * content-length
- * request method
- * response status code
- * transfer-encoding
- * http version
- * request path, query string, fragment
- * message body
- * Defends against buffer overflow attacks.
* Upgrade support
+ * Defends against buffer overflow attacks.
+
+The parser extracts the following information from HTTP messages:
+
+ * Header fields and values
+ * Content-Length
+ * Request method
+ * Response status code
+ * Transfer-Encoding
+ * HTTP version
+ * Request path, query string, fragment
+ * Message body
+
Usage
-----
@@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors.
}
/* Start up / continue the parser.
- * Note we pass the recved==0 to http_parse_requests to signal
- * that EOF has been recieved.
+ * Note we pass recved==0 to signal that EOF has been received.
*/
- nparsed = http_parser_execute(parser, settings, buf, recved);
+ nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
@@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
-It does not decode the content-encoding (gzip). Not all HTTP applications
-need to inspect the body. Decoding gzip is non-neglagable amount of
-processing (and requires making allocations). HTTP proxies using this
-parser, for example, would not want such a feature.
The Special Problem of Upgrade
------------------------------
@@ -109,11 +106,11 @@ information the Web Socket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body, issuing both on_headers_complete and on_message_complete callbacks. However
-http_parser_execute() may finish without parsing the entire supplied buffer.
+http_parser_execute() will stop parsing at the end of the headers and return.
-The user needs to check if parser->upgrade has been set to 1 after
-http_parser_execute() returns to determine if a premature exit was due to an
-upgrade or an error.
+The user is expected to check if `parser->upgrade` has been set to 1 after
+`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer
+at the offset given by the return value of `http_parser_execute()`.
Callbacks
@@ -166,6 +163,7 @@ and apply following logic:
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
+
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C
diff --git a/deps/http_parser/http_parser.c b/deps/http_parser/http_parser.c
index 8ecc94c2ee..7556d92099 100644
--- a/deps/http_parser/http_parser.c
+++ b/deps/http_parser/http_parser.c
@@ -32,9 +32,6 @@
#endif
-#define MAX_FIELD_SIZE (80*1024)
-
-
#define CALLBACK2(FOR) \
do { \
if (settings->on_##FOR) { \
@@ -45,20 +42,16 @@ do { \
#define MARK(FOR) \
do { \
- parser->FOR##_mark = p; \
- parser->FOR##_size = 0; \
+ FOR##_mark = p; \
} while (0)
-
#define CALLBACK_NOCLEAR(FOR) \
do { \
- if (parser->FOR##_mark) { \
- parser->FOR##_size += p - parser->FOR##_mark; \
- if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data); \
+ if (FOR##_mark) { \
if (settings->on_##FOR) { \
if (0 != settings->on_##FOR(parser, \
- parser->FOR##_mark, \
- p - parser->FOR##_mark)) \
+ FOR##_mark, \
+ p - FOR##_mark)) \
{ \
return (p - data); \
} \
@@ -70,7 +63,7 @@ do { \
#define CALLBACK(FOR) \
do { \
CALLBACK_NOCLEAR(FOR); \
- parser->FOR##_mark = NULL; \
+ FOR##_mark = NULL; \
} while (0)
@@ -132,6 +125,8 @@ static const uint32_t usual[] = {
enum state
{ s_dead = 1 /* important that this is > 0 */
+ , s_start_res_or_resp
+ , s_res_or_resp_H
, s_start_res
, s_res_H
, s_res_HT
@@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser,
return 0;
}
- if (parser->header_field_mark) parser->header_field_mark = data;
- if (parser->header_value_mark) parser->header_value_mark = data;
- if (parser->fragment_mark) parser->fragment_mark = data;
- if (parser->query_string_mark) parser->query_string_mark = data;
- if (parser->path_mark) parser->path_mark = data;
- if (parser->url_mark) parser->url_mark = data;
+ /* technically we could combine all of these (except for url_mark) into one
+ variable, saving stack space, but it seems more clear to have them
+ separated. */
+ const char *header_field_mark = 0;
+ const char *header_value_mark = 0;
+ const char *fragment_mark = 0;
+ const char *query_string_mark = 0;
+ const char *path_mark = 0;
+ const char *url_mark = 0;
+
+ if (state == s_header_field)
+ header_field_mark = data;
+ if (state == s_header_value)
+ header_value_mark = data;
+ if (state == s_req_fragment)
+ fragment_mark = data;
+ if (state == s_req_query_string)
+ query_string_mark = data;
+ if (state == s_req_path)
+ path_mark = data;
+ if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
+ || state == s_req_schema_slash_slash || state == s_req_port
+ || state == s_req_query_string_start || state == s_req_query_string
+ || state == s_req_fragment_start || state == s_req_fragment)
+ url_mark = data;
for (p=data, pe=data+len; p != pe; p++) {
ch = *p;
@@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser,
*/
goto error;
+ case s_start_res_or_resp:
+ {
+ if (ch == CR || ch == LF)
+ break;
+ parser->flags = 0;
+ parser->content_length = -1;
+
+ CALLBACK2(message_begin);
+
+ if (ch == 'H')
+ state = s_res_or_resp_H;
+ else {
+ parser->type = HTTP_REQUEST;
+ if (ch < 'A' || 'Z' < ch) goto error;
+ parser->buffer[0] = ch;
+ index = 0;
+ state = s_req_method;
+ }
+ break;
+ }
+
+ case s_res_or_resp_H:
+ if (ch == 'T') {
+ parser->type = HTTP_RESPONSE;
+ state = s_res_HT;
+ } else {
+ if (ch < 'A' || 'Z' < ch) goto error;
+ parser->type = HTTP_REQUEST;
+ parser->method = (enum http_method) 0;
+ parser->buffer[0] = 'H';
+ parser->buffer[1] = ch;
+ index = 1;
+ state = s_req_method;
+ }
+ break;
+
case s_start_res:
{
parser->flags = 0;
@@ -584,7 +634,7 @@ size_t http_parser_execute (http_parser *parser,
break;
case 9:
- if (ngx_str9cmp(parser->buffer,
+ if (ngx_str9cmp(parser->buffer,
'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H')) {
parser->method = HTTP_PROPPATCH;
break;
@@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser,
if (ch == ':') {
state = s_req_schema_slash;
break;
+ } else if (ch == '.') {
+ state = s_req_host;
+ break;
}
goto error;
@@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser,
if (!c) {
if (ch == CR) {
+ CALLBACK(header_value);
header_state = h_general;
state = s_header_almost_done;
break;
}
if (ch == LF) {
+ CALLBACK(header_value);
state = s_header_field_start;
break;
}
@@ -1547,15 +1602,8 @@ void
http_parser_init (http_parser *parser, enum http_parser_type t)
{
parser->type = t;
- parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res);
+ parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp));
parser->nread = 0;
parser->upgrade = 0;
-
- parser->header_field_mark = NULL;
- parser->header_value_mark = NULL;
- parser->query_string_mark = NULL;
- parser->path_mark = NULL;
- parser->url_mark = NULL;
- parser->fragment_mark = NULL;
}
diff --git a/deps/http_parser/http_parser.h b/deps/http_parser/http_parser.h
index 977ecdfa00..a4abf3208f 100644
--- a/deps/http_parser/http_parser.h
+++ b/deps/http_parser/http_parser.h
@@ -89,22 +89,15 @@ enum http_method
};
-enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
+enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
struct http_parser {
/** PRIVATE **/
- enum http_parser_type type;
- unsigned short state;
- unsigned short header_state;
- size_t index;
-
- /* 1 = Upgrade header was present and the parser has exited because of that.
- * 0 = No upgrade header present.
- * Should be checked when http_parser_execute() returns in addition to
- * error checking.
- */
- unsigned short upgrade;
+ unsigned char type;
+ unsigned char state;
+ unsigned char header_state;
+ unsigned char index;
char flags;
@@ -112,26 +105,20 @@ struct http_parser {
ssize_t body_read;
ssize_t content_length;
- const char *header_field_mark;
- size_t header_field_size;
- const char *header_value_mark;
- size_t header_value_size;
- const char *query_string_mark;
- size_t query_string_size;
- const char *path_mark;
- size_t path_size;
- const char *url_mark;
- size_t url_size;
- const char *fragment_mark;
- size_t fragment_size;
-
/** READ-ONLY **/
unsigned short status_code; /* responses only */
- enum http_method method; /* requests only */
+ unsigned short method; /* requests only */
unsigned short http_major;
unsigned short http_minor;
char buffer[HTTP_PARSER_MAX_METHOD_LEN];
+ /* 1 = Upgrade header was present and the parser has exited because of that.
+ * 0 = No upgrade header present.
+ * Should be checked when http_parser_execute() returns in addition to
+ * error checking.
+ */
+ char upgrade;
+
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
diff --git a/deps/http_parser/test.c b/deps/http_parser/test.c
index 03a98ff2a3..a9746adb54 100644
--- a/deps/http_parser/test.c
+++ b/deps/http_parser/test.c
@@ -495,6 +495,30 @@ const struct message requests[] =
,.body= ""
}
+#define CONNECT_REQUEST 17
+, {.name = "connect request"
+ ,.type= HTTP_REQUEST
+ ,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
+ "User-agent: Mozilla/1.1N\r\n"
+ "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
+ "\r\n"
+ ,.should_keep_alive= FALSE
+ ,.message_complete_on_eof= FALSE
+ ,.http_major= 1
+ ,.http_minor= 0
+ ,.method= HTTP_CONNECT
+ ,.query_string= ""
+ ,.fragment= ""
+ ,.request_path= ""
+ ,.request_url= "home.netscape.com:443"
+ ,.num_headers= 2
+ ,.upgrade=0
+ ,.headers= { { "User-agent", "Mozilla/1.1N" }
+ , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
+ }
+ ,.body= ""
+ }
+
, {.name= NULL } /* sentinel */
};
@@ -721,6 +745,43 @@ const struct message responses[] =
,.body= ""
}
+#define BONJOUR_MADAME_FR 8
+/* The client should not merge two header fields when the first one doesn't
+ * have a value.
+ */
+, {.name= "bonjourmadame.fr"
+ ,.type= HTTP_RESPONSE
+ ,.raw= "HTTP/1.0 301 Moved Permanently\r\n"
+ "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n"
+ "Server: Apache/2.2.3 (Red Hat)\r\n"
+ "Cache-Control: public\r\n"
+ "Pragma: \r\n"
+ "Location: http://www.bonjourmadame.fr/\r\n"
+ "Vary: Accept-Encoding\r\n"
+ "Content-Length: 0\r\n"
+ "Content-Type: text/html; charset=UTF-8\r\n"
+ "Connection: keep-alive\r\n"
+ "\r\n"
+ ,.should_keep_alive= TRUE
+ ,.message_complete_on_eof= FALSE
+ ,.http_major= 1
+ ,.http_minor= 0
+ ,.status_code= 301
+ ,.num_headers= 9
+ ,.headers=
+ { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" }
+ , { "Server", "Apache/2.2.3 (Red Hat)" }
+ , { "Cache-Control", "public" }
+ , { "Pragma", "" }
+ , { "Location", "http://www.bonjourmadame.fr/" }
+ , { "Vary", "Accept-Encoding" }
+ , { "Content-Length", "0" }
+ , { "Content-Type", "text/html; charset=UTF-8" }
+ , { "Connection", "keep-alive" }
+ }
+ ,.body= ""
+ }
+
, {.name= NULL } /* sentinel */
};
@@ -1207,82 +1268,84 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
int total_len = strlen(total);
- int total_ops = (total_len - 1) * (total_len - 2) / 2;
+ int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2;
int ops = 0 ;
size_t buf1_len, buf2_len, buf3_len;
- int i,j;
- for (j = 2; j < total_len; j ++ ) {
- for (i = 1; i < j; i ++ ) {
+ int i,j,type_both;
+ for (type_both = 0; type_both < 2; type_both ++ ) {
+ for (j = 2; j < total_len; j ++ ) {
+ for (i = 1; i < j; i ++ ) {
- if (ops % 1000 == 0) {
- printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
- fflush(stdout);
- }
- ops += 1;
+ if (ops % 1000 == 0) {
+ printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
+ fflush(stdout);
+ }
+ ops += 1;
- parser_init(r1->type);
+ parser_init(type_both ? HTTP_BOTH : r1->type);
- buf1_len = i;
- strncpy(buf1, total, buf1_len);
- buf1[buf1_len] = 0;
+ buf1_len = i;
+ strncpy(buf1, total, buf1_len);
+ buf1[buf1_len] = 0;
- buf2_len = j - i;
- strncpy(buf2, total+i, buf2_len);
- buf2[buf2_len] = 0;
+ buf2_len = j - i;
+ strncpy(buf2, total+i, buf2_len);
+ buf2[buf2_len] = 0;
- buf3_len = total_len - j;
- strncpy(buf3, total+j, buf3_len);
- buf3[buf3_len] = 0;
+ buf3_len = total_len - j;
+ strncpy(buf3, total+j, buf3_len);
+ buf3[buf3_len] = 0;
- read = parse(buf1, buf1_len);
- if (read != buf1_len) {
- print_error(buf1, read);
- goto error;
- }
+ read = parse(buf1, buf1_len);
+ if (read != buf1_len) {
+ print_error(buf1, read);
+ goto error;
+ }
- read = parse(buf2, buf2_len);
- if (read != buf2_len) {
- print_error(buf2, read);
- goto error;
- }
+ read = parse(buf2, buf2_len);
+ if (read != buf2_len) {
+ print_error(buf2, read);
+ goto error;
+ }
- read = parse(buf3, buf3_len);
- if (read != buf3_len) {
- print_error(buf3, read);
- goto error;
- }
+ read = parse(buf3, buf3_len);
+ if (read != buf3_len) {
+ print_error(buf3, read);
+ goto error;
+ }
- parse(NULL, 0);
+ parse(NULL, 0);
- if (3 != num_messages) {
- fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
- goto error;
- }
+ if (3 != num_messages) {
+ fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
+ goto error;
+ }
- if (!message_eq(0, r1)) {
- fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
- goto error;
- }
+ if (!message_eq(0, r1)) {
+ fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
+ goto error;
+ }
- if (!message_eq(1, r2)) {
- fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
- goto error;
- }
+ if (!message_eq(1, r2)) {
+ fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
+ goto error;
+ }
- if (!message_eq(2, r3)) {
- fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
- goto error;
- }
+ if (!message_eq(2, r3)) {
+ fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
+ goto error;
+ }
- parser_free();
+ parser_free();
+ }
}
}
puts("\b\b\b\b100%");
return;
-error:
+ error:
fprintf(stderr, "i=%d j=%d\n", i, j);
fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1);
fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2);
@@ -1395,12 +1458,18 @@ main (void)
- printf("response scan 1/1 ");
+ printf("response scan 1/2 ");
test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY]
, &responses[NO_HEADERS_NO_BODY_404]
, &responses[NO_REASON_PHRASE]
);
+ printf("response scan 1/2 ");
+ test_scan( &responses[BONJOUR_MADAME_FR]
+ , &responses[UNDERSTORE_HEADER_KEY]
+ , &responses[NO_CARRIAGE_RET]
+ );
+
puts("responses okay");