Upgrade http-parser

author: Ryan Dahl <ry@tinyclouds.org> 2010-06-06 17:41:03 -0700
committer: Ryan Dahl <ry@tinyclouds.org> 2010-06-06 17:41:03 -0700
commit: 9be6c501ecf3d9432bdfca18a1b007ef4868ef5f (patch)
tree: ecf5f30be6766b65d1d6ff5ca60628f558b86d0b /deps/http_parser
parent: 935f8437fd375270a0c12a106ad5b0f64da93e01 (diff)
download: android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.gz
android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.bz2
android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.zip
5 files changed, 241 insertions, 139 deletions
diff --git a/deps/http_parser/Makefile b/deps/http_parser/Makefile
index 72e9d02c2c..dee994ea50 100644
--- a/deps/http_parser/Makefile
+++ b/deps/http_parser/Makefile
@@ -8,10 +8,10 @@ test: test_g
 test_g: http_parser_g.o test_g.o
 	gcc $(OPT_DEBUG) http_parser_g.o test_g.o -o $@
 
-test_g.o: test.c Makefile
+test_g.o: test.c http_parser.h Makefile
 	gcc $(OPT_DEBUG) -c test.c -o $@
 
-test.o: test.c Makefile
+test.o: test.c http_parser.h Makefile
 	gcc $(OPT_FAST) -c test.c -o $@
 
 http_parser_g.o: http_parser.c http_parser.h Makefile
@@ -23,7 +23,7 @@ test-valgrind: test_g
 http_parser.o: http_parser.c http_parser.h Makefile
 	gcc $(OPT_FAST) -c http_parser.c
 
-test_fast: http_parser.o test.c
+test_fast: http_parser.o test.c http_parser.h
 	gcc $(OPT_FAST) http_parser.o test.c -o $@
 
 test-run-timed: test_fast
diff --git a/deps/http_parser/README.md b/deps/http_parser/README.md
index 6666482ab8..79aef0efb4 100644
--- a/deps/http_parser/README.md
+++ b/deps/http_parser/README.md
@@ -1,30 +1,32 @@
 HTTP Parser
 ===========
 
-This is a parser for HTTP messages written in C. It parses both requests
-and responses. The parser is designed to be used in performance HTTP
-applications. It does not make any allocations, it does not buffer data, and
-it can be interrupted at anytime. Depending on your architecture, it only
-requires between 100 and 200 bytes of data per message stream (in a web
-server that is per connection).
+This is a parser for HTTP messages written in C. It parses both requests and
+responses. The parser is designed to be used in high-performance HTTP
+applications. It does not make any syscalls or allocations, it does not
+buffer data, and it can be interrupted at any time. Depending on your
+architecture, it only requires between 100 and 200 bytes of data per message
+stream (in a web server that is per connection).
 
 Features:
 
   * No dependencies
-  * Parses both requests and responses.
-  * Handles persistent streams.
+  * Handles persistent streams (keep-alive).
   * Decodes chunked encoding.
-  * Extracts the following data from a message
-    * header fields and values
-    * content-length
-    * request method
-    * response status code
-    * transfer-encoding
-    * http version
-    * request path, query string, fragment
-    * message body
-  * Defends against buffer overflow attacks.
   * Upgrade support
+  * Defends against buffer overflow attacks.
+
+The parser extracts the following information from HTTP messages:
+
+  * Header fields and values
+  * Content-Length
+  * Request method
+  * Response status code
+  * Transfer-Encoding
+  * HTTP version
+  * Request path, query string, fragment
+  * Message body
+
 
 Usage
 -----
@@ -55,10 +57,9 @@ When data is received on the socket execute the parser and check for errors.
     }
 
     /* Start up / continue the parser.
-     * Note we pass the recved==0 to http_parse_requests to signal
-     * that EOF has been recieved.
+     * Note we pass recved==0 to signal that EOF has been received.
      */
-    nparsed = http_parser_execute(parser, settings, buf, recved);
+    nparsed = http_parser_execute(parser, &settings, buf, recved);
 
     if (parser->upgrade) {
       /* handle new protocol */
@@ -83,10 +84,6 @@ The parser decodes the transfer-encoding for both requests and responses
 transparently. That is, a chunked encoding is decoded before being sent to
 the on_body callback.
 
-It does not decode the content-encoding (gzip). Not all HTTP applications
-need to inspect the body. Decoding gzip is non-neglagable amount of
-processing (and requires making allocations). HTTP proxies using this
-parser, for example, would not want such a feature.
 
 The Special Problem of Upgrade
 ------------------------------
@@ -109,11 +106,11 @@ information the Web Socket protocol.)
 
 To support this, the parser will treat this as a normal HTTP message without a
 body. Issuing both on_headers_complete and on_message_complete callbacks. However
-http_parser_execute() may finish without parsing the entire supplied buffer.
+http_parser_execute() will stop parsing at the end of the headers and return.
 
-The user needs to check if parser->upgrade has been set to 1 after
-http_parser_execute() returns to determine if a premature exit was due to an
-upgrade or an error.
+The user is expected to check if `parser->upgrade` has been set to 1 after
+`http_parser_execute()` returns. Non-HTTP data begins in the supplied buffer
+at the offset given by the return value of `http_parser_execute()`.
 
 
 Callbacks
@@ -166,6 +163,7 @@ and apply following logic:
     |                        |            | and append callback data to it             |
      ------------------------ ------------ --------------------------------------------
 
+
 See examples of reading in headers:
 
 * [partial example](http://gist.github.com/155877) in C
diff --git a/deps/http_parser/http_parser.c b/deps/http_parser/http_parser.c
index 8ecc94c2ee..7556d92099 100644
--- a/deps/http_parser/http_parser.c
+++ b/deps/http_parser/http_parser.c
@@ -32,9 +32,6 @@
 #endif
 
 
-#define MAX_FIELD_SIZE (80*1024)
-
-
 #define CALLBACK2(FOR)                                               \
 do {                                                                 \
   if (settings->on_##FOR) {                                          \
@@ -45,20 +42,16 @@ do {                                                                 \
 
 #define MARK(FOR)                                                    \
 do {                                                                 \
-  parser->FOR##_mark = p;                                            \
-  parser->FOR##_size = 0;                                            \
+  FOR##_mark = p;                                                    \
 } while (0)
 
-
 #define CALLBACK_NOCLEAR(FOR)                                        \
 do {                                                                 \
-  if (parser->FOR##_mark) {                                          \
-    parser->FOR##_size += p - parser->FOR##_mark;                    \
-    if (parser->FOR##_size > MAX_FIELD_SIZE) return (p - data);      \
+  if (FOR##_mark) {                                                  \
     if (settings->on_##FOR) {                                        \
       if (0 != settings->on_##FOR(parser,                            \
-                                 parser->FOR##_mark,                 \
-                                 p - parser->FOR##_mark))            \
+                                 FOR##_mark,                         \
+                                 p - FOR##_mark))                    \
       {                                                              \
         return (p - data);                                           \
       }                                                              \
@@ -70,7 +63,7 @@ do {                                                                 \
 #define CALLBACK(FOR)                                                \
 do {                                                                 \
   CALLBACK_NOCLEAR(FOR);                                             \
-  parser->FOR##_mark = NULL;                                         \
+  FOR##_mark = NULL;                                                 \
 } while (0)
 
 
@@ -132,6 +125,8 @@ static const uint32_t  usual[] = {
 enum state
   { s_dead = 1 /* important that this is > 0 */
 
+  , s_start_res_or_resp
+  , s_res_or_resp_H
   , s_start_res
   , s_res_H
   , s_res_HT
@@ -303,12 +298,31 @@ size_t http_parser_execute (http_parser *parser,
     return 0;
   }
 
-  if (parser->header_field_mark)   parser->header_field_mark   = data;
-  if (parser->header_value_mark)   parser->header_value_mark   = data;
-  if (parser->fragment_mark)       parser->fragment_mark       = data;
-  if (parser->query_string_mark)   parser->query_string_mark   = data;
-  if (parser->path_mark)           parser->path_mark           = data;
-  if (parser->url_mark)            parser->url_mark            = data;
+  /* technically we could combine all of these (except for url_mark) into one
+     variable, saving stack space, but it seems more clear to have them
+     separated. */
+  const char *header_field_mark = 0;
+  const char *header_value_mark = 0;
+  const char *fragment_mark = 0;
+  const char *query_string_mark = 0;
+  const char *path_mark = 0;
+  const char *url_mark = 0;
+
+  if (state == s_header_field)
+    header_field_mark = data;
+  if (state == s_header_value)
+    header_value_mark = data;
+  if (state == s_req_fragment)
+    fragment_mark = data;
+  if (state == s_req_query_string)
+    query_string_mark = data;
+  if (state == s_req_path)
+    path_mark = data;
+  if (state == s_req_path || state == s_req_schema || state == s_req_schema_slash
+      || state == s_req_schema_slash_slash || state == s_req_port
+      || state == s_req_query_string_start || state == s_req_query_string
+      || state == s_req_fragment_start || state == s_req_fragment)
+    url_mark = data;
 
   for (p=data, pe=data+len; p != pe; p++) {
     ch = *p;
@@ -326,6 +340,42 @@ size_t http_parser_execute (http_parser *parser,
          */
         goto error;
 
+      case s_start_res_or_resp:
+      {
+        if (ch == CR || ch == LF)
+          break;
+        parser->flags = 0;
+        parser->content_length = -1;
+
+        CALLBACK2(message_begin);
+
+        if (ch == 'H')
+          state = s_res_or_resp_H;
+        else {
+          parser->type = HTTP_REQUEST;
+          if (ch < 'A' || 'Z' < ch) goto error;
+          parser->buffer[0] = ch;
+          index = 0;
+          state = s_req_method;
+        }
+        break;
+      }
+
+      case s_res_or_resp_H:
+        if (ch == 'T') {
+          parser->type = HTTP_RESPONSE;
+          state = s_res_HT;
+        } else {
+          if (ch < 'A' || 'Z' < ch) goto error;
+          parser->type = HTTP_REQUEST;
+          parser->method = (enum http_method) 0;
+          parser->buffer[0] = 'H';
+          parser->buffer[1] = ch;
+          index = 1;
+          state = s_req_method;
+        }
+        break;
+
       case s_start_res:
       {
         parser->flags = 0;
@@ -584,7 +634,7 @@ size_t http_parser_execute (http_parser *parser,
               break;
 
             case 9:
-              if (ngx_str9cmp(parser->buffer, 
+              if (ngx_str9cmp(parser->buffer,
                     'P', 'R', 'O', 'P', 'P', 'A', 'T', 'C', 'H')) {
                 parser->method = HTTP_PROPPATCH;
                 break;
@@ -637,6 +687,9 @@ size_t http_parser_execute (http_parser *parser,
         if (ch == ':') {
           state = s_req_schema_slash;
           break;
+        } else if (ch == '.') {
+          state = s_req_host;
+          break;
         }
 
         goto error;
@@ -1156,12 +1209,14 @@ size_t http_parser_execute (http_parser *parser,
 
         if (!c) {
           if (ch == CR) {
+            CALLBACK(header_value);
             header_state = h_general;
             state = s_header_almost_done;
             break;
           }
 
           if (ch == LF) {
+            CALLBACK(header_value);
             state = s_header_field_start;
             break;
           }
@@ -1547,15 +1602,8 @@ void
 http_parser_init (http_parser *parser, enum http_parser_type t)
 {
   parser->type = t;
-  parser->state = (t == HTTP_REQUEST ? s_start_req : s_start_res);
+  parser->state = (t == HTTP_REQUEST ? s_start_req : (t == HTTP_RESPONSE ? s_start_res : s_start_res_or_resp));
   parser->nread = 0;
   parser->upgrade = 0;
-
-  parser->header_field_mark = NULL;
-  parser->header_value_mark = NULL;
-  parser->query_string_mark = NULL;
-  parser->path_mark = NULL;
-  parser->url_mark = NULL;
-  parser->fragment_mark = NULL;
 }
 
diff --git a/deps/http_parser/http_parser.h b/deps/http_parser/http_parser.h
index 977ecdfa00..a4abf3208f 100644
--- a/deps/http_parser/http_parser.h
+++ b/deps/http_parser/http_parser.h
@@ -89,22 +89,15 @@ enum http_method
   };
 
 
-enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE };
+enum http_parser_type { HTTP_REQUEST, HTTP_RESPONSE, HTTP_BOTH };
 
 
 struct http_parser {
   /** PRIVATE **/
-  enum http_parser_type type;
-  unsigned short state;
-  unsigned short header_state;
-  size_t index;
-
-  /* 1 = Upgrade header was present and the parser has exited because of that.
-   * 0 = No upgrade header present.
-   * Should be checked when http_parser_execute() returns in addition to
-   * error checking.
-   */
-  unsigned short upgrade;
+  unsigned char type;
+  unsigned char state;
+  unsigned char header_state;
+  unsigned char index;
 
   char flags;
 
@@ -112,26 +105,20 @@ struct http_parser {
   ssize_t body_read;
   ssize_t content_length;
 
-  const char *header_field_mark;
-  size_t      header_field_size;
-  const char *header_value_mark;
-  size_t      header_value_size;
-  const char *query_string_mark;
-  size_t      query_string_size;
-  const char *path_mark;
-  size_t      path_size;
-  const char *url_mark;
-  size_t      url_size;
-  const char *fragment_mark;
-  size_t      fragment_size;
-
   /** READ-ONLY **/
   unsigned short status_code; /* responses only */
-  enum http_method method;    /* requests only */
+  unsigned short method;    /* requests only */
   unsigned short http_major;
   unsigned short http_minor;
   char buffer[HTTP_PARSER_MAX_METHOD_LEN];
 
+  /* 1 = Upgrade header was present and the parser has exited because of that.
+   * 0 = No upgrade header present.
+   * Should be checked when http_parser_execute() returns in addition to
+   * error checking.
+   */
+  char upgrade;
+
   /** PUBLIC **/
   void *data; /* A pointer to get hook to the "connection" or "socket" object */
 };
diff --git a/deps/http_parser/test.c b/deps/http_parser/test.c
index 03a98ff2a3..a9746adb54 100644
--- a/deps/http_parser/test.c
+++ b/deps/http_parser/test.c
@@ -495,6 +495,30 @@ const struct message requests[] =
   ,.body= ""
   }
 
+#define CONNECT_REQUEST 17
+, {.name = "connect request"
+  ,.type= HTTP_REQUEST
+  ,.raw= "CONNECT home.netscape.com:443 HTTP/1.0\r\n"
+         "User-agent: Mozilla/1.1N\r\n"
+         "Proxy-authorization: basic aGVsbG86d29ybGQ=\r\n"
+         "\r\n"
+  ,.should_keep_alive= FALSE
+  ,.message_complete_on_eof= FALSE
+  ,.http_major= 1
+  ,.http_minor= 0
+  ,.method= HTTP_CONNECT
+  ,.query_string= ""
+  ,.fragment= ""
+  ,.request_path= ""
+  ,.request_url= "home.netscape.com:443"
+  ,.num_headers= 2
+  ,.upgrade=0
+  ,.headers= { { "User-agent", "Mozilla/1.1N" }
+             , { "Proxy-authorization", "basic aGVsbG86d29ybGQ=" }
+             }
+  ,.body= ""
+  }
+
 , {.name= NULL } /* sentinel */
 };
 
@@ -721,6 +745,43 @@ const struct message responses[] =
   ,.body= ""
   }
 
+#define BONJOUR_MADAME_FR 8
+/* The client should not merge two header fields when the first one doesn't
+ * have a value.
+ */
+, {.name= "bonjourmadame.fr"
+  ,.type= HTTP_RESPONSE
+  ,.raw= "HTTP/1.0 301 Moved Permanently\r\n"
+         "Date: Thu, 03 Jun 2010 09:56:32 GMT\r\n"
+         "Server: Apache/2.2.3 (Red Hat)\r\n"
+         "Cache-Control: public\r\n"
+         "Pragma: \r\n"
+         "Location: http://www.bonjourmadame.fr/\r\n"
+         "Vary: Accept-Encoding\r\n"
+         "Content-Length: 0\r\n"
+         "Content-Type: text/html; charset=UTF-8\r\n"
+         "Connection: keep-alive\r\n"
+         "\r\n"
+  ,.should_keep_alive= TRUE
+  ,.message_complete_on_eof= FALSE
+  ,.http_major= 1
+  ,.http_minor= 0
+  ,.status_code= 301
+  ,.num_headers= 9
+  ,.headers=
+    { { "Date", "Thu, 03 Jun 2010 09:56:32 GMT" }
+    , { "Server", "Apache/2.2.3 (Red Hat)" }
+    , { "Cache-Control", "public" }
+    , { "Pragma", "" }
+    , { "Location", "http://www.bonjourmadame.fr/" }
+    , { "Vary",  "Accept-Encoding" }
+    , { "Content-Length", "0" }
+    , { "Content-Type", "text/html; charset=UTF-8" }
+    , { "Connection", "keep-alive" }
+    }
+  ,.body= ""
+  }
+
 , {.name= NULL } /* sentinel */
 };
 
@@ -1207,82 +1268,84 @@ test_scan (const struct message *r1, const struct message *r2, const struct mess
 
   int total_len = strlen(total);
 
-  int total_ops = (total_len - 1) * (total_len - 2) / 2;
+  int total_ops = 2 * (total_len - 1) * (total_len - 2) / 2;
   int ops = 0 ;
 
   size_t buf1_len, buf2_len, buf3_len;
 
-  int i,j;
-  for (j = 2; j < total_len; j ++ ) {
-    for (i = 1; i < j; i ++ ) {
+  int i,j,type_both;
+  for (type_both = 0; type_both < 2; type_both ++ ) {
+    for (j = 2; j < total_len; j ++ ) {
+      for (i = 1; i < j; i ++ ) {
 
-      if (ops % 1000 == 0)  {
-        printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
-        fflush(stdout);
-      }
-      ops += 1;
+        if (ops % 1000 == 0)  {
+          printf("\b\b\b\b%3.0f%%", 100 * (float)ops /(float)total_ops);
+          fflush(stdout);
+        }
+        ops += 1;
 
-      parser_init(r1->type);
+        parser_init(type_both ? HTTP_BOTH : r1->type);
 
-      buf1_len = i;
-      strncpy(buf1, total, buf1_len);
-      buf1[buf1_len] = 0;
+        buf1_len = i;
+        strncpy(buf1, total, buf1_len);
+        buf1[buf1_len] = 0;
 
-      buf2_len = j - i;
-      strncpy(buf2, total+i, buf2_len);
-      buf2[buf2_len] = 0;
+        buf2_len = j - i;
+        strncpy(buf2, total+i, buf2_len);
+        buf2[buf2_len] = 0;
 
-      buf3_len = total_len - j;
-      strncpy(buf3, total+j, buf3_len);
-      buf3[buf3_len] = 0;
+        buf3_len = total_len - j;
+        strncpy(buf3, total+j, buf3_len);
+        buf3[buf3_len] = 0;
 
-      read = parse(buf1, buf1_len);
-      if (read != buf1_len) {
-        print_error(buf1, read);
-        goto error;
-      }
+        read = parse(buf1, buf1_len);
+        if (read != buf1_len) {
+          print_error(buf1, read);
+          goto error;
+        }
 
-      read = parse(buf2, buf2_len);
-      if (read != buf2_len) {
-        print_error(buf2, read);
-        goto error;
-      }
+        read = parse(buf2, buf2_len);
+        if (read != buf2_len) {
+          print_error(buf2, read);
+          goto error;
+        }
 
-      read = parse(buf3, buf3_len);
-      if (read != buf3_len) {
-        print_error(buf3, read);
-        goto error;
-      }
+        read = parse(buf3, buf3_len);
+        if (read != buf3_len) {
+          print_error(buf3, read);
+          goto error;
+        }
 
-      parse(NULL, 0);
+        parse(NULL, 0);
 
-      if (3 != num_messages) {
-        fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
-        goto error;
-      }
+        if (3 != num_messages) {
+          fprintf(stderr, "\n\nParser didn't see 3 messages only %d\n", num_messages);
+          goto error;
+        }
 
-      if (!message_eq(0, r1)) {
-        fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
-        goto error;
-      }
+        if (!message_eq(0, r1)) {
+          fprintf(stderr, "\n\nError matching messages[0] in test_scan.\n");
+          goto error;
+        }
 
-      if (!message_eq(1, r2)) {
-        fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
-        goto error;
-      }
+        if (!message_eq(1, r2)) {
+          fprintf(stderr, "\n\nError matching messages[1] in test_scan.\n");
+          goto error;
+        }
 
-      if (!message_eq(2, r3)) {
-        fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
-        goto error;
-      }
+        if (!message_eq(2, r3)) {
+          fprintf(stderr, "\n\nError matching messages[2] in test_scan.\n");
+          goto error;
+        }
 
-      parser_free();
+        parser_free();
+      }
     }
   }
   puts("\b\b\b\b100%");
   return;
 
-error:
+ error:
   fprintf(stderr, "i=%d  j=%d\n", i, j);
   fprintf(stderr, "buf1 (%u) %s\n\n", (unsigned int)buf1_len, buf1);
   fprintf(stderr, "buf2 (%u) %s\n\n", (unsigned int)buf2_len , buf2);
@@ -1395,12 +1458,18 @@ main (void)
 
 
 
-  printf("response scan 1/1      ");
+  printf("response scan 1/2      ");
   test_scan( &responses[TRAILING_SPACE_ON_CHUNKED_BODY]
            , &responses[NO_HEADERS_NO_BODY_404]
            , &responses[NO_REASON_PHRASE]
            );
 
+  printf("response scan 1/2      ");
+  test_scan( &responses[BONJOUR_MADAME_FR]
+           , &responses[UNDERSTORE_HEADER_KEY]
+           , &responses[NO_CARRIAGE_RET]
+           );
+
   puts("responses okay");
author	Ryan Dahl <ry@tinyclouds.org>	2010-06-06 17:41:03 -0700
committer	Ryan Dahl <ry@tinyclouds.org>	2010-06-06 17:41:03 -0700
commit	9be6c501ecf3d9432bdfca18a1b007ef4868ef5f (patch)
tree	ecf5f30be6766b65d1d6ff5ca60628f558b86d0b /deps/http_parser
parent	935f8437fd375270a0c12a106ad5b0f64da93e01 (diff)
download	android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.gz android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.tar.bz2 android-node-v8-9be6c501ecf3d9432bdfca18a1b007ef4868ef5f.zip