Upgrade http-parser.

This commit is contained in:
Adam Ierymenko 2016-02-25 18:13:42 -08:00
parent 039790cf26
commit f217ce7ff7
2 changed files with 161 additions and 43 deletions

View File

@ -400,6 +400,8 @@ enum http_host_state
, s_http_host , s_http_host
, s_http_host_v6 , s_http_host_v6
, s_http_host_v6_end , s_http_host_v6_end
, s_http_host_v6_zone_start
, s_http_host_v6_zone
, s_http_host_port_start , s_http_host_port_start
, s_http_host_port , s_http_host_port
}; };
@ -433,6 +435,12 @@ enum http_host_state
(IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_') (IS_ALPHANUM(c) || (c) == '.' || (c) == '-' || (c) == '_')
#endif #endif
/**
 * Verify that a char is acceptable inside a header value: CR, LF,
 * horizontal tab (9), any visible (printable) US-ASCII character,
 * or an octet in the range %x80-FF. Control characters below 32
 * (other than the three above) and DEL (127) are rejected.
 *
 * The range test is done on the value converted to unsigned char:
 * where plain char is signed, octets %x80-FF are negative and would
 * otherwise fail the "> 31" comparison.
 *
 * Note: the argument is evaluated more than once; do not pass an
 * expression with side effects.
 **/
#define IS_HEADER_CHAR(ch)                                                     \
  ((ch) == CR || (ch) == LF || (ch) == 9 ||                                    \
   ((unsigned char)(ch) > 31 && (ch) != 127))
#define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res) #define start_state (parser->type == HTTP_REQUEST ? s_start_req : s_start_res)
@ -637,6 +645,7 @@ size_t http_parser_execute (http_parser *parser,
const char *body_mark = 0; const char *body_mark = 0;
const char *status_mark = 0; const char *status_mark = 0;
enum state p_state = (enum state) parser->state; enum state p_state = (enum state) parser->state;
const unsigned int lenient = parser->lenient_http_headers;
/* We're in an error state. Don't bother doing anything. */ /* We're in an error state. Don't bother doing anything. */
if (HTTP_PARSER_ERRNO(parser) != HPE_OK) { if (HTTP_PARSER_ERRNO(parser) != HPE_OK) {
@ -957,21 +966,23 @@ reexecute:
parser->method = (enum http_method) 0; parser->method = (enum http_method) 0;
parser->index = 1; parser->index = 1;
switch (ch) { switch (ch) {
case 'A': parser->method = HTTP_ACL; break;
case 'B': parser->method = HTTP_BIND; break;
case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break; case 'C': parser->method = HTTP_CONNECT; /* or COPY, CHECKOUT */ break;
case 'D': parser->method = HTTP_DELETE; break; case 'D': parser->method = HTTP_DELETE; break;
case 'G': parser->method = HTTP_GET; break; case 'G': parser->method = HTTP_GET; break;
case 'H': parser->method = HTTP_HEAD; break; case 'H': parser->method = HTTP_HEAD; break;
case 'L': parser->method = HTTP_LOCK; break; case 'L': parser->method = HTTP_LOCK; /* or LINK */ break;
case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break; case 'M': parser->method = HTTP_MKCOL; /* or MOVE, MKACTIVITY, MERGE, M-SEARCH, MKCALENDAR */ break;
case 'N': parser->method = HTTP_NOTIFY; break; case 'N': parser->method = HTTP_NOTIFY; break;
case 'O': parser->method = HTTP_OPTIONS; break; case 'O': parser->method = HTTP_OPTIONS; break;
case 'P': parser->method = HTTP_POST; case 'P': parser->method = HTTP_POST;
/* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */ /* or PROPFIND|PROPPATCH|PUT|PATCH|PURGE */
break; break;
case 'R': parser->method = HTTP_REPORT; break; case 'R': parser->method = HTTP_REPORT; /* or REBIND */ break;
case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break; case 'S': parser->method = HTTP_SUBSCRIBE; /* or SEARCH */ break;
case 'T': parser->method = HTTP_TRACE; break; case 'T': parser->method = HTTP_TRACE; break;
case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE */ break; case 'U': parser->method = HTTP_UNLOCK; /* or UNSUBSCRIBE, UNBIND, UNLINK */ break;
default: default:
SET_ERRNO(HPE_INVALID_METHOD); SET_ERRNO(HPE_INVALID_METHOD);
goto error; goto error;
@ -1027,16 +1038,32 @@ reexecute:
SET_ERRNO(HPE_INVALID_METHOD); SET_ERRNO(HPE_INVALID_METHOD);
goto error; goto error;
} }
} else if (parser->index == 1 && parser->method == HTTP_POST) { } else if (parser->method == HTTP_REPORT) {
if (ch == 'R') { if (parser->index == 2 && ch == 'B') {
parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */ parser->method = HTTP_REBIND;
} else if (ch == 'U') { } else {
parser->method = HTTP_PUT; /* or HTTP_PURGE */ SET_ERRNO(HPE_INVALID_METHOD);
} else if (ch == 'A') { goto error;
parser->method = HTTP_PATCH; }
} else { } else if (parser->index == 1) {
SET_ERRNO(HPE_INVALID_METHOD); if (parser->method == HTTP_POST) {
goto error; if (ch == 'R') {
parser->method = HTTP_PROPFIND; /* or HTTP_PROPPATCH */
} else if (ch == 'U') {
parser->method = HTTP_PUT; /* or HTTP_PURGE */
} else if (ch == 'A') {
parser->method = HTTP_PATCH;
} else {
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
} else if (parser->method == HTTP_LOCK) {
if (ch == 'I') {
parser->method = HTTP_LINK;
} else {
SET_ERRNO(HPE_INVALID_METHOD);
goto error;
}
} }
} else if (parser->index == 2) { } else if (parser->index == 2) {
if (parser->method == HTTP_PUT) { if (parser->method == HTTP_PUT) {
@ -1049,6 +1076,8 @@ reexecute:
} else if (parser->method == HTTP_UNLOCK) { } else if (parser->method == HTTP_UNLOCK) {
if (ch == 'S') { if (ch == 'S') {
parser->method = HTTP_UNSUBSCRIBE; parser->method = HTTP_UNSUBSCRIBE;
} else if(ch == 'B') {
parser->method = HTTP_UNBIND;
} else { } else {
SET_ERRNO(HPE_INVALID_METHOD); SET_ERRNO(HPE_INVALID_METHOD);
goto error; goto error;
@ -1059,6 +1088,8 @@ reexecute:
} }
} else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') { } else if (parser->index == 4 && parser->method == HTTP_PROPFIND && ch == 'P') {
parser->method = HTTP_PROPPATCH; parser->method = HTTP_PROPPATCH;
} else if (parser->index == 3 && parser->method == HTTP_UNLOCK && ch == 'I') {
parser->method = HTTP_UNLINK;
} else { } else {
SET_ERRNO(HPE_INVALID_METHOD); SET_ERRNO(HPE_INVALID_METHOD);
goto error; goto error;
@ -1384,7 +1415,12 @@ reexecute:
|| c != CONTENT_LENGTH[parser->index]) { || c != CONTENT_LENGTH[parser->index]) {
parser->header_state = h_general; parser->header_state = h_general;
} else if (parser->index == sizeof(CONTENT_LENGTH)-2) { } else if (parser->index == sizeof(CONTENT_LENGTH)-2) {
if (parser->flags & F_CONTENTLENGTH) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
}
parser->header_state = h_content_length; parser->header_state = h_content_length;
parser->flags |= F_CONTENTLENGTH;
} }
break; break;
@ -1536,6 +1572,11 @@ reexecute:
REEXECUTE(); REEXECUTE();
} }
if (!lenient && !IS_HEADER_CHAR(ch)) {
SET_ERRNO(HPE_INVALID_HEADER_TOKEN);
goto error;
}
c = LOWER(ch); c = LOWER(ch);
switch (h_state) { switch (h_state) {
@ -1703,7 +1744,10 @@ reexecute:
case s_header_almost_done: case s_header_almost_done:
{ {
STRICT_CHECK(ch != LF); if (UNLIKELY(ch != LF)) {
SET_ERRNO(HPE_LF_EXPECTED);
goto error;
}
UPDATE_STATE(s_header_value_lws); UPDATE_STATE(s_header_value_lws);
break; break;
@ -1782,9 +1826,17 @@ reexecute:
if (parser->flags & F_TRAILING) { if (parser->flags & F_TRAILING) {
/* End of a chunked request */ /* End of a chunked request */
UPDATE_STATE(NEW_MESSAGE()); UPDATE_STATE(s_message_done);
CALLBACK_NOTIFY(message_complete); CALLBACK_NOTIFY_NOADVANCE(chunk_complete);
break; REEXECUTE();
}
/* Cannot use chunked encoding and a content-length header together
per the HTTP specification. */
if ((parser->flags & F_CHUNKED) &&
(parser->flags & F_CONTENTLENGTH)) {
SET_ERRNO(HPE_UNEXPECTED_CONTENT_LENGTH);
goto error;
} }
UPDATE_STATE(s_headers_done); UPDATE_STATE(s_headers_done);
@ -1828,12 +1880,16 @@ reexecute:
case s_headers_done: case s_headers_done:
{ {
int hasBody;
STRICT_CHECK(ch != LF); STRICT_CHECK(ch != LF);
parser->nread = 0; parser->nread = 0;
/* Exit, the rest of the connect is in a different protocol. */ hasBody = parser->flags & F_CHUNKED ||
if (parser->upgrade) { (parser->content_length > 0 && parser->content_length != ULLONG_MAX);
if (parser->upgrade && (parser->method == HTTP_CONNECT ||
(parser->flags & F_SKIPBODY) || !hasBody)) {
/* Exit, the rest of the message is in a different protocol. */
UPDATE_STATE(NEW_MESSAGE()); UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete); CALLBACK_NOTIFY(message_complete);
RETURN((p - data) + 1); RETURN((p - data) + 1);
@ -1854,8 +1910,7 @@ reexecute:
/* Content-Length header given and non-zero */ /* Content-Length header given and non-zero */
UPDATE_STATE(s_body_identity); UPDATE_STATE(s_body_identity);
} else { } else {
if (parser->type == HTTP_REQUEST || if (!http_message_needs_eof(parser)) {
!http_message_needs_eof(parser)) {
/* Assume content-length 0 - read the next */ /* Assume content-length 0 - read the next */
UPDATE_STATE(NEW_MESSAGE()); UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete); CALLBACK_NOTIFY(message_complete);
@ -1915,6 +1970,10 @@ reexecute:
case s_message_done: case s_message_done:
UPDATE_STATE(NEW_MESSAGE()); UPDATE_STATE(NEW_MESSAGE());
CALLBACK_NOTIFY(message_complete); CALLBACK_NOTIFY(message_complete);
if (parser->upgrade) {
/* Exit, the rest of the message is in a different protocol. */
RETURN((p - data) + 1);
}
break; break;
case s_chunk_size_start: case s_chunk_size_start:
@ -1994,6 +2053,7 @@ reexecute:
} else { } else {
UPDATE_STATE(s_chunk_data); UPDATE_STATE(s_chunk_data);
} }
CALLBACK_NOTIFY(chunk_header);
break; break;
} }
@ -2033,6 +2093,7 @@ reexecute:
STRICT_CHECK(ch != LF); STRICT_CHECK(ch != LF);
parser->nread = 0; parser->nread = 0;
UPDATE_STATE(s_chunk_size_start); UPDATE_STATE(s_chunk_size_start);
CALLBACK_NOTIFY(chunk_complete);
break; break;
default: default:
@ -2144,13 +2205,13 @@ http_parser_settings_init(http_parser_settings *settings)
const char * const char *
http_errno_name(enum http_errno err) { http_errno_name(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
return http_strerror_tab[err].name; return http_strerror_tab[err].name;
} }
const char * const char *
http_errno_description(enum http_errno err) { http_errno_description(enum http_errno err) {
assert(err < (sizeof(http_strerror_tab)/sizeof(http_strerror_tab[0]))); assert(((size_t) err) < ARRAY_SIZE(http_strerror_tab));
return http_strerror_tab[err].description; return http_strerror_tab[err].description;
} }
@ -2203,6 +2264,23 @@ http_parse_host_char(enum http_host_state s, const char ch) {
return s_http_host_v6; return s_http_host_v6;
} }
if (s == s_http_host_v6 && ch == '%') {
return s_http_host_v6_zone_start;
}
break;
case s_http_host_v6_zone:
if (ch == ']') {
return s_http_host_v6_end;
}
/* FALLTHROUGH */
case s_http_host_v6_zone_start:
/* RFC 6874 Zone ID consists of 1*( unreserved / pct-encoded) */
if (IS_ALPHANUM(ch) || ch == '%' || ch == '.' || ch == '-' || ch == '_' ||
ch == '~') {
return s_http_host_v6_zone;
}
break; break;
case s_http_host_port: case s_http_host_port:
@ -2221,6 +2299,7 @@ http_parse_host_char(enum http_host_state s, const char ch) {
static int static int
http_parse_host(const char * buf, struct http_parser_url *u, int found_at) { http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
assert(u->field_set & (1 << UF_HOST));
enum http_host_state s; enum http_host_state s;
const char *p; const char *p;
@ -2252,6 +2331,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
u->field_data[UF_HOST].len++; u->field_data[UF_HOST].len++;
break; break;
case s_http_host_v6_zone_start:
case s_http_host_v6_zone:
u->field_data[UF_HOST].len++;
break;
case s_http_host_port: case s_http_host_port:
if (s != s_http_host_port) { if (s != s_http_host_port) {
u->field_data[UF_PORT].off = p - buf; u->field_data[UF_PORT].off = p - buf;
@ -2281,6 +2365,8 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
case s_http_host_start: case s_http_host_start:
case s_http_host_v6_start: case s_http_host_v6_start:
case s_http_host_v6: case s_http_host_v6:
case s_http_host_v6_zone_start:
case s_http_host_v6_zone:
case s_http_host_port_start: case s_http_host_port_start:
case s_http_userinfo: case s_http_userinfo:
case s_http_userinfo_start: case s_http_userinfo_start:
@ -2292,6 +2378,11 @@ http_parse_host(const char * buf, struct http_parser_url *u, int found_at) {
return 0; return 0;
} }
/* Zero-fill the whole http_parser_url object pointed to by u. */
void
http_parser_url_init(struct http_parser_url *u) {
  const size_t nbytes = sizeof *u;

  memset(u, 0, nbytes);
}
int int
http_parser_parse_url(const char *buf, size_t buflen, int is_connect, http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
struct http_parser_url *u) struct http_parser_url *u)
@ -2365,7 +2456,12 @@ http_parser_parse_url(const char *buf, size_t buflen, int is_connect,
/* host must be present if there is a schema */ /* host must be present if there is a schema */
/* parsing http:///toto will fail */ /* parsing http:///toto will fail */
if ((u->field_set & ((1 << UF_SCHEMA) | (1 << UF_HOST))) != 0) { if ((u->field_set & (1 << UF_SCHEMA)) &&
(u->field_set & (1 << UF_HOST)) == 0) {
return 1;
}
if (u->field_set & (1 << UF_HOST)) {
if (http_parse_host(buf, u, found_at) != 0) { if (http_parse_host(buf, u, found_at) != 0) {
return 1; return 1;
} }

View File

@ -26,11 +26,12 @@ extern "C" {
/* Also update SONAME in the Makefile whenever you change these. */ /* Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2 #define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 4 #define HTTP_PARSER_VERSION_MINOR 6
#define HTTP_PARSER_VERSION_PATCH 2 #define HTTP_PARSER_VERSION_PATCH 1
#include <sys/types.h> #include <sys/types.h>
#if defined(_WIN32) && !defined(__MINGW32__) && (!defined(_MSC_VER) || _MSC_VER<1600) #if defined(_WIN32) && !defined(__MINGW32__) && \
(!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
#include <BaseTsd.h> #include <BaseTsd.h>
#include <stddef.h> #include <stddef.h>
typedef __int8 int8_t; typedef __int8 int8_t;
@ -95,7 +96,7 @@ typedef int (*http_cb) (http_parser*);
XX(5, CONNECT, CONNECT) \ XX(5, CONNECT, CONNECT) \
XX(6, OPTIONS, OPTIONS) \ XX(6, OPTIONS, OPTIONS) \
XX(7, TRACE, TRACE) \ XX(7, TRACE, TRACE) \
/* webdav */ \ /* WebDAV */ \
XX(8, COPY, COPY) \ XX(8, COPY, COPY) \
XX(9, LOCK, LOCK) \ XX(9, LOCK, LOCK) \
XX(10, MKCOL, MKCOL) \ XX(10, MKCOL, MKCOL) \
@ -104,21 +105,28 @@ typedef int (*http_cb) (http_parser*);
XX(13, PROPPATCH, PROPPATCH) \ XX(13, PROPPATCH, PROPPATCH) \
XX(14, SEARCH, SEARCH) \ XX(14, SEARCH, SEARCH) \
XX(15, UNLOCK, UNLOCK) \ XX(15, UNLOCK, UNLOCK) \
XX(16, BIND, BIND) \
XX(17, REBIND, REBIND) \
XX(18, UNBIND, UNBIND) \
XX(19, ACL, ACL) \
/* subversion */ \ /* subversion */ \
XX(16, REPORT, REPORT) \ XX(20, REPORT, REPORT) \
XX(17, MKACTIVITY, MKACTIVITY) \ XX(21, MKACTIVITY, MKACTIVITY) \
XX(18, CHECKOUT, CHECKOUT) \ XX(22, CHECKOUT, CHECKOUT) \
XX(19, MERGE, MERGE) \ XX(23, MERGE, MERGE) \
/* upnp */ \ /* upnp */ \
XX(20, MSEARCH, M-SEARCH) \ XX(24, MSEARCH, M-SEARCH) \
XX(21, NOTIFY, NOTIFY) \ XX(25, NOTIFY, NOTIFY) \
XX(22, SUBSCRIBE, SUBSCRIBE) \ XX(26, SUBSCRIBE, SUBSCRIBE) \
XX(23, UNSUBSCRIBE, UNSUBSCRIBE) \ XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \
/* RFC-5789 */ \ /* RFC-5789 */ \
XX(24, PATCH, PATCH) \ XX(28, PATCH, PATCH) \
XX(25, PURGE, PURGE) \ XX(29, PURGE, PURGE) \
/* CalDAV */ \ /* CalDAV */ \
XX(26, MKCALENDAR, MKCALENDAR) \ XX(30, MKCALENDAR, MKCALENDAR) \
/* RFC-2068, section 19.6.1.2 */ \
XX(31, LINK, LINK) \
XX(32, UNLINK, UNLINK) \
enum http_method enum http_method
{ {
@ -140,6 +148,7 @@ enum flags
, F_TRAILING = 1 << 4 , F_TRAILING = 1 << 4
, F_UPGRADE = 1 << 5 , F_UPGRADE = 1 << 5
, F_SKIPBODY = 1 << 6 , F_SKIPBODY = 1 << 6
, F_CONTENTLENGTH = 1 << 7
}; };
@ -160,6 +169,8 @@ enum flags
XX(CB_body, "the on_body callback failed") \ XX(CB_body, "the on_body callback failed") \
XX(CB_message_complete, "the on_message_complete callback failed") \ XX(CB_message_complete, "the on_message_complete callback failed") \
XX(CB_status, "the on_status callback failed") \ XX(CB_status, "the on_status callback failed") \
XX(CB_chunk_header, "the on_chunk_header callback failed") \
XX(CB_chunk_complete, "the on_chunk_complete callback failed") \
\ \
/* Parsing-related errors */ \ /* Parsing-related errors */ \
XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \ XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
@ -180,6 +191,8 @@ enum flags
XX(INVALID_HEADER_TOKEN, "invalid character in header") \ XX(INVALID_HEADER_TOKEN, "invalid character in header") \
XX(INVALID_CONTENT_LENGTH, \ XX(INVALID_CONTENT_LENGTH, \
"invalid character in content-length header") \ "invalid character in content-length header") \
XX(UNEXPECTED_CONTENT_LENGTH, \
"unexpected content-length header") \
XX(INVALID_CHUNK_SIZE, \ XX(INVALID_CHUNK_SIZE, \
"invalid character in chunk size header") \ "invalid character in chunk size header") \
XX(INVALID_CONSTANT, "invalid constant string") \ XX(INVALID_CONSTANT, "invalid constant string") \
@ -204,10 +217,11 @@ enum http_errno {
struct http_parser { struct http_parser {
/** PRIVATE **/ /** PRIVATE **/
unsigned int type : 2; /* enum http_parser_type */ unsigned int type : 2; /* enum http_parser_type */
unsigned int flags : 7; /* F_* values from 'flags' enum; semi-public */ unsigned int flags : 8; /* F_* values from 'flags' enum; semi-public */
unsigned int state : 7; /* enum state from http_parser.c */ unsigned int state : 7; /* enum state from http_parser.c */
unsigned int header_state : 8; /* enum header_state from http_parser.c */ unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 8; /* index into current matcher */ unsigned int index : 7; /* index into current matcher */
unsigned int lenient_http_headers : 1;
uint32_t nread; /* # bytes read in various scenarios */ uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */ uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
@ -240,6 +254,11 @@ struct http_parser_settings {
http_cb on_headers_complete; http_cb on_headers_complete;
http_data_cb on_body; http_data_cb on_body;
http_cb on_message_complete; http_cb on_message_complete;
/* When on_chunk_header is called, the current chunk length is stored
* in parser->content_length.
*/
http_cb on_chunk_header;
http_cb on_chunk_complete;
}; };
@ -318,6 +337,9 @@ const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */ /* Return a string description of the given error */
const char *http_errno_description(enum http_errno err); const char *http_errno_description(enum http_errno err);
/* Initialize all http_parser_url members to 0 */
void http_parser_url_init(struct http_parser_url *u);
/* Parse a URL; return nonzero on failure */ /* Parse a URL; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen, int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect, int is_connect,