diff --git a/http_server.c b/http_server.c
index 277acdf0..ff58d585 100644
--- a/http_server.c
+++ b/http_server.c
@@ -172,151 +172,6 @@ void http_request_finalise(struct http_request *r)
   r->phase = DONE;
 }
 
-#define _BASE64 (1 << 6)
-#define _MASK64 ((1 << 6) - 1)
-#define _SEP (1 << 7)
-#define _BND (1 << 8)
-
-uint16_t http_ctype[256] = {
-  ['A'] = _BND | _BASE64 | 0,
-  ['B'] = _BND | _BASE64 | 1,
-  ['C'] = _BND | _BASE64 | 2,
-  ['D'] = _BND | _BASE64 | 3,
-  ['E'] = _BND | _BASE64 | 4,
-  ['F'] = _BND | _BASE64 | 5,
-  ['G'] = _BND | _BASE64 | 6,
-  ['H'] = _BND | _BASE64 | 7,
-  ['I'] = _BND | _BASE64 | 8,
-  ['J'] = _BND | _BASE64 | 9,
-  ['K'] = _BND | _BASE64 | 10,
-  ['L'] = _BND | _BASE64 | 11,
-  ['M'] = _BND | _BASE64 | 12,
-  ['N'] = _BND | _BASE64 | 13,
-  ['O'] = _BND | _BASE64 | 14,
-  ['P'] = _BND | _BASE64 | 15,
-  ['Q'] = _BND | _BASE64 | 16,
-  ['R'] = _BND | _BASE64 | 17,
-  ['S'] = _BND | _BASE64 | 18,
-  ['T'] = _BND | _BASE64 | 19,
-  ['U'] = _BND | _BASE64 | 20,
-  ['V'] = _BND | _BASE64 | 21,
-  ['W'] = _BND | _BASE64 | 22,
-  ['X'] = _BND | _BASE64 | 23,
-  ['Y'] = _BND | _BASE64 | 24,
-  ['Z'] = _BND | _BASE64 | 25,
-  ['a'] = _BND | _BASE64 | 26,
-  ['b'] = _BND | _BASE64 | 27,
-  ['c'] = _BND | _BASE64 | 28,
-  ['d'] = _BND | _BASE64 | 29,
-  ['e'] = _BND | _BASE64 | 30,
-  ['f'] = _BND | _BASE64 | 31,
-  ['g'] = _BND | _BASE64 | 32,
-  ['h'] = _BND | _BASE64 | 33,
-  ['i'] = _BND | _BASE64 | 34,
-  ['j'] = _BND | _BASE64 | 35,
-  ['k'] = _BND | _BASE64 | 36,
-  ['l'] = _BND | _BASE64 | 37,
-  ['m'] = _BND | _BASE64 | 38,
-  ['n'] = _BND | _BASE64 | 39,
-  ['o'] = _BND | _BASE64 | 40,
-  ['p'] = _BND | _BASE64 | 41,
-  ['q'] = _BND | _BASE64 | 42,
-  ['r'] = _BND | _BASE64 | 43,
-  ['s'] = _BND | _BASE64 | 44,
-  ['t'] = _BND | _BASE64 | 45,
-  ['u'] = _BND | _BASE64 | 46,
-  ['v'] = _BND | _BASE64 | 47,
-  ['w'] = _BND | _BASE64 | 48,
-  ['x'] = _BND | _BASE64 | 49,
-  ['y'] = _BND | _BASE64 | 50,
-  ['z'] = _BND | _BASE64 | 51,
-  ['0'] = _BND | _BASE64 | 52,
-  ['1'] = _BND | _BASE64 | 53,
-  ['2'] = _BND | _BASE64 | 54,
-  ['3'] = _BND | _BASE64 | 55,
-  ['4'] = _BND | _BASE64 | 56,
-  ['5'] = _BND | _BASE64 | 57,
-  ['6'] = _BND | _BASE64 | 58,
-  ['7'] = _BND | _BASE64 | 59,
-  ['8'] = _BND | _BASE64 | 60,
-  ['9'] = _BND | _BASE64 | 61,
-  ['+'] = _BND | _BASE64 | 62,
-  ['/'] = _BND | _BASE64 | 63,
-  ['='] = _SEP | _BND,
-  ['-'] = _BND,
-  ['.'] = _BND,
-  [':'] = _BND,
-  ['_'] = _BND,
-  ['('] = _SEP | _BND,
-  [')'] = _SEP | _BND,
-  [','] = _SEP | _BND,
-  ['?'] = _SEP | _BND,
-  [' '] = _SEP | _BND,
-  ['\t'] = _SEP,
-  ['<'] = _SEP,
-  ['>'] = _SEP,
-  ['@'] = _SEP,
-  [';'] = _SEP,
-  [':'] = _SEP,
-  ['\\'] = _SEP,
-  ['"'] = _SEP,
-  ['/'] = _SEP,
-  ['['] = _SEP,
-  [']'] = _SEP,
-  ['{'] = _SEP,
-  ['}'] = _SEP,
-};
-
-inline int is_http_char(char c)
-{
-  return c >= 0;
-}
-
-inline int is_http_ctl(char c)
-{
-  return iscntrl(c);
-}
-
-inline int is_base64_digit(char c)
-{
-  return (http_ctype[(unsigned char) c] & _BASE64) != 0;
-}
-
-inline int is_base64_pad(char c)
-{
-  return c == '=';
-}
-
-inline uint8_t base64_digit(char c)
-{
-  return http_ctype[(unsigned char) c] & _MASK64;
-}
-
-inline int is_http_separator(char c)
-{
-  return (http_ctype[(unsigned char) c] & _SEP) != 0;
-}
-
-inline int is_http_boundary(char c)
-{
-  return (http_ctype[(unsigned char) c] & _BND) != 0;
-}
-
-inline int is_http_token(char c)
-{
-  return is_http_char(c) && !is_http_ctl(c) && !is_http_separator(c);
-}
-
-inline int is_valid_http_boundary_string(const char *s)
-{
-  if (s[0] == '\0')
-    return 0;
-  for (; *s; ++s)
-    if (!is_http_boundary(*s))
-      return 0;
-  return s[-1] != ' ';
-}
-
 struct substring {
   const char *start;
   const char *end;
diff --git a/str.c b/str.c
index d5af0349..c968a2de 100644
--- a/str.c
+++ b/str.c
@@ -43,12 +43,6 @@ char *tohex(char *dstHex, size_t dstStrLen, const unsigned char *srcBinary)
   return dstHex;
 }
 
-/* Convert nbinary*2 ASCII hex characters [0-9A-Fa-f] to nbinary bytes of data. Can be used to
- * perform the conversion in-place, eg, fromhex(buf, (char*)buf, n); Returns -1 if a non-hex-digit
- * character is encountered, otherwise returns the number of binary bytes produced (= nbinary).
- *
- * @author Andrew Bettison
- */
 size_t fromhex(unsigned char *dstBinary, const char *srcHex, size_t nbinary)
 {
   if (strn_fromhex(dstBinary, nbinary, srcHex, NULL) == nbinary)
@@ -56,13 +50,6 @@ size_t fromhex(unsigned char *dstBinary, const char *srcHex, size_t nbinary)
   return -1;
 }
 
-/* Convert nbinary*2 ASCII hex characters [0-9A-Fa-f] followed by a nul '\0' character to nbinary
- * bytes of data. Can be used to perform the conversion in-place, eg, fromhex(buf, (char*)buf, n);
- * Returns -1 if a non-hex-digit character is encountered or the character immediately following the
- * last hex digit is not a nul, otherwise returns zero.
- *
- * @author Andrew Bettison
- */
 int fromhexstr(unsigned char *dstBinary, const char *srcHex, size_t nbinary)
 {
   const char *p;
@@ -71,21 +58,6 @@ int fromhexstr(unsigned char *dstBinary, const char *srcHex, size_t nbinary)
   return -1;
 }
 
-/* Decode pairs of ASCII hex characters [0-9A-Fa-f] into binary data with an optional upper limit on
- * the number of binary bytes produced (destination buffer size). Returns the number of binary
- * bytes decoded. If 'afterHex' is not NULL, then sets *afterHex to point to the source character
- * immediately following the last hex digit consumed.
- *
- * Can be used to perform a conversion in-place, eg:
- *
- *     strn_fromhex((unsigned char *)buf, n, (const char *)buf, NULL);
- *
- * Can also be used to count hex digits without converting, eg:
- *
- *     strn_fromhex(NULL, -1, buf, NULL);
- *
- * @author Andrew Bettison
- */
 size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *srcHex, const char **afterHex)
 {
   unsigned char *dstorig = dstBinary;
@@ -107,6 +79,132 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *srcHex
   return dstBinary - dstorig;
 }
 
+#define _B64 _SERVAL_CTYPE_0_BASE64
+#define _BND _SERVAL_CTYPE_0_MULTIPART_BOUNDARY
+
+uint8_t _serval_ctype_0[UINT8_MAX + 1] = { /* one entry per unsigned char value 0..UINT8_MAX */
+  ['A'] = _BND | _B64 | 0,
+  ['B'] = _BND | _B64 | 1,
+  ['C'] = _BND | _B64 | 2,
+  ['D'] = _BND | _B64 | 3,
+  ['E'] = _BND | _B64 | 4,
+  ['F'] = _BND | _B64 | 5,
+  ['G'] = _BND | _B64 | 6,
+  ['H'] = _BND | _B64 | 7,
+  ['I'] = _BND | _B64 | 8,
+  ['J'] = _BND | _B64 | 9,
+  ['K'] = _BND | _B64 | 10,
+  ['L'] = _BND | _B64 | 11,
+  ['M'] = _BND | _B64 | 12,
+  ['N'] = _BND | _B64 | 13,
+  ['O'] = _BND | _B64 | 14,
+  ['P'] = _BND | _B64 | 15,
+  ['Q'] = _BND | _B64 | 16,
+  ['R'] = _BND | _B64 | 17,
+  ['S'] = _BND | _B64 | 18,
+  ['T'] = _BND | _B64 | 19,
+  ['U'] = _BND | _B64 | 20,
+  ['V'] = _BND | _B64 | 21,
+  ['W'] = _BND | _B64 | 22,
+  ['X'] = _BND | _B64 | 23,
+  ['Y'] = _BND | _B64 | 24,
+  ['Z'] = _BND | _B64 | 25,
+  ['a'] = _BND | _B64 | 26,
+  ['b'] = _BND | _B64 | 27,
+  ['c'] = _BND | _B64 | 28,
+  ['d'] = _BND | _B64 | 29,
+  ['e'] = _BND | _B64 | 30,
+  ['f'] = _BND | _B64 | 31,
+  ['g'] = _BND | _B64 | 32,
+  ['h'] = _BND | _B64 | 33,
+  ['i'] = _BND | _B64 | 34,
+  ['j'] = _BND | _B64 | 35,
+  ['k'] = _BND | _B64 | 36,
+  ['l'] = _BND | _B64 | 37,
+  ['m'] = _BND | _B64 | 38,
+  ['n'] = _BND | _B64 | 39,
+  ['o'] = _BND | _B64 | 40,
+  ['p'] = _BND | _B64 | 41,
+  ['q'] = _BND | _B64 | 42,
+  ['r'] = _BND | _B64 | 43,
+  ['s'] = _BND | _B64 | 44,
+  ['t'] = _BND | _B64 | 45,
+  ['u'] = _BND | _B64 | 46,
+  ['v'] = _BND | _B64 | 47,
+  ['w'] = _BND | _B64 | 48,
+  ['x'] = _BND | _B64 | 49,
+  ['y'] = _BND | _B64 | 50,
+  ['z'] = _BND | _B64 | 51,
+  ['0'] = _BND | _B64 | 52,
+  ['1'] = _BND | _B64 | 53,
+  ['2'] = _BND | _B64 | 54,
+  ['3'] = _BND | _B64 | 55,
+  ['4'] = _BND | _B64 | 56,
+  ['5'] = _BND | _B64 | 57,
+  ['6'] = _BND | _B64 | 58,
+  ['7'] = _BND | _B64 | 59,
+  ['8'] = _BND | _B64 | 60,
+  ['9'] = _BND | _B64 | 61,
+  ['+'] = _BND | _B64 | 62,
+  ['/'] = _BND | _B64 | 63,
+  ['='] = _BND,
+  ['-'] = _BND,
+  ['.'] = _BND,
+  [':'] = _BND,
+  ['_'] = _BND,
+  ['('] = _BND,
+  [')'] = _BND,
+  [','] = _BND,
+  ['?'] = _BND,
+  [' '] = _BND,
+};
+
+#define _SEP _SERVAL_CTYPE_1_HTTP_SEPARATOR
+
+uint8_t _serval_ctype_1[UINT8_MAX + 1] = { /* one entry per unsigned char value 0..UINT8_MAX */
+  ['A'] = 0xA,
+  ['B'] = 0xB,
+  ['C'] = 0xC,
+  ['D'] = 0xD,
+  ['E'] = 0xE,
+  ['F'] = 0xF,
+  ['a'] = 0xa,
+  ['b'] = 0xb,
+  ['c'] = 0xc,
+  ['d'] = 0xd,
+  ['e'] = 0xe,
+  ['f'] = 0xf,
+  ['0'] = 0,
+  ['1'] = 1,
+  ['2'] = 2,
+  ['3'] = 3,
+  ['4'] = 4,
+  ['5'] = 5,
+  ['6'] = 6,
+  ['7'] = 7,
+  ['8'] = 8,
+  ['9'] = 9,
+  ['\t'] = _SEP,
+  [' '] = _SEP,
+  ['='] = _SEP,
+  ['<'] = _SEP,
+  ['>'] = _SEP,
+  ['@'] = _SEP,
+  [';'] = _SEP,
+  [':'] = _SEP,
+  ['\\'] = _SEP,
+  ['"'] = _SEP,
+  ['/'] = _SEP,
+  ['['] = _SEP,
+  [']'] = _SEP,
+  ['{'] = _SEP,
+  ['}'] = _SEP,
+  ['('] = _SEP,
+  [')'] = _SEP,
+  [','] = _SEP,
+  ['?'] = _SEP,
+};
+
 /* Does this whole buffer contain the same value? */
 int is_all_matching(const unsigned char *ptr, size_t len, unsigned char value)
 {
diff --git a/str.h b/str.h
index 026f6c05..c23b3af5 100644
--- a/str.h
+++ b/str.h
@@ -17,8 +17,8 @@
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
 
-#ifndef __STR_H__
-#define __STR_H__
+#ifndef __SERVAL_DNA_STR_H__
+#define __SERVAL_DNA_STR_H__
 
 #include
 #include
@@ -26,20 +26,34 @@
 #include
 #include
 
-#ifndef __STR_INLINE
+#ifndef __SERVAL_DNA_STR_INLINE
 # if __GNUC__ && !__GNUC_STDC_INLINE__
-# define __STR_INLINE extern inline
+# define __SERVAL_DNA_STR_INLINE extern inline
 # else
-# define __STR_INLINE inline
+# define __SERVAL_DNA_STR_INLINE inline
 # endif
 #endif
 
+/* -------------------- Useful functions and macros -------------------- */
+
+#define alloca_strdup(str) strcpy(alloca(strlen(str) + 1), (str))
+
+int is_all_matching(const unsigned char *ptr, size_t len, unsigned char value);
+
+char *str_toupper_inplace(char *s);
+char *str_tolower_inplace(char *s);
+
+/* -------------------- Hexadecimal strings -------------------- */
+
+extern const char hexdigit_upper[16];
+extern const char hexdigit_lower[16];
+
 /* Return true iff 'len' bytes starting at 'text' are hex digits, upper or lower case.
  * Does not check the following byte.
  *
  * @author Andrew Bettison
  */
-__STR_INLINE int is_xsubstring(const char *text, int len)
+__SERVAL_DNA_STR_INLINE int is_xsubstring(const char *text, int len)
 {
   while (len--)
     if (!isxdigit(*text++))
@@ -52,7 +66,7 @@ __STR_INLINE int is_xsubstring(const char *text, int len)
  *
  * @author Andrew Bettison
  */
-__STR_INLINE int is_xstring(const char *text, int len)
+__SERVAL_DNA_STR_INLINE int is_xstring(const char *text, int len)
 {
   while (len--)
     if (!isxdigit(*text++))
@@ -60,44 +74,112 @@ __STR_INLINE int is_xstring(const char *text, int len)
   return *text == '\0';
 }
 
-extern const char hexdigit_upper[16];
-extern const char hexdigit_lower[16];
+/* Converts a given binary blob to uppercase ASCII hexadecimal.
+ */
 char *tohex(char *dstHex, size_t dstStrlen, const unsigned char *srcBinary);
-size_t fromhex(unsigned char *dstBinary, const char *srcHex, size_t nbinary);
-int fromhexstr(unsigned char *dstBinary, const char *srcHex, size_t nbinary);
-size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *src, const char **afterp);
-
 #define alloca_tohex(buf,bytes) tohex((char *)alloca((bytes)*2+1), (bytes) * 2, (buf))
 
-#define alloca_strdup(str) strcpy(alloca(strlen(str) + 1), (str))
+/* Convert nbinary*2 ASCII hex characters [0-9A-Fa-f] to nbinary bytes of data. Can be used to
+ * perform the conversion in-place, eg, fromhex(buf, (char*)buf, n); Returns -1 if a non-hex-digit
+ * character is encountered, otherwise returns the number of binary bytes produced (= nbinary).
+ * Does not insist that the last hex digit is followed by a NUL or any particular character.
+ *
+ * @author Andrew Bettison
+ */
+size_t fromhex(unsigned char *dstBinary, const char *srcHex, size_t nbinary);
 
-__STR_INLINE int hexvalue(char c)
-{
-  switch (c) {
-  case '0': return 0;
-  case '1': return 1;
-  case '2': return 2;
-  case '3': return 3;
-  case '4': return 4;
-  case '5': return 5;
-  case '6': return 6;
-  case '7': return 7;
-  case '8': return 8;
-  case '9': return 9;
-  case 'a': case 'A': return 10;
-  case 'b': case 'B': return 11;
-  case 'c': case 'C': return 12;
-  case 'd': case 'D': return 13;
-  case 'e': case 'E': return 14;
-  case 'f': case 'F': return 15;
-  }
-  return -1;
+/* Convert nbinary*2 ASCII hex characters [0-9A-Fa-f] followed by a NUL '\0' character to nbinary
+ * bytes of data. Can be used to perform the conversion in-place, eg, fromhex(buf, (char*)buf, n);
+ * Returns -1 if a non-hex-digit character is encountered or the character immediately following the
+ * last hex digit is not a NUL, otherwise returns zero.
+ *
+ * @author Andrew Bettison
+ */
+int fromhexstr(unsigned char *dstBinary, const char *srcHex, size_t nbinary);
+
+/* Decode pairs of ASCII hex characters [0-9A-Fa-f] into binary data with an optional upper limit on
+ * the number of binary bytes produced (destination buffer size). Returns the number of binary
+ * bytes decoded. If 'afterHex' is not NULL, then sets *afterHex to point to the source character
+ * immediately following the last hex digit consumed.
+ *
+ * Can be used to perform a conversion in-place, eg:
+ *
+ *     strn_fromhex((unsigned char *)buf, n, (const char *)buf, NULL);
+ *
+ * Can also be used to count hex digits without converting, eg:
+ *
+ *     strn_fromhex(NULL, -1, buf, NULL);
+ *
+ * The fromhex() and fromhexstr() functions are both implemented using strn_fromhex().
+ *
+ * @author Andrew Bettison
+ */
+size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *src, const char **afterp);
+
+/* -------------------- Character classes -------------------- */
+
+#define _SERVAL_CTYPE_0_BASE64_MASK 0x3f
+#define _SERVAL_CTYPE_0_BASE64 (1 << 6)
+#define _SERVAL_CTYPE_0_MULTIPART_BOUNDARY (1 << 7)
+
+#define _SERVAL_CTYPE_1_HEX_MASK 0xf
+#define _SERVAL_CTYPE_1_HTTP_SEPARATOR (1 << 4)
+
+extern uint8_t _serval_ctype_0[UINT8_MAX + 1]; /* low 6 bits: base64 digit value; bits 6-7: class flags */
+extern uint8_t _serval_ctype_1[UINT8_MAX + 1]; /* low 4 bits: hex digit value; bit 4: HTTP separator flag */
+
+__SERVAL_DNA_STR_INLINE int is_http_char(char c) {
+  return isascii(c);
 }
 
-int is_all_matching(const unsigned char *ptr, size_t len, unsigned char value);
+__SERVAL_DNA_STR_INLINE int is_http_ctl(char c) {
+  return iscntrl((unsigned char) c); /* <ctype.h> functions are undefined for negative char values */
+}
 
-char *str_toupper_inplace(char *s);
-char *str_tolower_inplace(char *s);
+__SERVAL_DNA_STR_INLINE int is_base64_digit(char c) {
+  return (_serval_ctype_0[(unsigned char) c] & _SERVAL_CTYPE_0_BASE64) != 0;
+}
+
+__SERVAL_DNA_STR_INLINE int is_base64_pad(char c) {
+  return c == '=';
+}
+
+__SERVAL_DNA_STR_INLINE uint8_t base64_digit(char c) {
+  return _serval_ctype_0[(unsigned char) c] & _SERVAL_CTYPE_0_BASE64_MASK;
+}
+
+__SERVAL_DNA_STR_INLINE int is_multipart_boundary(char c) {
+  return (_serval_ctype_0[(unsigned char) c] & _SERVAL_CTYPE_0_MULTIPART_BOUNDARY) != 0;
+}
+
+__SERVAL_DNA_STR_INLINE int is_valid_multipart_boundary_string(const char *s)
+{
+  if (s[0] == '\0')
+    return 0;
+  for (; *s; ++s)
+    if (!is_multipart_boundary(*s))
+      return 0;
+  return s[-1] != ' '; /* s now points at the NUL; reject a string whose last character is a space */
+}
+
+__SERVAL_DNA_STR_INLINE int is_http_separator(char c) {
+  return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_HTTP_SEPARATOR) != 0;
+}
+
+__SERVAL_DNA_STR_INLINE int is_http_token(char c) {
+  return is_http_char(c) && !is_http_ctl(c) && !is_http_separator(c);
+}
+
+/* Convert the given ASCII hex digit character into its radix value, eg, '0' ->
+ * 0, 'b' -> 11. If the argument is not an ASCII hex digit, returns -1.
+ *
+ * @author Andrew Bettison
+ */
+__SERVAL_DNA_STR_INLINE int hexvalue(char c) {
+  return isxdigit((unsigned char) c) ? _serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_HEX_MASK : -1; /* cast: isxdigit() is undefined for negative char values */
+}
+
+/* -------------------- Printable string representation -------------------- */
 
 char *toprint(char *dstStr, ssize_t dstBufSiz, const char *srcBuf, size_t srcBytes, const char quotes[2]);
 char *toprint_str(char *dstStr, ssize_t dstBufSiz, const char *srcStr, const char quotes[2]);
@@ -111,6 +193,8 @@ size_t strn_fromprint(unsigned char *dst, size_t dstsiz, const char *src, size_t
 #define alloca_str_toprint_quoted(str, quotes) toprint_str((char *)alloca(toprint_str_len((str), (quotes)) + 1), -1, (str), (quotes))
 #define alloca_str_toprint(str) alloca_str_toprint_quoted(str, "``")
 
+/* -------------------- Useful string primitives -------------------- */
+
 /* Like strchr(3), but only looks for 'c' in the first 'n' characters of 's', stopping at the first
  * nul char in 's'.
  *
@@ -125,24 +209,24 @@ const char *strnchr(const char *s, size_t n, char c);
  * @author Andrew Bettison
  */
 
-__STR_INLINE ssize_t str_index_dfl(const char *s, char c, ssize_t dfl)
+__SERVAL_DNA_STR_INLINE ssize_t str_index_dfl(const char *s, char c, ssize_t dfl)
 {
   const char *r = strchr(s, c);
   return r ? r - s : dfl;
 }
 
-__STR_INLINE ssize_t str_rindex_dfl(const char *s, char c, ssize_t dfl)
+__SERVAL_DNA_STR_INLINE ssize_t str_rindex_dfl(const char *s, char c, ssize_t dfl)
 {
   const char *r = strrchr(s, c);
   return r ? r - s : dfl;
 }
 
-__STR_INLINE ssize_t str_index(const char *s, char c)
+__SERVAL_DNA_STR_INLINE ssize_t str_index(const char *s, char c)
 {
   return str_index_dfl(s, c, -1);
 }
 
-__STR_INLINE ssize_t str_rindex(const char *s, char c)
+__SERVAL_DNA_STR_INLINE ssize_t str_rindex(const char *s, char c)
 {
   return str_rindex_dfl(s, c, -1);
 }
@@ -270,17 +354,17 @@ int str_to_uint64_interval_ms(const char *str, int64_t *result, const char **aft
  */
 int str_is_uri(const char *uri);
 
-__STR_INLINE int is_uri_char_scheme(char c)
+__SERVAL_DNA_STR_INLINE int is_uri_char_scheme(char c)
 {
   return isalpha(c) || isdigit(c) || c == '+' || c == '-' || c == '.';
 }
 
-__STR_INLINE int is_uri_char_unreserved(char c)
+__SERVAL_DNA_STR_INLINE int is_uri_char_unreserved(char c)
 {
   return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
 }
 
-__STR_INLINE int is_uri_char_reserved(char c)
+__SERVAL_DNA_STR_INLINE int is_uri_char_reserved(char c)
 {
   switch (c) {
     case ':': case '/': case '?': case '#': case '[': case ']': case '@':
@@ -296,7 +380,7 @@ __STR_INLINE int is_uri_char_reserved(char c)
  *
  * @author Andrew Bettison
  */
-__STR_INLINE int str_is_uri_scheme(const char *scheme)
+__SERVAL_DNA_STR_INLINE int str_is_uri_scheme(const char *scheme)
 {
   if (!isalpha(*scheme++))
     return 0;
@@ -362,4 +446,4 @@ int str_uri_authority_port(const char *auth, uint16_t *portp);
 
 int parse_argv(char *cmdline, char delim, char **argv, int max_argv);
 
-#endif
+#endif // __SERVAL_DNA_STR_H__