Refactor URI character class functions

The functions now use an array lookup instead of boolean OR expressions.
This commit is contained in:
Andrew Bettison 2013-11-15 11:19:51 +10:30
parent 9418f9d65d
commit 4fd94783d3
2 changed files with 97 additions and 49 deletions

122
str.c
View File

@ -159,50 +159,104 @@ uint8_t _serval_ctype_0[UINT8_MAX] = {
[' '] = _BND, [' '] = _BND,
}; };
#define _SEP _SERVAL_CTYPE_1_HTTP_SEPARATOR #define _SEP _SERVAL_CTYPE_1_HTTP_SEPARATOR
#define _URI_SCHEME _SERVAL_CTYPE_1_URI_SCHEME
#define _URI_UNRES _SERVAL_CTYPE_1_URI_UNRESERVED
#define _URI_RES _SERVAL_CTYPE_1_URI_RESERVED
uint8_t _serval_ctype_1[UINT8_MAX] = { uint8_t _serval_ctype_1[UINT8_MAX] = {
['A'] = 0xA, ['A'] = _URI_SCHEME | _URI_UNRES | 0xA,
['B'] = 0xB, ['B'] = _URI_SCHEME | _URI_UNRES | 0xB,
['C'] = 0xC, ['C'] = _URI_SCHEME | _URI_UNRES | 0xC,
['D'] = 0xD, ['D'] = _URI_SCHEME | _URI_UNRES | 0xD,
['E'] = 0xE, ['E'] = _URI_SCHEME | _URI_UNRES | 0xE,
['F'] = 0xF, ['F'] = _URI_SCHEME | _URI_UNRES | 0xF,
['a'] = 0xa, ['G'] = _URI_SCHEME | _URI_UNRES,
['b'] = 0xb, ['H'] = _URI_SCHEME | _URI_UNRES,
['c'] = 0xc, ['I'] = _URI_SCHEME | _URI_UNRES,
['d'] = 0xd, ['J'] = _URI_SCHEME | _URI_UNRES,
['e'] = 0xe, ['K'] = _URI_SCHEME | _URI_UNRES,
['f'] = 0xf, ['L'] = _URI_SCHEME | _URI_UNRES,
['0'] = 0, ['M'] = _URI_SCHEME | _URI_UNRES,
['1'] = 1, ['N'] = _URI_SCHEME | _URI_UNRES,
['2'] = 2, ['O'] = _URI_SCHEME | _URI_UNRES,
['3'] = 3, ['P'] = _URI_SCHEME | _URI_UNRES,
['4'] = 4, ['Q'] = _URI_SCHEME | _URI_UNRES,
['5'] = 5, ['R'] = _URI_SCHEME | _URI_UNRES,
['6'] = 6, ['S'] = _URI_SCHEME | _URI_UNRES,
['7'] = 7, ['T'] = _URI_SCHEME | _URI_UNRES,
['8'] = 8, ['U'] = _URI_SCHEME | _URI_UNRES,
['9'] = 9, ['V'] = _URI_SCHEME | _URI_UNRES,
['W'] = _URI_SCHEME | _URI_UNRES,
['X'] = _URI_SCHEME | _URI_UNRES,
['Y'] = _URI_SCHEME | _URI_UNRES,
['Z'] = _URI_SCHEME | _URI_UNRES,
['a'] = _URI_SCHEME | _URI_UNRES | 0xa,
['b'] = _URI_SCHEME | _URI_UNRES | 0xb,
['c'] = _URI_SCHEME | _URI_UNRES | 0xc,
['d'] = _URI_SCHEME | _URI_UNRES | 0xd,
['e'] = _URI_SCHEME | _URI_UNRES | 0xe,
['f'] = _URI_SCHEME | _URI_UNRES | 0xf,
['g'] = _URI_SCHEME | _URI_UNRES,
['h'] = _URI_SCHEME | _URI_UNRES,
['i'] = _URI_SCHEME | _URI_UNRES,
['j'] = _URI_SCHEME | _URI_UNRES,
['k'] = _URI_SCHEME | _URI_UNRES,
['l'] = _URI_SCHEME | _URI_UNRES,
['m'] = _URI_SCHEME | _URI_UNRES,
['n'] = _URI_SCHEME | _URI_UNRES,
['o'] = _URI_SCHEME | _URI_UNRES,
['p'] = _URI_SCHEME | _URI_UNRES,
['q'] = _URI_SCHEME | _URI_UNRES,
['r'] = _URI_SCHEME | _URI_UNRES,
['s'] = _URI_SCHEME | _URI_UNRES,
['t'] = _URI_SCHEME | _URI_UNRES,
['u'] = _URI_SCHEME | _URI_UNRES,
['v'] = _URI_SCHEME | _URI_UNRES,
['w'] = _URI_SCHEME | _URI_UNRES,
['x'] = _URI_SCHEME | _URI_UNRES,
['y'] = _URI_SCHEME | _URI_UNRES,
['z'] = _URI_SCHEME | _URI_UNRES,
['0'] = _URI_SCHEME | _URI_UNRES | 0,
['1'] = _URI_SCHEME | _URI_UNRES | 1,
['2'] = _URI_SCHEME | _URI_UNRES | 2,
['3'] = _URI_SCHEME | _URI_UNRES | 3,
['4'] = _URI_SCHEME | _URI_UNRES | 4,
['5'] = _URI_SCHEME | _URI_UNRES | 5,
['6'] = _URI_SCHEME | _URI_UNRES | 6,
['7'] = _URI_SCHEME | _URI_UNRES | 7,
['8'] = _URI_SCHEME | _URI_UNRES | 8,
['9'] = _URI_SCHEME | _URI_UNRES | 9,
['\t'] = _SEP, ['\t'] = _SEP,
[' '] = _SEP, [' '] = _SEP,
['='] = _SEP, ['_'] = _URI_UNRES,
['='] = _SEP | _URI_RES,
['<'] = _SEP, ['<'] = _SEP,
['>'] = _SEP, ['>'] = _SEP,
['@'] = _SEP, [';'] = _SEP | _URI_RES,
[';'] = _SEP, [':'] = _SEP | _URI_RES,
[':'] = _SEP,
['\\'] = _SEP, ['\\'] = _SEP,
['\''] = _URI_RES,
['"'] = _SEP, ['"'] = _SEP,
['/'] = _SEP, ['/'] = _SEP | _URI_RES,
['['] = _SEP, ['['] = _SEP | _URI_RES,
[']'] = _SEP, [']'] = _SEP | _URI_RES,
['{'] = _SEP, ['{'] = _SEP,
['}'] = _SEP, ['}'] = _SEP,
['('] = _SEP, ['('] = _SEP | _URI_RES,
[')'] = _SEP, [')'] = _SEP | _URI_RES,
[','] = _SEP, [','] = _SEP | _URI_RES,
['?'] = _SEP, ['.'] = _URI_SCHEME | _URI_UNRES,
['?'] = _SEP | _URI_RES,
['!'] = _URI_RES,
['+'] = _URI_SCHEME | _URI_RES,
['-'] = _URI_SCHEME | _URI_UNRES,
['*'] = _URI_RES,
['$'] = _URI_RES,
['&'] = _URI_RES,
['#'] = _URI_RES,
['@'] = _SEP | _URI_RES,
['~'] = _URI_UNRES,
}; };
/* Does this whole buffer contain the same value? */ /* Does this whole buffer contain the same value? */

24
str.h
View File

@ -124,6 +124,9 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *src, c
#define _SERVAL_CTYPE_1_HEX_MASK 0xf #define _SERVAL_CTYPE_1_HEX_MASK 0xf
#define _SERVAL_CTYPE_1_HTTP_SEPARATOR (1 << 4) #define _SERVAL_CTYPE_1_HTTP_SEPARATOR (1 << 4)
#define _SERVAL_CTYPE_1_URI_SCHEME (1 << 5)
#define _SERVAL_CTYPE_1_URI_UNRESERVED (1 << 6)
#define _SERVAL_CTYPE_1_URI_RESERVED (1 << 7)
extern uint8_t _serval_ctype_0[UINT8_MAX]; extern uint8_t _serval_ctype_0[UINT8_MAX];
extern uint8_t _serval_ctype_1[UINT8_MAX]; extern uint8_t _serval_ctype_1[UINT8_MAX];
@ -354,25 +357,16 @@ int str_to_uint64_interval_ms(const char *str, int64_t *result, const char **aft
*/ */
int str_is_uri(const char *uri); int str_is_uri(const char *uri);
__SERVAL_DNA_STR_INLINE int is_uri_char_scheme(char c) __SERVAL_DNA_STR_INLINE int is_uri_char_scheme(char c) {
{ return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_SCHEME) != 0;
return isalpha(c) || isdigit(c) || c == '+' || c == '-' || c == '.';
} }
__SERVAL_DNA_STR_INLINE int is_uri_char_unreserved(char c) __SERVAL_DNA_STR_INLINE int is_uri_char_unreserved(char c) {
{ return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_UNRESERVED) != 0;
return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
} }
__SERVAL_DNA_STR_INLINE int is_uri_char_reserved(char c) __SERVAL_DNA_STR_INLINE int is_uri_char_reserved(char c) {
{ return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_RESERVED) != 0;
switch (c) {
case ':': case '/': case '?': case '#': case '[': case ']': case '@':
case '!': case '$': case '&': case '\'': case '(': case ')':
case '*': case '+': case ',': case ';': case '=':
return 1;
}
return 0;
} }
/* Return true if the string resembles a URI scheme without the terminating colon. /* Return true if the string resembles a URI scheme without the terminating colon.