Refactor URI character class functions

Use a lookup into the _serval_ctype_1[] table instead of boolean OR expressions and a switch statement.
This commit is contained in:
Andrew Bettison 2013-11-15 11:19:51 +10:30
parent 9418f9d65d
commit 4fd94783d3
2 changed files with 97 additions and 49 deletions

122
str.c
View File

@ -159,50 +159,104 @@ uint8_t _serval_ctype_0[UINT8_MAX] = {
[' '] = _BND,
};
/* Short local aliases for the class flags, to keep the table below readable. */
#define _SEP _SERVAL_CTYPE_1_HTTP_SEPARATOR
#define _URI_SCHEME _SERVAL_CTYPE_1_URI_SCHEME
#define _URI_UNRES _SERVAL_CTYPE_1_URI_UNRESERVED
#define _URI_RES _SERVAL_CTYPE_1_URI_RESERVED

/* Character class table, indexed by byte value.  Each entry is the OR of the
 * class flags that apply to the character; for hexadecimal digits the numeric
 * value of the digit is additionally OR'ed in.  Characters that are not listed
 * are zero-initialised, ie, belong to no class.
 *
 * NOTE(review): the array has UINT8_MAX (255) elements, so index 255 is not a
 * valid element -- callers must not look up byte 0xFF.
 */
uint8_t _serval_ctype_1[UINT8_MAX] = {
  /* Decimal digits: URI scheme + unreserved, plus digit value. */
  ['0'] = _URI_SCHEME | _URI_UNRES | 0,
  ['1'] = _URI_SCHEME | _URI_UNRES | 1,
  ['2'] = _URI_SCHEME | _URI_UNRES | 2,
  ['3'] = _URI_SCHEME | _URI_UNRES | 3,
  ['4'] = _URI_SCHEME | _URI_UNRES | 4,
  ['5'] = _URI_SCHEME | _URI_UNRES | 5,
  ['6'] = _URI_SCHEME | _URI_UNRES | 6,
  ['7'] = _URI_SCHEME | _URI_UNRES | 7,
  ['8'] = _URI_SCHEME | _URI_UNRES | 8,
  ['9'] = _URI_SCHEME | _URI_UNRES | 9,

  /* Upper case letters: URI scheme + unreserved; A-F also carry a hex value. */
  ['A'] = _URI_SCHEME | _URI_UNRES | 0xA,
  ['B'] = _URI_SCHEME | _URI_UNRES | 0xB,
  ['C'] = _URI_SCHEME | _URI_UNRES | 0xC,
  ['D'] = _URI_SCHEME | _URI_UNRES | 0xD,
  ['E'] = _URI_SCHEME | _URI_UNRES | 0xE,
  ['F'] = _URI_SCHEME | _URI_UNRES | 0xF,
  ['G'] = _URI_SCHEME | _URI_UNRES,
  ['H'] = _URI_SCHEME | _URI_UNRES,
  ['I'] = _URI_SCHEME | _URI_UNRES,
  ['J'] = _URI_SCHEME | _URI_UNRES,
  ['K'] = _URI_SCHEME | _URI_UNRES,
  ['L'] = _URI_SCHEME | _URI_UNRES,
  ['M'] = _URI_SCHEME | _URI_UNRES,
  ['N'] = _URI_SCHEME | _URI_UNRES,
  ['O'] = _URI_SCHEME | _URI_UNRES,
  ['P'] = _URI_SCHEME | _URI_UNRES,
  ['Q'] = _URI_SCHEME | _URI_UNRES,
  ['R'] = _URI_SCHEME | _URI_UNRES,
  ['S'] = _URI_SCHEME | _URI_UNRES,
  ['T'] = _URI_SCHEME | _URI_UNRES,
  ['U'] = _URI_SCHEME | _URI_UNRES,
  ['V'] = _URI_SCHEME | _URI_UNRES,
  ['W'] = _URI_SCHEME | _URI_UNRES,
  ['X'] = _URI_SCHEME | _URI_UNRES,
  ['Y'] = _URI_SCHEME | _URI_UNRES,
  ['Z'] = _URI_SCHEME | _URI_UNRES,

  /* Lower case letters: URI scheme + unreserved; a-f also carry a hex value. */
  ['a'] = _URI_SCHEME | _URI_UNRES | 0xa,
  ['b'] = _URI_SCHEME | _URI_UNRES | 0xb,
  ['c'] = _URI_SCHEME | _URI_UNRES | 0xc,
  ['d'] = _URI_SCHEME | _URI_UNRES | 0xd,
  ['e'] = _URI_SCHEME | _URI_UNRES | 0xe,
  ['f'] = _URI_SCHEME | _URI_UNRES | 0xf,
  ['g'] = _URI_SCHEME | _URI_UNRES,
  ['h'] = _URI_SCHEME | _URI_UNRES,
  ['i'] = _URI_SCHEME | _URI_UNRES,
  ['j'] = _URI_SCHEME | _URI_UNRES,
  ['k'] = _URI_SCHEME | _URI_UNRES,
  ['l'] = _URI_SCHEME | _URI_UNRES,
  ['m'] = _URI_SCHEME | _URI_UNRES,
  ['n'] = _URI_SCHEME | _URI_UNRES,
  ['o'] = _URI_SCHEME | _URI_UNRES,
  ['p'] = _URI_SCHEME | _URI_UNRES,
  ['q'] = _URI_SCHEME | _URI_UNRES,
  ['r'] = _URI_SCHEME | _URI_UNRES,
  ['s'] = _URI_SCHEME | _URI_UNRES,
  ['t'] = _URI_SCHEME | _URI_UNRES,
  ['u'] = _URI_SCHEME | _URI_UNRES,
  ['v'] = _URI_SCHEME | _URI_UNRES,
  ['w'] = _URI_SCHEME | _URI_UNRES,
  ['x'] = _URI_SCHEME | _URI_UNRES,
  ['y'] = _URI_SCHEME | _URI_UNRES,
  ['z'] = _URI_SCHEME | _URI_UNRES,

  /* Punctuation allowed in a URI scheme. */
  ['.'] = _URI_SCHEME | _URI_UNRES,
  ['-'] = _URI_SCHEME | _URI_UNRES,
  ['+'] = _URI_SCHEME | _URI_RES,

  /* Remaining URI unreserved punctuation. */
  ['_'] = _URI_UNRES,
  ['~'] = _URI_UNRES,

  /* HTTP separators that have no URI class. */
  ['\t'] = _SEP,
  [' '] = _SEP,
  ['<'] = _SEP,
  ['>'] = _SEP,
  ['\\'] = _SEP,
  ['"'] = _SEP,
  ['{'] = _SEP,
  ['}'] = _SEP,

  /* HTTP separators that are also URI reserved characters. */
  ['='] = _SEP | _URI_RES,
  [';'] = _SEP | _URI_RES,
  [':'] = _SEP | _URI_RES,
  ['/'] = _SEP | _URI_RES,
  ['['] = _SEP | _URI_RES,
  [']'] = _SEP | _URI_RES,
  ['('] = _SEP | _URI_RES,
  [')'] = _SEP | _URI_RES,
  [','] = _SEP | _URI_RES,
  ['?'] = _SEP | _URI_RES,
  ['@'] = _SEP | _URI_RES,

  /* URI reserved characters that are not HTTP separators. */
  ['\''] = _URI_RES,
  ['!'] = _URI_RES,
  ['*'] = _URI_RES,
  ['$'] = _URI_RES,
  ['&'] = _URI_RES,
  ['#'] = _URI_RES,
};
/* Does this whole buffer contain the same value? */

24
str.h
View File

@ -124,6 +124,9 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstlen, const char *src, c
/* Bit layout of each _serval_ctype_1[] entry: the low four bits are selected
 * by _SERVAL_CTYPE_1_HEX_MASK, and each of the four high bits is an
 * independent character-class flag.
 */
#define _SERVAL_CTYPE_1_HEX_MASK 0xf
#define _SERVAL_CTYPE_1_HTTP_SEPARATOR (1 << 4)
#define _SERVAL_CTYPE_1_URI_SCHEME (1 << 5)
#define _SERVAL_CTYPE_1_URI_UNRESERVED (1 << 6)
#define _SERVAL_CTYPE_1_URI_RESERVED (1 << 7)
/* NOTE(review): both tables are declared with UINT8_MAX (255) elements, so the
 * valid indices are 0..254 and an index of 255 (byte 0xFF) is out of bounds.
 * Consider sizing them UINT8_MAX + 1, here and in the definitions together.
 */
extern uint8_t _serval_ctype_0[UINT8_MAX];
extern uint8_t _serval_ctype_1[UINT8_MAX];
@ -354,25 +357,16 @@ int str_to_uint64_interval_ms(const char *str, int64_t *result, const char **aft
*/
int str_is_uri(const char *uri);
__SERVAL_DNA_STR_INLINE int is_uri_char_scheme(char c)
{
return isalpha(c) || isdigit(c) || c == '+' || c == '-' || c == '.';
__SERVAL_DNA_STR_INLINE int is_uri_char_scheme(char c) {
return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_SCHEME) != 0;
}
__SERVAL_DNA_STR_INLINE int is_uri_char_unreserved(char c)
{
return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
__SERVAL_DNA_STR_INLINE int is_uri_char_unreserved(char c) {
return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_UNRESERVED) != 0;
}
__SERVAL_DNA_STR_INLINE int is_uri_char_reserved(char c)
{
switch (c) {
case ':': case '/': case '?': case '#': case '[': case ']': case '@':
case '!': case '$': case '&': case '\'': case '(': case ')':
case '*': case '+': case ',': case ';': case '=':
return 1;
}
return 0;
__SERVAL_DNA_STR_INLINE int is_uri_char_reserved(char c) {
return (_serval_ctype_1[(unsigned char) c] & _SERVAL_CTYPE_1_URI_RESERVED) != 0;
}
/* Return true if the string resembles a URI scheme without the terminating colon.