From 960a6293e1e4d94ef2eb6322f2a104f572969b5a Mon Sep 17 00:00:00 2001
From: Andrew Bettison
Date: Wed, 21 Nov 2012 18:08:06 +1030
Subject: [PATCH] Move str_is_uri() from dataformats.c to str.c

Add lots of URI unpacking functions as well.
---
Review notes (ignored by git-am, not part of the commit message):
 * str_uri_authority_hostname() now ends the hostname at the ":" of a
   trailing ":port" instead of returning ":port" as the hostname.
 * str_uri_hierarchical_path() now requires the "/" separator, so a "?" or
   "#" directly after the authority is no longer mistaken for a path.
 * Arguments to isalpha()/isdigit() are cast to unsigned char.
 * The str.h comments name the 'hier' and 'auth' parameters correctly.
 * NOTE(review): the extracted text had lost everything inside angle
   brackets and all whitespace-only lines.  The two added #include names
   are restored from their use (assert(), USHRT_MAX); the three context
   #include names, the indentation and the position of blank context lines
   are assumed -- verify against the tree before applying.  The blob ids on
   the "index" lines predate these edits.

 dataformats.c |  43 ---------
 serval.h      |   1 -
 str.c         | 255 ++++++++++++++++++++++++++++++++++++++++++++++++++
 str.h         |  63 +++++++++++++
 4 files changed, 318 insertions(+), 44 deletions(-)

diff --git a/dataformats.c b/dataformats.c
index 436f4e4c..b6176c39 100644
--- a/dataformats.c
+++ b/dataformats.c
@@ -268,46 +268,3 @@ int safeZeroField(unsigned char *packet,int start,int count)
 
   return 0;
 }
-
-int is_uri_char_scheme(char c)
-{
-  return isalpha(c) || isdigit(c) || c == '+' || c == '-' || c == '.';
-}
-
-int is_uri_char_unreserved(char c)
-{
-  return isalpha(c) || isdigit(c) || c == '-' || c == '.' || c == '_' || c == '~';
-}
-
-int is_uri_char_reserved(char c)
-{
-  switch (c) {
-    case ':': case '/': case '?': case '#': case '[': case ']': case '@':
-    case '!': case '$': case '&': case '\'': case '(': case ')':
-    case '*': case '+': case ',': case ';': case '=':
-      return 1;
-  }
-  return 0;
-}
-
-/* Return true if the string resembles a URI.
-   Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
-   @author Andrew Bettison
- */
-int str_is_uri(const char *uri)
-{
-  const char *p = uri;
-  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
-  if (!isalpha(*p++))
-    return 0;
-  while (is_uri_char_scheme(*p))
-    ++p;
-  // Scheme is followed by colon ":".
-  if (*p++ != ':')
-    return 0;
-  // Hierarchical part must contain only valid characters.
-  const char *q = p;
-  while (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p))
-    ++p;
-  return p != q && *p == '\0';
-}
diff --git a/serval.h b/serval.h
index 04487af1..da3a5f09 100644
--- a/serval.h
+++ b/serval.h
@@ -439,7 +439,6 @@ int str_is_subscriber_id(const char *sid);
 int strn_is_subscriber_id(const char *sid, size_t *lenp);
 int str_is_did(const char *did);
 int strn_is_did(const char *did, size_t *lenp);
-int str_is_uri(const char *uri);
 
 int stowSid(unsigned char *packet, int ofs, const char *sid);
 void srandomdev();
diff --git a/str.c b/str.c
index edd302eb..ab940e07 100644
--- a/str.c
+++ b/str.c
@@ -25,6 +25,8 @@
 #include <stdio.h>
 #include <string.h>
 #include <ctype.h>
+#include <assert.h>
+#include <limits.h>
 
 char hexdigit[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
 
@@ -262,3 +264,256 @@ size_t str_fromprint(unsigned char *dst, const char *src)
   }
   return dst - odst;
 }
+
+/* True if c may appear in a URI scheme: a letter, digit, '+', '-' or '.'. */
+int is_uri_char_scheme(char c)
+{
+  return isalpha((unsigned char)c) || isdigit((unsigned char)c) || c == '+' || c == '-' || c == '.';
+}
+
+/* True if c is an RFC-3986 unreserved character: a letter, digit, '-', '.', '_' or '~'. */
+int is_uri_char_unreserved(char c)
+{
+  return isalpha((unsigned char)c) || isdigit((unsigned char)c) || c == '-' || c == '.' || c == '_' || c == '~';
+}
+
+/* True if c is an RFC-3986 reserved character (gen-delims or sub-delims). */
+int is_uri_char_reserved(char c)
+{
+  switch (c) {
+    case ':': case '/': case '?': case '#': case '[': case ']': case '@':
+    case '!': case '$': case '&': case '\'': case '(': case ')':
+    case '*': case '+': case ',': case ';': case '=':
+      return 1;
+  }
+  return 0;
+}
+
+/* Return true if the string resembles a URI.
+ * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
+ *
+ * @author Andrew Bettison
+ */
+int str_is_uri(const char *uri)
+{
+  const char *p;
+  size_t len;
+  if (!str_uri_scheme(uri, &p, &len))
+    return 0;
+  const char *const q = (p += len + 1);
+  for (; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p)
+    ;
+  if (p == q)
+    return 0;
+  if (*p == '?')
+    for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p)
+      ;
+  if (*p == '#')
+    for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p)
+      ;
+  return !*p;
+}
+
+/* Locate the scheme: a letter followed by scheme characters, terminated by ':'. */
+int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+  if (!isalpha((unsigned char)*p++))
+    return 0;
+  while (is_uri_char_scheme(*p))
+    ++p;
+  // Scheme is followed by colon ":".
+  if (*p != ':')
+    return 0;
+  if (partp)
+    *partp = uri;
+  if (lenp)
+    *lenp = p - uri;
+  return 1;
+}
+
+/* Locate the text after the first ':' up to '?', '#' or an invalid character; fails if empty. */
+int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != ':')
+    ++p;
+  if (*p != ':')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#')
+    ++p;
+  if (p == q)
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the text after the first '?'; fails if it is empty or not ended by '#' or nul. */
+int str_uri_query(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != '?')
+    ++p;
+  if (*p != '?')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the text after the first '#'; fails if it is empty or not ended by nul. */
+int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != '#')
+    ++p;
+  if (*p != '#')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
+    ++p;
+  if (p == q || *p)
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the authority after a leading "//", up to the next '/', '?', '#' or nul. */
+int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
+{
+  if (hier[0] != '/' || hier[1] != '/')
+    return 0;
+  const char *const q = hier + 2;
+  const char *p = q;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the text after the '/' that ends the authority, up to the next '/', '?', '#' or nul. */
+int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
+{
+  if (hier[0] != '/' || hier[1] != '/')
+    return 0;
+  const char *p = hier + 2;
+  while (*p && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  if (*p != '/') // a '?' or '#' here starts the query or fragment, not a path
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the username: needs an '@' in the authority and a ':' somewhere before it. */
+int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p)
+    ;
+  if (*p != '@')
+    return 0;
+  for (p = auth; *p && *p != ':' && *p != '@'; ++p)
+    ;
+  if (*p != ':')
+    return 0;
+  if (partp)
+    *partp = auth;
+  if (lenp)
+    *lenp = p - auth;
+  return 1;
+}
+
+/* Locate the password: the text between the first ':' and the following '@'. */
+int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p)
+    ;
+  if (*p != '@')
+    return 0;
+  for (p = auth; *p && *p != ':' && *p != '@'; ++p)
+    ;
+  if (*p != ':')
+    return 0;
+  const char *const q = ++p;
+  for (; *p && *p != '@'; ++p)
+    ;
+  assert(*p == '@');
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Locate the hostname: the text after the last '@', if any, without a trailing ":port". */
+int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  const char *q = auth;
+  for (p = auth; *p && *p != '/' && *p != '?' && *p != '#'; ++p)
+    if (*p == '@')
+      q = p + 1;
+  const char *r = p;
+  while (r > q && isdigit((unsigned char)*--r))
+    ;
+  if (r < p - 1 && *r == ':')
+    p = r; // end the hostname at the colon so the port digits are left out
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Parse a trailing ":digits" port into *portp; fails if absent or above USHRT_MAX. */
+int str_uri_authority_port(const char *auth, unsigned short *portp)
+{
+  const char *p;
+  const char *q = auth;
+  for (p = auth; *p && *p != '/' && *p != '?' && *p != '#'; ++p)
+    if (*p == '@')
+      q = p + 1;
+  const char *r = p;
+  while (r > q && isdigit((unsigned char)*--r))
+    ;
+  if (r < p - 1 && *r == ':') {
+    for (++r; *r == '0'; ++r)
+      ;
+    int n;
+    if (p - r <= 5 && (n = atoi(r)) <= USHRT_MAX) {
+      *portp = n;
+      return 1;
+    }
+  }
+  return 0;
+}
diff --git a/str.h b/str.h
index 24aa19be..fd6fa51e 100644
--- a/str.h
+++ b/str.h
@@ -138,6 +138,69 @@ char *str_str(char *haystack, const char *needle, int haystack_len);
  */
 int str_to_ll_scaled(const char *str, int base, long long *result, const char **afterp);
 
+/* Return true if the string resembles a nul-terminated URI.
+ * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
+ *
+ * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
+ *
+ * @author Andrew Bettison
+ */
+int str_is_uri(const char *uri);
+
+/* Pick apart a URI into its basic parts.
+ *
+ * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
+ *
+ * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical
+ * part. If the respective part is found, sets (*partp) to point to the start
+ * of the part within the supplied 'uri' string, sets (*lenp) to the length of
+ * the part substring and returns 1. Otherwise returns 0. These functions
+ * do not reliably validate that the string in 'uri' is a valid URI; that must
+ * be done by calling str_is_uri().
+ *
+ * @author Andrew Bettison
+ */
+int str_uri_scheme(const char *uri, const char **partp, size_t *lenp);
+int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp);
+int str_uri_query(const char *uri, const char **partp, size_t *lenp);
+int str_uri_fragment(const char *uri, const char **partp, size_t *lenp);
+
+/* Pick apart a URI hierarchical part into its basic parts.
+ *
+ * hierarchical := "//" authority [ "/" path ]
+ *
+ * If the respective part is found, sets (*partp) to point to the start of the
+ * part within the supplied 'hier' string, sets (*lenp) to the length of the
+ * part substring and returns 1. Otherwise returns 0.
+ *
+ * These functions may be called directly on the part returned by
+ * str_uri_hierarchical(), even though it is not nul-terminated, because they
+ * treat "?" and "#" as equally valid terminators.
+ *
+ * @author Andrew Bettison
+ */
+int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp);
+int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp);
+
+/* Pick apart a URI authority into its basic parts.
+ *
+ * authority := [ username ":" password "@" ] hostname [ ":" port ]
+ *
+ * If the respective part is found, sets (*partp) to point to the start of the
+ * part within the supplied 'auth' string, sets (*lenp) to the length of the
+ * part substring and returns 1. Otherwise returns 0.
+ *
+ * These functions may be called directly on the part returned by
+ * str_uri_hierarchical_authority(), even though it is not nul-terminated,
+ * because they treat "/", "?" and "#" as equally valid terminators.
+ *
+ * @author Andrew Bettison
+ */
+int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp);
+int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp);
+int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp);
+int str_uri_authority_port(const char *auth, unsigned short *portp);
+
 int parse_argv(char *cmdline, char delim, char **argv, int max_argv);