From 5a77008aa817fd62e64a2369eb8d179a679ca5d3 Mon Sep 17 00:00:00 2001 From: Andrew Bettison Date: Tue, 20 Sep 2016 12:55:18 +0930 Subject: [PATCH] Move URI primitives from "str.h" to "uri.h" --- conf_schema.c | 1 + dna_helper.c | 1 + headerfiles.mk | 1 + http_server.c | 1 + network_cli.c | 1 + sourcefiles.mk | 1 + str.c | 328 +------------------------------------------- str.h | 160 +++------------------- uri.c | 360 +++++++++++++++++++++++++++++++++++++++++++++++++ uri.h | 146 ++++++++++++++++++++ 10 files changed, 529 insertions(+), 471 deletions(-) create mode 100644 uri.c create mode 100644 uri.h diff --git a/conf_schema.c b/conf_schema.c index 64bb0f6c..37d9aaf3 100644 --- a/conf_schema.c +++ b/conf_schema.c @@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #include "debug.h" #include "mem.h" #include "str.h" +#include "uri.h" #include "numeric_str.h" #include "strbuf_helpers.h" #include "conf.h" diff --git a/dna_helper.c b/dna_helper.c index de0285f5..4117a810 100644 --- a/dna_helper.c +++ b/dna_helper.c @@ -55,6 +55,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. #include "serval.h" #include "conf.h" #include "str.h" +#include "uri.h" #include "strbuf.h" #include "strbuf_helpers.h" #include "dataformats.h" diff --git a/headerfiles.mk b/headerfiles.mk index e39b2256..80d348d3 100644 --- a/headerfiles.mk +++ b/headerfiles.mk @@ -25,6 +25,7 @@ HDRS= fifo.h \ str.h \ numeric_str.h \ base64.h \ + uri.h \ rotbuf.h \ mem.h \ os.h \ diff --git a/http_server.c b/http_server.c index 8b80b3a6..a353c9f0 100644 --- a/http_server.c +++ b/http_server.c @@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
#include "debug.h" #include "numeric_str.h" #include "base64.h" +#include "uri.h" #include "strbuf.h" #include "strbuf_helpers.h" #include "net.h" diff --git a/network_cli.c b/network_cli.c index cab3e64f..52d814d9 100644 --- a/network_cli.c +++ b/network_cli.c @@ -30,6 +30,7 @@ #include "instance.h" #include "serval.h" #include "numeric_str.h" +#include "uri.h" #include "overlay_buffer.h" diff --git a/sourcefiles.mk b/sourcefiles.mk index 4737a72f..e2c31387 100644 --- a/sourcefiles.mk +++ b/sourcefiles.mk @@ -25,6 +25,7 @@ SERVAL_CLIENT_SOURCES = \ str.c \ numeric_str.c \ base64.c \ + uri.c \ strlcpy.c \ uuid.c \ whence.c \ diff --git a/str.c b/str.c index 4cdb82d3..6363ffbc 100644 --- a/str.c +++ b/str.c @@ -23,11 +23,10 @@ #include #include // for NULL -#include // for iovec #include // for strlen(), strncmp() etc. #include #include -#include +#include // for UINT8_MAX const char hexdigit_upper[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; const char hexdigit_lower[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; @@ -85,112 +84,6 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstsiz, const char *srcHex return dstBinary - dstorig; } -static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp) -{ - char * dst = dstUrienc; - char * const dstend = dstUrienc + dstsiz; - while (*iovcntp && (dstsiz == -1 || dst < dstend)) { - if ((*iovp)->iov_len == 0) { - --*iovcntp; - ++*iovp; - } else { - unsigned char c = *(unsigned char *)(*iovp)->iov_base; - if (www_form && c == ' ') { - if (dstUrienc) - *dst = '+'; - ++dst; - } else if (is_uri_char_unreserved(c)) { - if (dstUrienc) - *dst = c; - ++dst; - } else if (dst + 3 <= dstend) { - if (dstUrienc) { - dst[0] = '%'; - dst[1] = hexdigit_upper[c & 0xf]; - dst[2] = hexdigit_upper[c >> 4]; - } - dst += 3; - } else { - break; - } - ++(*iovp)->iov_base; - --(*iovp)->iov_len; - } - } - return dst - dstUrienc; -} - 
-static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp) -{ - struct iovec _iov; - _iov.iov_base = (void *) src; - _iov.iov_len = srclen; - struct iovec *iov = &_iov; - int ioc = 1; - size_t encoded = _uri_encodev(www_form, dstUrienc, dstsiz, &iov, &ioc); - if (afterp) - *afterp = _iov.iov_base; - return encoded; -} - -size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp) -{ - return _uri_encode(0, dstUrienc, dstsiz, src, srclen, afterp); -} - -size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp) -{ - return _uri_encode(1, dstUrienc, dstsiz, src, srclen, afterp); -} - -size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp) -{ - return _uri_encodev(0, dstUrienc, dstsiz, iovp, iovcntp); -} - -size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp) -{ - return _uri_encodev(1, dstUrienc, dstsiz, iovp, iovcntp); -} - -static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp) -{ - char *dst = dstOrig; - char *const dstend = dst + dstsiz; - while (srclen && (dstsiz == -1 || dst < dstend)) { - if (www_form && *srcUrienc == '+') { - if (dstOrig) - *dst = ' '; - ++srcUrienc; - --srclen; - } else if (srclen >= 3 && srcUrienc[0] == '%' && isxdigit(srcUrienc[1]) && isxdigit(srcUrienc[2])) { - if (dstOrig) - *dst = (hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]); - srcUrienc += 3; - srclen -= 3; - } else { - if (dstOrig) - *dst = *srcUrienc; - ++srcUrienc; - --srclen; - } - ++dst; - } - if (afterp) - *afterp = srcUrienc; - return dst - dstOrig; -} - -size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp) -{ - return _uri_decode(0, dst, dstsiz, srcUrienc, srclen, 
afterp); -} - -size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp) -{ - return _uri_decode(1, dst, dstsiz, srcUrienc, srclen, afterp); -} - #define _B64 _SERVAL_CTYPE_0_BASE64 #define _B64U _SERVAL_CTYPE_0_BASE64URL @@ -686,222 +579,3 @@ void strn_digest_passphrase(unsigned char *dstBinary, size_t dstsiz, const char crypto_hash_sha512_final(&context, hash); bcopy(hash, dstBinary, dstsiz); } - -/* Return true if the string resembles a URI. - * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part. - * - * @author Andrew Bettison - */ -int str_is_uri(const char *uri) -{ - const char *p; - size_t len; - if (!str_uri_scheme(uri, &p, &len)) - return 0; - const char *const q = (p += len + 1); - for (; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p) - ; - if (p == q) - return 0; - if (*p == '?') - for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p) - ; - if (*p == '#') - for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p) - ; - return !*p; -} - -int str_uri_scheme(const char *uri, const char **partp, size_t *lenp) -{ - const char *p = uri; - // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) - if (!isalpha(*p++)) - return 0; - while (is_uri_char_scheme(*p)) - ++p; - // Scheme is followed by colon ":". - if (*p != ':') - return 0; - if (partp) - *partp = uri; - if (lenp) - *lenp = p - uri; - return 1; -} - -int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp) -{ - const char *p = uri; - while (*p && *p != ':') - ++p; - if (*p != ':') - return 0; - const char *const q = ++p; - while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' 
&& *p != '#') - ++p; - if (p == q) - return 0; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_query(const char *uri, const char **partp, size_t *lenp) -{ - const char *p = uri; - while (*p && *p != '?') - ++p; - if (*p != '?') - return 0; - const char *const q = ++p; - while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '#') - ++p; - if (p == q || (*p && *p != '#')) - return 0; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_fragment(const char *uri, const char **partp, size_t *lenp) -{ - const char *p = uri; - while (*p && *p != '#') - ++p; - if (*p != '#') - return 0; - const char *const q = ++p; - while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p))) - ++p; - if (p == q || *p) - return 0; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp) -{ - if (hier[0] != '/' || hier[1] != '/') - return 0; - const char *const q = hier + 2; - const char *p = q; - while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#') - ++p; - if (p == q || (*p && *p != '/' && *p != '?' && *p != '#')) - return 0; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp) -{ - if (hier[0] != '/' || hier[1] != '/') - return 0; - const char *p = hier + 2; - while (*p && *p != '/' && *p != '?' && *p != '#') - ++p; - if (!*p) - return 0; - const char *const q = ++p; - while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#') - ++p; - if (p == q || (*p && *p != '/' && *p != '?' 
&& *p != '#')) - return 0; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp) -{ - const char *p; - for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p) - ; - if (*p != '@') - return 0; - for (p = auth; *p && *p != ':' && *p != '@'; ++p) - ; - if (*p != ':') - return 0; - if (partp) - *partp = auth; - if (lenp) - *lenp = p - auth; - return 1; -} - -int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp) -{ - const char *p; - for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p) - ; - if (*p != '@') - return 0; - for (p = auth; *p && *p != ':' && *p != '@'; ++p) - ; - if (*p != ':') - return 0; - const char *const q = ++p; - for (; *p && *p != '@'; ++p) - ; - assert(*p == '@'); - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp) -{ - const char *p; - const char *q = auth; - for (p = auth; *p && *p != '/' && *p != '?' && *p != '#'; ++p) - if (*p == '@') - q = p + 1; - const char *r = p; - while (r > q && isdigit(*--r)) - ; - if (r < p - 1 && *r == ':') - p = r; - if (partp) - *partp = q; - if (lenp) - *lenp = p - q; - return 1; -} - -int str_uri_authority_port(const char *auth, uint16_t *portp) -{ - const char *p; - const char *q = auth; - for (p = auth; *p && *p != '/' && *p != '?' 
&& *p != '#'; ++p) - if (*p == '@') - q = p + 1; - const char *r = p; - while (r > q && isdigit(*--r)) - ; - if (r < p - 1 && *r == ':') { - for (++r; *r == '0'; ++r) - ; - unsigned int n; - if (p - r <= 5 && (n = atoi(r)) <= USHRT_MAX) { - *portp = n; - return 1; - } - } - return 0; -} diff --git a/str.h b/str.h index 3d7ffe8d..bebfddb8 100644 --- a/str.h +++ b/str.h @@ -20,10 +20,10 @@ #ifndef __SERVAL_DNA__STR_H__ #define __SERVAL_DNA__STR_H__ -#include -#include -#include -#include +#include // for strcpy(), strlen() etc. +#include // for uint8_t +#include // for size_t +#include // for isascii(), isxdigit() etc. #include #ifndef __SERVAL_DNA__STR_INLINE @@ -202,6 +202,18 @@ __SERVAL_DNA__STR_INLINE int is_http_token(int c) { return is_http_char(c) && !is_http_ctl(c) && !is_http_separator(c); } +__SERVAL_DNA__STR_INLINE int is_uri_char_scheme(int c) { + return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_SCHEME) != 0; +} + +__SERVAL_DNA__STR_INLINE int is_uri_char_unreserved(int c) { + return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_UNRESERVED) != 0; +} + +__SERVAL_DNA__STR_INLINE int is_uri_char_reserved(int c) { + return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_RESERVED) != 0; +} + /* Convert the given ASCII hex digit character into its radix value, eg, '0' -> * 0, 'b' -> 11. If the argument is not an ASCII hex digit, returns -1. * @@ -338,146 +350,6 @@ int strn_str_casecmp(const char *str1, size_t len1, const char *str2); */ char *str_str(char *haystack, const char *needle, size_t haystack_len); -/* -------------------- URI encoding and decoding -------------------- */ - -/* Encode up to 'srclen' bytes of byte data (or up to first nul if 'srclen' == -1) at 'src' into at - * most 'dstsiz' bytes of URI-encoded (or www-form-urlencoded) representation at 'dstUrienc'. If - * 'dstsiz' is -1 or 'dstUrienc' is NULL, does not write any encoded bytes, but still counts them. 
- * If 'afterp' is not NULL, then sets *afterp to point to the source byte immediately following the - * last character encoded. A "%xx" sequence will never be partially encoded; if all the "%xx" does - * not fit within the destination buffer, then none of it is produced. - * - * - * Returns the total number of encoded bytes written at 'dstUrienc'. - * - * Can be used to count encoded bytes without actually encoding, eg: - * - * uri_encode(NULL, -1, buf, buflen, NULL); - * - * The uri_encodev() and www_form_uri_encodev() functions are a multi-buffer gather variants, - * analagous to readv(2) and writev(2). Modifies the supplied *iovp, *iovcntp parameters and the - * iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded. - * - * @author Andrew Bettison - */ -size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp); -size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp); - -struct iovec; -size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp -size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp - -/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at - * most 'dstsiz' bytes at 'dst'. If 'dstsiz' is -1 or 'dst' is NULL, then does not write any - * decoded bytes, but still counts them. If 'afterp' is not NULL, then sets *afterp to point to the - * source byte immediately following the last byte decoded. - * - * Returns the total number of decoded bytes written at 'dst'. 
- * - * Can be used to decode in-place, eg: - * - * uri_decode((char *)buf, n, (const unsigned char *)buf, n, NULL); - * - * Can be used to count decoded bytes without actually decoding, eg: - * - * uri_decode(NULL, -1, buf, buflen, NULL); - * - * @author Andrew Bettison - */ -size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp); -size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp); - -/* -------------------- URI parsing -------------------- */ - -/* Return true if the string resembles a nul-terminated URI. - * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part. - * - * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ] - * - * @author Andrew Bettison - */ -int str_is_uri(const char *uri); - -__SERVAL_DNA__STR_INLINE int is_uri_char_scheme(int c) { - return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_SCHEME) != 0; -} - -__SERVAL_DNA__STR_INLINE int is_uri_char_unreserved(int c) { - return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_UNRESERVED) != 0; -} - -__SERVAL_DNA__STR_INLINE int is_uri_char_reserved(int c) { - return (_serval_ctype_1[(uint8_t) c] & _SERVAL_CTYPE_1_URI_RESERVED) != 0; -} - -/* Return true if the string resembles a URI scheme without the terminating colon. - * Based on RFC-3986 generic syntax. - * - * @author Andrew Bettison - */ -__SERVAL_DNA__STR_INLINE int str_is_uri_scheme(const char *scheme) -{ - if (!isalpha(*scheme++)) - return 0; - while (is_uri_char_scheme(*scheme)) - ++scheme; - return *scheme == '\0'; -} - -/* Pick apart a URI into its basic parts. - * - * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ] - * - * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical - * part. 
If the respective part is found, sets (*partp) to point to the start - * of the part within the supplied 'uri' string, sets (*lenp) to the length of - * the part substring and returns 1. Otherwise returns 0. These functions - * do not reliably validate that the string in 'uri' is a valid URI; that must - * be done by calling str_is_uri(). - * - * @author Andrew Bettison - */ -int str_uri_scheme(const char *uri, const char **partp, size_t *lenp); -int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp); -int str_uri_query(const char *uri, const char **partp, size_t *lenp); -int str_uri_fragment(const char *uri, const char **partp, size_t *lenp); - -/* Pick apart a URI hierarchical part into its basic parts. - * - * hierarchical := "//" authority [ "/" path ] - * - * If the respective part is found, sets (*partp) to point to the start of the - * part within the supplied 'uri' string, sets (*lenp) to the length of the - * part substring and returns 1. Otherwise returns 0. - * - * These functions may be called directly on the part returned by - * str_uri_hierarchical(), even though it is not nul-terminated, because they - * treat "?" and "#" as equally valid terminators. - * - * @author Andrew Bettison - */ -int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp); -int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp); - -/* Pick apart a URI authority into its basic parts. - * - * authority := [ username ":" password "@" ] hostname [ ":" port ] - * - * If the respective part is found, sets (*partp) to point to the start of the - * part within the supplied 'uri' string, sets (*lenp) to the length of the - * part substring and returns 1. Otherwise returns 0. - * - * These functions may be called directly on the part returned by - * str_uri_hierarchical_authority(), even though it is not nul-terminated, - * because they treat "/", "?" and "#" as equally valid terminators. 
- * - * @author Andrew Bettison - */ -int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp); -int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp); -int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp); -int str_uri_authority_port(const char *auth, uint16_t *portp); - /* -------------------- Command-line strings -------------------- */ int parse_argv(char *cmdline, char delim, char **argv, int max_argv); diff --git a/uri.c b/uri.c new file mode 100644 index 00000000..61c0318a --- /dev/null +++ b/uri.c @@ -0,0 +1,360 @@ +/* + Serval URI primitives + Copyright (C) 2012-2016 Serval Project Inc. + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; either version 2 + of the License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 
+ */
+
+#include "uri.h"
+#include "str.h"
+#include
+#include // for atoi()
+#include // for iovec
+#include // for USHRT_MAX
+#include
+
+/* Shared worker for uri_encode(), www_form_uri_encode() and their iovec variants.
+ *
+ * Consumes source bytes from the iovec array described by *iovp and *iovcntp and produces their
+ * encoding:
+ *  - if 'www_form' is non-zero, a space is encoded as "+";
+ *  - bytes accepted by is_uri_char_unreserved() are copied unchanged;
+ *  - every other byte becomes "%XX": two upper-case hex digits, most significant nibble first.
+ *
+ * 'dstsiz' is the capacity of the output in bytes, or -1 for no limit.  If 'dstUrienc' is NULL,
+ * nothing is stored and the output is only measured.  A "%XX" triplet is never split: if fewer
+ * than three bytes of capacity remain, encoding stops before that source byte.
+ *
+ * On return, *iovp, *iovcntp and the iov_base/iov_len of the iovec being consumed describe the
+ * source bytes that were not encoded.  Returns the number of output bytes produced (or counted).
+ */
+static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  const int unlimited = (dstsiz == -1);
+  // A negative size other than -1 leaves no room at all.
+  const size_t capacity = dstsiz > 0 ? (size_t) dstsiz : 0;
+  size_t len = 0;
+  while (*iovcntp && (unlimited || len < capacity)) {
+    struct iovec *const iov = *iovp;
+    if (iov->iov_len == 0) {
+      // Current source buffer is exhausted; move on to the next one.
+      --*iovcntp;
+      ++*iovp;
+      continue;
+    }
+    const unsigned char c = *(const unsigned char *) iov->iov_base;
+    if (www_form && c == ' ') {
+      if (dstUrienc)
+        dstUrienc[len] = '+';
+      ++len;
+    } else if (is_uri_char_unreserved(c)) {
+      if (dstUrienc)
+        dstUrienc[len] = c;
+      ++len;
+    } else if (unlimited || capacity - len >= 3) {
+      if (dstUrienc) {
+        dstUrienc[len] = '%';
+        dstUrienc[len + 1] = hexdigit_upper[c >> 4]; // high nibble first, so ' ' gives "%20"
+        dstUrienc[len + 2] = hexdigit_upper[c & 0xf];
+      }
+      len += 3;
+    } else {
+      break; // no room left for a whole "%XX"
+    }
+    // Consume the byte just encoded.  Arithmetic on void * is not standard C, hence the cast.
+    iov->iov_base = (unsigned char *) iov->iov_base + 1;
+    --iov->iov_len;
+  }
+  return len;
+}
+
+static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  struct iovec _iov;
+  _iov.iov_base = (void *) src;
+  _iov.iov_len = srclen;
+  struct iovec *iov = &_iov;
+  int ioc = 1;
+  size_t encoded = _uri_encodev(www_form, dstUrienc, dstsiz, &iov, &ioc);
+  if (afterp)
+    *afterp = _iov.iov_base;
+  return encoded;
+}
+
+size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  return _uri_encode(0, dstUrienc, dstsiz, src, srclen, afterp);
+}
+
+size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  return _uri_encode(1, dstUrienc, dstsiz, src, srclen, afterp);
+}
+
+size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  return _uri_encodev(0, dstUrienc, dstsiz, iovp, iovcntp);
+}
+
+size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  return _uri_encodev(1,
dstUrienc, dstsiz, iovp, iovcntp);
+}
+
+/* Shared worker for uri_decode() and www_form_uri_decode().
+ *
+ * Decodes at most 'srclen' bytes at 'srcUrienc'.  A "%" followed by two hex digits becomes the
+ * byte with that value; if 'www_form' is non-zero, "+" becomes a space; every other byte,
+ * including a "%" that is not followed by two hex digits within 'srclen', is copied unchanged.
+ *
+ * Stops when the source is exhausted or, unless 'dstsiz' is -1, when 'dstsiz' bytes have been
+ * produced.  If 'dstOrig' is NULL, nothing is stored and the output is only counted.  Each output
+ * byte is stored only after the source bytes it depends on have been read, and the output never
+ * advances faster than the source.
+ *
+ * If 'afterp' is not NULL, sets *afterp to the first source byte that was not decoded.  Returns
+ * the number of bytes produced (or counted).
+ */
+static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  char *dst = dstOrig;
+  char *const dstend = dst + dstsiz;
+  while (srclen && (dstsiz == -1 || dst < dstend)) {
+    if (www_form && *srcUrienc == '+') {
+      if (dstOrig)
+        *dst = ' ';
+      ++srcUrienc;
+      --srclen;
+    } else if (srclen >= 3 && srcUrienc[0] == '%'
+               // <ctype.h> classifiers need an unsigned char value; a plain char may be negative.
+               && isxdigit((unsigned char) srcUrienc[1])
+               && isxdigit((unsigned char) srcUrienc[2])) {
+      if (dstOrig)
+        *dst = (hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]);
+      srcUrienc += 3;
+      srclen -= 3;
+    } else {
+      if (dstOrig)
+        *dst = *srcUrienc;
+      ++srcUrienc;
+      --srclen;
+    }
+    ++dst;
+  }
+  if (afterp)
+    *afterp = srcUrienc;
+  return dst - dstOrig;
+}
+
+size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  return _uri_decode(0, dst, dstsiz, srcUrienc, srclen, afterp);
+}
+
+size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  return _uri_decode(1, dst, dstsiz, srcUrienc, srclen, afterp);
+}
+
+/* Return true if the string resembles a URI.
+ * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
+ *
+ * @author Andrew Bettison
+ */
+int str_is_uri(const char *uri)
+{
+  const char *p;
+  size_t len;
+  if (!str_uri_scheme(uri, &p, &len))
+    return 0;
+  const char *const q = (p += len + 1);
+  for (; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p)
+    ;
+  if (p == q)
+    return 0;
+  if (*p == '?')
+    for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#'; ++p)
+      ;
+  if (*p == '#')
+    for (++p; *p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?'
&& *p != '#'; ++p)
+      ;
+  return !*p;
+}
+
+/* Return true if the whole nul-terminated string is a URI scheme: a letter followed by zero or
+ * more characters accepted by is_uri_char_scheme(), with no terminating colon.
+ */
+int str_is_uri_scheme(const char *scheme)
+{
+  // <ctype.h> classifiers need an unsigned char value; a plain char may be negative.
+  if (!isalpha((unsigned char) *scheme++))
+    return 0;
+  while (is_uri_char_scheme(*scheme))
+    ++scheme;
+  return *scheme == '\0';
+}
+
+/* Locate the scheme at the start of 'uri'.  Succeeds only if 'uri' starts with a letter followed
+ * by zero or more scheme characters and then a colon.  On success sets *partp (if not NULL) to
+ * 'uri' and *lenp (if not NULL) to the length of the scheme, not counting the colon, and
+ * returns 1.  Otherwise returns 0.
+ */
+int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
+  // <ctype.h> classifiers need an unsigned char value; a plain char may be negative.
+  if (!isalpha((unsigned char) *p++))
+    return 0;
+  while (is_uri_char_scheme(*p))
+    ++p;
+  // Scheme is followed by colon ":".
+  if (*p != ':')
+    return 0;
+  if (partp)
+    *partp = uri;
+  if (lenp)
+    *lenp = p - uri;
+  return 1;
+}
+
+int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != ':')
+    ++p;
+  if (*p != ':')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '?' && *p != '#')
+    ++p;
+  if (p == q)
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+int str_uri_query(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != '?')
+    ++p;
+  if (*p != '?')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
+{
+  const char *p = uri;
+  while (*p && *p != '#')
+    ++p;
+  if (*p != '#')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
+    ++p;
+  if (p == q || *p)
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
+{
+  if (hier[0] != '/' || hier[1] != '/')
+    return 0;
+  const char *const q = hier + 2;
+  const char *p = q;
+  while (*p && (is_uri_char_unreserved(*p) ||
is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Extract the path from a hierarchical part of the form "//" authority "/" path.
+ *
+ * Skips the leading "//" and everything up to the first "/", "?", "#" or nul.  Succeeds only if
+ * that character is "/" and it is followed by at least one URI character; a "?" or "#" there
+ * starts the query or fragment, so there is no path.  On success sets *partp (if not NULL) to the
+ * byte after that "/" and *lenp (if not NULL) to the number of bytes up to the next "/", "?",
+ * "#" or nul, and returns 1.  Otherwise returns 0.
+ *
+ * NOTE(review): because the reported part also ends at the next "/", only the first path segment
+ * is returned for a path such as "a/b" -- confirm that callers expect this.
+ */
+int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
+{
+  if (hier[0] != '/' || hier[1] != '/')
+    return 0;
+  const char *p = hier + 2;
+  while (*p && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  // Only "/" introduces a path.
+  if (*p != '/')
+    return 0;
+  const char *const q = ++p;
+  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
+    ++p;
+  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
+    return 0;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p)
+    ;
+  if (*p != '@')
+    return 0;
+  for (p = auth; *p && *p != ':' && *p != '@'; ++p)
+    ;
+  if (*p != ':')
+    return 0;
+  if (partp)
+    *partp = auth;
+  if (lenp)
+    *lenp = p - auth;
+  return 1;
+}
+
+int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  for (p = auth; *p && *p != '@' && *p != '/' && *p != '?' && *p != '#'; ++p)
+    ;
+  if (*p != '@')
+    return 0;
+  for (p = auth; *p && *p != ':' && *p != '@'; ++p)
+    ;
+  if (*p != ':')
+    return 0;
+  const char *const q = ++p;
+  for (; *p && *p != '@'; ++p)
+    ;
+  assert(*p == '@');
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
+{
+  const char *p;
+  const char *q = auth;
+  for (p = auth; *p && *p != '/' && *p != '?'
&& *p != '#'; ++p)
+    if (*p == '@')
+      q = p + 1;
+  const char *r = p;
+  // <ctype.h> classifiers need an unsigned char value; a plain char may be negative.
+  while (r > q && isdigit((unsigned char) *--r))
+    ;
+  if (r < p - 1 && *r == ':')
+    p = r;
+  if (partp)
+    *partp = q;
+  if (lenp)
+    *lenp = p - q;
+  return 1;
+}
+
+/* Extract the port number from the end of an authority.
+ *
+ * The authority ends at the first "/", "?", "#" or nul, and anything up to its last "@" is
+ * skipped.  Succeeds only if the remainder ends with ":" followed by one or more decimal digits
+ * which, once leading zeros are dropped, number at most five and do not exceed USHRT_MAX.  On
+ * success stores the number in *portp and returns 1.  Otherwise returns 0 and leaves *portp
+ * untouched.
+ */
+int str_uri_authority_port(const char *auth, uint16_t *portp)
+{
+  const char *p;
+  const char *q = auth;
+  for (p = auth; *p && *p != '/' && *p != '?' && *p != '#'; ++p)
+    if (*p == '@')
+      q = p + 1;
+  const char *r = p;
+  // Walk back over the trailing digits; the cast keeps isdigit() defined for bytes >= 0x80.
+  while (r > q && isdigit((unsigned char) *--r))
+    ;
+  if (r < p - 1 && *r == ':') {
+    for (++r; *r == '0'; ++r)
+      ;
+    unsigned int n;
+    if (p - r <= 5 && (n = atoi(r)) <= USHRT_MAX) {
+      *portp = n;
+      return 1;
+    }
+  }
+  return 0;
+}
diff --git a/uri.h b/uri.h
new file mode 100644
index 00000000..a0623efe
--- /dev/null
+++ b/uri.h
@@ -0,0 +1,146 @@
+/*
+ Serval URI primitives
+ Copyright (C) 2012-2016 Serval Project Inc.
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License
+ as published by the Free Software Foundation; either version 2
+ of the License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ */
+
+#ifndef __SERVAL_DNA__URI_H__
+#define __SERVAL_DNA__URI_H__
+
+#include // for uint16_t
+#include // for size_t
+
+/* -------------------- URI encoding and decoding -------------------- */
+
+/* Encode up to 'srclen' bytes of byte data (or up to first nul if 'srclen' == -1) at 'src' into at
+ * most 'dstsiz' bytes of URI-encoded (or www-form-urlencoded) representation at 'dstUrienc'.
If + * 'dstUrienc' is NULL, does not write any encoded bytes, but still counts them. A 'dstsiz' of -1 + * means that no size limit is given; it does not by itself prevent writing, so pair it with a NULL + * 'dstUrienc' when only counting. + * If 'afterp' is not NULL, then sets *afterp to point to the source byte immediately following the + * last character encoded. A "%xx" sequence will never be partially encoded; if all the "%xx" does + * not fit within the destination buffer, then none of it is produced. + * + * Returns the total number of encoded bytes written at 'dstUrienc'. + * + * Can be used to count encoded bytes without actually encoding, eg: + * + * uri_encode(NULL, -1, buf, buflen, NULL); + * + * The uri_encodev() and www_form_uri_encodev() functions are multi-buffer gather variants, + * analogous to readv(2) and writev(2). They modify the supplied *iovp, *iovcntp parameters and the + * iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded. + * + * @author Andrew Bettison + */ +size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp); +size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp); + +struct iovec; +size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp +size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp + +/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at + * most 'dstsiz' bytes at 'dst'. If 'dst' is NULL, then does not write any decoded bytes, but still + * counts them. If 'dstsiz' is -1, then the number of decoded bytes is not limited. If 'afterp' is + * not NULL, then sets *afterp to point to the source byte immediately following the last byte + * decoded. + * + * Returns the total number of decoded bytes written at 'dst'. 
+ * + * Can be used to decode in-place, eg: + * + * uri_decode((char *)buf, n, (const char *)buf, n, NULL); + * + * Can be used to count decoded bytes without actually decoding, eg: + * + * uri_decode(NULL, -1, buf, buflen, NULL); + * + * @author Andrew Bettison + */ +size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp); +size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp); + +/* -------------------- URI parsing -------------------- */ + +/* Return true if the nul-terminated string resembles a URI. + * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part. + * + * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ] + * + * @author Andrew Bettison + */ +int str_is_uri(const char *uri); + +/* Return true if the string resembles a URI scheme without the terminating colon. + * Based on RFC-3986 generic syntax. + * + * @author Andrew Bettison + */ +int str_is_uri_scheme(const char *scheme); + +/* Pick apart a URI into its basic parts. + * + * uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ] + * + * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical + * part. If the respective part is found, sets (*partp) to point to the start + * of the part within the supplied 'uri' string, sets (*lenp) to the length of + * the part substring and returns 1. Otherwise returns 0. These functions + * do not reliably validate that the string in 'uri' is a valid URI; that must + * be done by calling str_is_uri(). 
+ * + * @author Andrew Bettison + */ +int str_uri_scheme(const char *uri, const char **partp, size_t *lenp); +int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp); +int str_uri_query(const char *uri, const char **partp, size_t *lenp); +int str_uri_fragment(const char *uri, const char **partp, size_t *lenp); + +/* Pick apart a URI hierarchical part into its basic parts. + * + * hierarchical := "//" authority [ "/" path ] + * + * If the respective part is found, sets (*partp) to point to the start of the + * part within the supplied 'hier' string, sets (*lenp) to the length of the + * part substring and returns 1. Otherwise returns 0. + * + * These functions may be called directly on the part returned by + * str_uri_hierarchical(), even though it is not nul-terminated, because they + * treat "?" and "#" as equally valid terminators. + * + * @author Andrew Bettison + */ +int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp); +int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp); + +/* Pick apart a URI authority into its basic parts. + * + * authority := [ username ":" password "@" ] hostname [ ":" port ] + * + * If the respective part is found, sets (*partp) to point to the start of the + * part within the supplied 'auth' string, sets (*lenp) to the length of the + * part substring and returns 1. Otherwise returns 0. The port is reported + * as a number: str_uri_authority_port() stores it in (*portp) instead. + * + * These functions may be called directly on the part returned by + * str_uri_hierarchical_authority(), even though it is not nul-terminated, + * because they treat "/", "?" and "#" as equally valid terminators. 
+ * + * @author Andrew Bettison + */ +int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp); +int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp); +int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp); +int str_uri_authority_port(const char *auth, uint16_t *portp); + +#endif // __SERVAL_DNA__URI_H__