From 0a40d9849c365ff99b412f204d1a2291d45fc6ea Mon Sep 17 00:00:00 2001
From: Andrew Bettison
Date: Mon, 17 Aug 2015 19:46:50 +0930
Subject: [PATCH] Add uri and www-form-uri encode/decode functions

---
 str.c | 130 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 str.h |  50 ++++++++++++++++++++++++++-
 2 files changed, 179 insertions(+), 1 deletion(-)

diff --git a/str.c b/str.c
index 1b0ea787..ef5aa2e7 100644
--- a/str.c
+++ b/str.c
@@ -81,6 +81,136 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstsiz, const char *srcHex
   return dstBinary - dstorig;
 }
 
+/* Common implementation of uri_encodev() and www_form_uri_encodev().  Consumes source bytes from
+ * the iovec array at *iovp, advancing *iovp, *iovcntp and the iovec structures past every byte
+ * encoded.  If 'www_form' is true, encodes a space as '+'.  If 'dstUrienc' is NULL, counts the
+ * encoded bytes without writing them.  If 'dstsiz' is -1, the output length is not limited.
+ * Returns the number of encoded bytes produced.
+ */
+static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  // Track the output length as a count rather than comparing pointers against 'dstUrienc + dstsiz',
+  // which is not a valid pointer when 'dstUrienc' is NULL or 'dstsiz' is -1.
+  const int unlimited = dstsiz == -1;
+  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
+  size_t len = 0;
+  while (*iovcntp && (unlimited || len < limit)) {
+    if ((*iovp)->iov_len == 0) {
+      --*iovcntp;
+      ++*iovp;
+    } else {
+      unsigned char c = *(unsigned char *)(*iovp)->iov_base;
+      if (www_form && c == ' ') {
+        if (dstUrienc)
+          dstUrienc[len] = '+';
+        ++len;
+      } else if (is_uri_char_unreserved(c)) {
+        if (dstUrienc)
+          dstUrienc[len] = c;
+        ++len;
+      } else if (unlimited || limit - len >= 3) {
+        if (dstUrienc) {
+          // Most significant hex digit first, which is the order _uri_decode() reads them in.
+          dstUrienc[len] = '%';
+          dstUrienc[len + 1] = hexdigit_upper[c >> 4];
+          dstUrienc[len + 2] = hexdigit_upper[c & 0xf];
+        }
+        len += 3;
+      } else {
+        break; // never emit a partial "%xx" sequence
+      }
+      // Arithmetic on void* is a GNU extension, so step through a char pointer.
+      (*iovp)->iov_base = (char *)(*iovp)->iov_base + 1;
+      --(*iovp)->iov_len;
+    }
+  }
+  return len;
+}
+
+/* Common implementation of uri_encode() and www_form_uri_encode(): presents the single source
+ * buffer to _uri_encodev() as a one-element iovec array.
+ */
+static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  struct iovec _iov;
+  _iov.iov_base = (void *) src; // iov_base is not const-qualified; the bytes are only read
+  _iov.iov_len = srclen;
+  struct iovec *iov = &_iov;
+  int ioc = 1;
+  size_t encoded = _uri_encodev(www_form, dstUrienc, dstsiz, &iov, &ioc);
+  if (afterp)
+    *afterp = _iov.iov_base; // _uri_encodev() advanced this past the bytes it encoded
+  return encoded;
+}
+
+size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  return _uri_encode(0, dstUrienc, dstsiz, src, srclen, afterp);
+}
+
+size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
+{
+  return _uri_encode(1, dstUrienc, dstsiz, src, srclen, afterp);
+}
+
+size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  return _uri_encodev(0, dstUrienc, dstsiz, iovp, iovcntp);
+}
+
+size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
+{
+  return _uri_encodev(1, dstUrienc, dstsiz, iovp, iovcntp);
+}
+
+/* Common implementation of uri_decode() and www_form_uri_decode().  If 'www_form' is true, decodes
+ * '+' as a space.  A '%' that is not followed by two hex digits is copied through unchanged.  If
+ * 'dstOrig' is NULL, counts the decoded bytes without writing them.  If 'dstsiz' is -1, the output
+ * length is not limited.  Returns the number of decoded bytes produced.
+ */
+static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  // Track the output length as a count rather than comparing pointers against 'dstOrig + dstsiz',
+  // which is not a valid pointer when 'dstOrig' is NULL or 'dstsiz' is -1.
+  const int unlimited = dstsiz == -1;
+  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
+  size_t len = 0;
+  while (srclen && (unlimited || len < limit)) {
+    if (www_form && *srcUrienc == '+') {
+      if (dstOrig)
+        dstOrig[len] = ' ';
+      ++srcUrienc;
+      --srclen;
+    } else if (srclen >= 3 && srcUrienc[0] == '%'
+        // <ctype.h> classifiers need an unsigned char value; a negative plain char is undefined
+        && isxdigit((unsigned char) srcUrienc[1]) && isxdigit((unsigned char) srcUrienc[2])) {
+      if (dstOrig)
+        dstOrig[len] = (hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]);
+      srcUrienc += 3;
+      srclen -= 3;
+    } else {
+      if (dstOrig)
+        dstOrig[len] = *srcUrienc;
+      ++srcUrienc;
+      --srclen;
+    }
+    ++len;
+  }
+  if (afterp)
+    *afterp = srcUrienc;
+  return len;
+}
+
+size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  return _uri_decode(0, dst, dstsiz, srcUrienc, srclen, afterp);
+}
+
+size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
+{
+  return _uri_decode(1, dst, dstsiz, srcUrienc, srclen, afterp);
+}
+
 const char base64_symbols[65] = {
   'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
   'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',
diff --git a/str.h b/str.h
index 1d8f53c4..eb58b499 100644
--- a/str.h
+++ b/str.h
@@ -495,7 +495,55 @@ int uint64_scaled_to_str(char *str, size_t len, uint64_t value);
  */
 int str_to_uint64_interval_ms(const char *str, int64_t *result, const char **afterp);
 
-/* -------------------- URI strings -------------------- */
+/* -------------------- URI encoding and decoding -------------------- */
+
+/* Encode 'srclen' bytes of byte data at 'src' into at most 'dstsiz' bytes of URI-encoded (or
+ * www-form-urlencoded, which encodes space as '+') representation at 'dstUrienc'.  If 'dstUrienc'
+ * is NULL, does not write any encoded bytes, but still counts them.  If 'dstsiz' is -1, the
+ * number of encoded bytes is not limited.  If 'afterp' is not NULL, then sets *afterp to point to
+ * the source byte immediately following the last byte encoded.  A "%xx" sequence will never be
+ * partially encoded; if the whole "%xx" does not fit within 'dstsiz', none of it is produced.
+ *
+ * Returns the total number of encoded bytes written at 'dstUrienc', or that would have been
+ * written if 'dstUrienc' is NULL.
+ *
+ * Can be used to count encoded bytes without actually encoding, eg:
+ *
+ *    uri_encode(NULL, -1, buf, buflen, NULL);
+ *
+ * The uri_encodev() and www_form_uri_encodev() functions are multi-buffer gather variants,
+ * analogous to readv(2) and writev(2).  Modifies the supplied *iovp, *iovcntp parameters and the
+ * iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded.
+ *
+ * @author Andrew Bettison
+ */
+size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
+size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
+
+size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
+size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
+
+/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at
+ * most 'dstsiz' bytes at 'dst'.  If 'dst' is NULL, then does not write any decoded bytes, but still
+ * counts them.  If 'dstsiz' is -1, the number of decoded bytes is not limited.  If 'afterp' is not
+ * NULL, then sets *afterp to point to the source byte immediately following the last byte decoded.
+ *
+ * Returns the total number of decoded bytes written at 'dst'.
+ *
+ * Can be used to decode in-place, eg:
+ *
+ *    uri_decode((char *)buf, n, (const char *)buf, n, NULL);
+ *
+ * Can be used to count decoded bytes without actually decoding, eg:
+ *
+ *    uri_decode(NULL, -1, buf, buflen, NULL);
+ *
+ * @author Andrew Bettison
+ */
+size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
+size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
+
+/* -------------------- URI parsing -------------------- */
 
 /* Return true if the string resembles a nul-terminated URI.
  * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.