Add uri and www-form-uri encode/decode functions

This commit is contained in:
Andrew Bettison 2015-08-17 19:46:50 +09:30
parent e73d50b48a
commit 0a40d9849c
2 changed files with 155 additions and 1 deletions

106
str.c
View File

@ -81,6 +81,112 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstsiz, const char *srcHex
return dstBinary - dstorig;
}
/* Common implementation of the URI and www-form encoders.
 *
 * Consumes source bytes from the iovec array at (*iovp)[0 .. *iovcntp-1], producing encoded
 * output.  Unreserved characters (as judged by is_uri_char_unreserved()) are copied verbatim; if
 * 'www_form' is non-zero a space becomes '+'; every other byte becomes "%XX" with two upper case
 * hex digits, most significant nibble first.
 *
 * 'dstUrienc' may be NULL, in which case nothing is written but the output bytes are still
 * counted.  'dstsiz' is the maximum number of output bytes, or -1 for no limit.  Any other
 * negative 'dstsiz' is treated as zero.  A "%XX" triplet is never split: if fewer than three bytes
 * of room remain, encoding stops before that source byte.
 *
 * On return, *iovp, *iovcntp and the iovec elements have been advanced past the consumed source
 * bytes, so they describe the source data that remains to be encoded.
 *
 * Returns the number of encoded bytes produced (or counted).
 */
static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int unbounded = (dstsiz == -1);
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track the output position as an index, so no pointer arithmetic is ever done on a NULL
  // destination.
  size_t n = 0;
  while (*iovcntp && (unbounded || n < limit)) {
    struct iovec *iov = *iovp;
    if (iov->iov_len == 0) {
      // Current buffer exhausted, move on to the next one.
      --*iovcntp;
      ++*iovp;
      continue;
    }
    const unsigned char c = *(const unsigned char *) iov->iov_base;
    if (www_form && c == ' ') {
      if (dstUrienc)
        dstUrienc[n] = '+';
      ++n;
    } else if (is_uri_char_unreserved(c)) {
      if (dstUrienc)
        dstUrienc[n] = (char) c;
      ++n;
    } else if (unbounded || limit - n >= 3) {
      if (dstUrienc) {
        dstUrienc[n] = '%';
        dstUrienc[n + 1] = hexdigit_upper[c >> 4];  // high nibble first
        dstUrienc[n + 2] = hexdigit_upper[c & 0xf];
      }
      n += 3;
    } else {
      // Not enough room for a whole "%XX"; leave this source byte unconsumed.
      break;
    }
    // Arithmetic on void* is a GNU extension, so step through a byte pointer instead.
    iov->iov_base = (unsigned char *) iov->iov_base + 1;
    --iov->iov_len;
  }
  return n;
}
/* Single-buffer front end to _uri_encodev(): wraps 'src'/'srclen' in a one-element iovec, encodes
 * it, and reports through *afterp (if 'afterp' is not NULL) the first source byte that was not
 * consumed.  Returns whatever _uri_encodev() returns.
 */
static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  // The encoder only reads through iov_base, so casting away const here is harmless.
  struct iovec single = { .iov_base = (void *) src, .iov_len = srclen };
  struct iovec *cursor = &single;
  int remaining = 1;
  const size_t produced = _uri_encodev(www_form, dstUrienc, dstsiz, &cursor, &remaining);
  if (afterp) {
    // _uri_encodev() advances iov_base in place as it consumes bytes.
    *afterp = single.iov_base;
  }
  return produced;
}
/* URI-encode a single buffer; spaces are not given the www-form '+' treatment. */
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* www-form encode a single buffer: same as uri_encode() but with the www-form flag set. */
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* URI-encode from an iovec array, with the www-form flag clear.  Modifies *iovp, *iovcntp and the
 * iovec elements as source bytes are consumed.
 */
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 0;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* www-form encode from an iovec array, with the www-form flag set.  Modifies *iovp, *iovcntp and
 * the iovec elements as source bytes are consumed.
 */
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 1;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* Common implementation of the URI and www-form decoders.
 *
 * Reads up to 'srclen' bytes at 'srcUrienc'.  Each "%XX" (two hex digits) becomes the byte it
 * names; if 'www_form' is non-zero a '+' becomes a space; every other byte, including a '%' that
 * is not followed by two hex digits, is copied through unchanged.
 *
 * 'dstOrig' may be NULL, in which case nothing is written but the decoded bytes are still counted.
 * 'dstsiz' is the maximum number of decoded bytes, or -1 for no limit.  Any other negative
 * 'dstsiz' is treated as zero.  The output never runs ahead of the input, so decoding in place
 * (dstOrig == srcUrienc) is safe.
 *
 * If 'afterp' is not NULL, *afterp is set to the first source byte that was not consumed.
 *
 * Returns the number of decoded bytes produced (or counted).
 */
static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int unbounded = (dstsiz == -1);
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track the output position as an index, so no pointer arithmetic is ever done on a NULL
  // destination.
  size_t n = 0;
  while (srclen && (unbounded || n < limit)) {
    char decoded;
    size_t consumed;
    if (www_form && *srcUrienc == '+') {
      decoded = ' ';
      consumed = 1;
    } else if (srclen >= 3 && srcUrienc[0] == '%'
            // <ctype.h> functions need an unsigned char value; a negative plain char is undefined.
            && isxdigit((unsigned char) srcUrienc[1])
            && isxdigit((unsigned char) srcUrienc[2])) {
      decoded = (char) ((hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]));
      consumed = 3;
    } else {
      decoded = *srcUrienc;
      consumed = 1;
    }
    // All source bytes for this step were read above, so writing now is safe when decoding in
    // place.
    if (dstOrig)
      dstOrig[n] = decoded;
    ++n;
    srcUrienc += consumed;
    srclen -= consumed;
  }
  if (afterp)
    *afterp = srcUrienc;
  return n;
}
/* URI-decode a buffer; '+' is left as-is because the www-form flag is clear. */
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
/* www-form decode a buffer: same as uri_decode() but '+' is additionally decoded to a space. */
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
const char base64_symbols[65] = {
'A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P',
'Q','R','S','T','U','V','W','X','Y','Z','a','b','c','d','e','f',

50
str.h
View File

@ -495,7 +495,55 @@ int uint64_scaled_to_str(char *str, size_t len, uint64_t value);
*/
int str_to_uint64_interval_ms(const char *str, int64_t *result, const char **afterp);
/* -------------------- URI strings -------------------- */
/* -------------------- URI encoding and decoding -------------------- */
/* Encode up to 'srclen' bytes of byte data at 'src' into at most 'dstsiz' bytes of URI-encoded (or
 * www-form-urlencoded) representation at 'dstUrienc'.  'srclen' must be the exact number of source
 * bytes: it is an unsigned count and the encoder does not stop at a nul byte.
 *
 * If 'dstUrienc' is NULL, does not write any encoded bytes, but still counts them.  A 'dstsiz' of
 * -1 requests no limit on the destination size.  If 'afterp' is not NULL, then sets *afterp to
 * point to the source byte immediately following the last byte encoded.  A "%xx" sequence will
 * never be partially encoded; if all the "%xx" does not fit within the destination buffer, then
 * none of it is produced and the corresponding source byte is left unconsumed.
 *
 * The www-form variants differ only in that a space is encoded as '+'.
 *
 * Returns the total number of encoded bytes written at 'dstUrienc' (or counted, if 'dstUrienc' is
 * NULL).
 *
 * Can be used to count encoded bytes without actually encoding, eg:
 *
 *    uri_encode(NULL, -1, buf, buflen, NULL);
 *
 * The uri_encodev() and www_form_uri_encodev() functions are multi-buffer gather variants,
 * analogous to readv(2) and writev(2).  They modify the supplied *iovp, *iovcntp parameters and
 * the iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at
 * most 'dstsiz' bytes at 'dst'.  If 'dst' is NULL, then does not write any decoded bytes, but
 * still counts them.  A 'dstsiz' of -1 means no limit on the destination size.  If 'afterp' is not
 * NULL, then sets *afterp to point to the source byte immediately following the last byte decoded.
 *
 * A "%xx" sequence with two hex digits decodes to a single byte.  A '%' that is not followed by
 * two hex digits is copied through unchanged, as is every other byte.  The www-form variant
 * additionally decodes '+' to a space.
 *
 * Returns the total number of decoded bytes written at 'dst' (or counted, if 'dst' is NULL).
 *
 * Can be used to decode in-place, eg:
 *
 *    uri_decode((char *)buf, n, (const char *)buf, n, NULL);
 *
 * Can be used to count decoded bytes without actually decoding, eg:
 *
 *    uri_decode(NULL, -1, buf, buflen, NULL);
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
/* -------------------- URI parsing -------------------- */
/* Return true if the string resembles a nul-terminated URI.
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.