Move URI primitives from "str.h" to "uri.h"

This commit is contained in:
Andrew Bettison 2016-09-20 12:55:18 +09:30
parent 5b3d997896
commit 5a77008aa8
10 changed files with 529 additions and 471 deletions

View File

@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "debug.h"
#include "mem.h"
#include "str.h"
#include "uri.h"
#include "numeric_str.h"
#include "strbuf_helpers.h"
#include "conf.h"

View File

@ -55,6 +55,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "serval.h"
#include "conf.h"
#include "str.h"
#include "uri.h"
#include "strbuf.h"
#include "strbuf_helpers.h"
#include "dataformats.h"

View File

@ -25,6 +25,7 @@ HDRS= fifo.h \
str.h \
numeric_str.h \
base64.h \
uri.h \
rotbuf.h \
mem.h \
os.h \

View File

@ -28,6 +28,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#include "debug.h"
#include "numeric_str.h"
#include "base64.h"
#include "uri.h"
#include "strbuf.h"
#include "strbuf_helpers.h"
#include "net.h"

View File

@ -30,6 +30,7 @@
#include "instance.h"
#include "serval.h"
#include "numeric_str.h"
#include "uri.h"
#include "overlay_buffer.h"

View File

@ -25,6 +25,7 @@ SERVAL_CLIENT_SOURCES = \
str.c \
numeric_str.c \
base64.c \
uri.c \
strlcpy.c \
uuid.c \
whence.c \

328
str.c
View File

@ -23,11 +23,10 @@
#include <sodium.h>
#include <stdio.h> // for NULL
#include <sys/uio.h> // for iovec
#include <string.h> // for strlen(), strncmp() etc.
#include <ctype.h>
#include <assert.h>
#include <limits.h>
#include <limits.h> // for UINT8_MAX
const char hexdigit_upper[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
const char hexdigit_lower[16] = {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'};
@ -85,112 +84,6 @@ size_t strn_fromhex(unsigned char *dstBinary, ssize_t dstsiz, const char *srcHex
return dstBinary - dstorig;
}
/* Percent-encode bytes gathered from an iovec array into 'dstUrienc'.
 *
 * Bytes for which is_uri_char_unreserved() is true are copied verbatim; if
 * 'www_form' is non-zero a space becomes '+'; every other byte becomes "%XX"
 * with the high nibble first, which is the order _uri_decode() expects.
 *
 * 'dstsiz' is the destination capacity in bytes, or -1 for "no limit".  If
 * 'dstUrienc' is NULL nothing is written but the bytes are still counted.  A
 * "%XX" triplet is never produced partially: if it does not fit, encoding
 * stops before it.
 *
 * On return *iovp, *iovcntp and the iovec entries themselves have been
 * advanced past every source byte that was consumed.
 *
 * Returns the number of encoded bytes produced (or counted).
 */
static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track an offset rather than a pointer so that a NULL destination never
  // takes part in pointer arithmetic.
  size_t len = 0;
  while (*iovcntp && (unbounded || len < limit)) {
    struct iovec *const iov = *iovp;
    if (iov->iov_len == 0) {
      // Current buffer exhausted: move on to the next one.
      --*iovcntp;
      ++*iovp;
      continue;
    }
    const unsigned char c = *(const unsigned char *) iov->iov_base;
    if (www_form && c == ' ') {
      if (dstUrienc)
        dstUrienc[len] = '+';
      ++len;
    } else if (is_uri_char_unreserved(c)) {
      if (dstUrienc)
        dstUrienc[len] = (char) c;
      ++len;
    } else if (unbounded || limit - len >= 3) {
      if (dstUrienc) {
        dstUrienc[len] = '%';
        dstUrienc[len + 1] = hexdigit_upper[c >> 4];
        dstUrienc[len + 2] = hexdigit_upper[c & 0xf];
      }
      len += 3;
    } else {
      // Not enough room for a whole "%XX": leave this byte unconsumed.
      break;
    }
    // Consume the source byte (no arithmetic on void *).
    iov->iov_base = (unsigned char *) iov->iov_base + 1;
    --iov->iov_len;
  }
  return len;
}
/* Single-buffer front end to _uri_encodev(): wraps 'src'/'srclen' in a
 * one-element iovec, encodes it, and (if 'afterp' is not NULL) sets *afterp to
 * the first source byte that was not consumed.
 *
 * If 'srclen' is (size_t)-1 the source is treated as a nul-terminated string,
 * as documented for uri_encode().
 *
 * Returns the number of encoded bytes produced (or counted).
 */
static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  if (srclen == (size_t) -1)
    srclen = strlen(src);
  struct iovec source;
  source.iov_base = (void *) src;
  source.iov_len = srclen;
  struct iovec *iov = &source;
  int iovcnt = 1;
  const size_t encoded = _uri_encodev(www_form, dstUrienc, dstsiz, &iov, &iovcnt);
  // _uri_encodev() advances iov_base in place, so 'source' records the stopping point.
  if (afterp)
    *afterp = source.iov_base;
  return encoded;
}
/* Percent-encode a single buffer; calls _uri_encode() with the www-form flag
 * cleared, so spaces are not turned into '+'.
 */
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* Encode a single buffer in www-form style; calls _uri_encode() with the
 * www-form flag set, so a space is encoded as '+'.
 */
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* Percent-encode an iovec array; calls _uri_encodev() with the www-form flag
 * cleared.  *iovp, *iovcntp and the iovec entries are updated by the callee.
 */
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 0;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* Encode an iovec array in www-form style; calls _uri_encodev() with the
 * www-form flag set.  *iovp, *iovcntp and the iovec entries are updated by the
 * callee.
 */
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 1;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* Decode up to 'srclen' bytes of percent-encoded text at 'srcUrienc'.
 *
 * A "%XX" sequence with two hex digits becomes the byte (X << 4) + X; if
 * 'www_form' is non-zero a '+' becomes a space; anything else (including a
 * malformed '%') is copied through unchanged.
 *
 * 'dstsiz' is the destination capacity in bytes, or -1 for "no limit".  If
 * 'dstOrig' is NULL nothing is written but the decoded bytes are still
 * counted.  Every source byte is read before the corresponding output byte is
 * stored, so 'dstOrig' may be the same buffer as 'srcUrienc'.
 *
 * If 'afterp' is not NULL, *afterp is set to the first source byte not
 * consumed.  Returns the number of decoded bytes produced (or counted).
 */
static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track an offset rather than a pointer so that a NULL destination never
  // takes part in pointer arithmetic.
  size_t len = 0;
  while (srclen && (unbounded || len < limit)) {
    char decoded;
    size_t consumed = 1;
    if (www_form && *srcUrienc == '+') {
      decoded = ' ';
    } else if (srclen >= 3 && srcUrienc[0] == '%'
               // <ctype.h> needs an unsigned char value; a negative char is undefined.
               && isxdigit((unsigned char) srcUrienc[1])
               && isxdigit((unsigned char) srcUrienc[2])) {
      decoded = (char) ((hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]));
      consumed = 3;
    } else {
      decoded = *srcUrienc;
    }
    if (dstOrig)
      dstOrig[len] = decoded;
    ++len;
    srcUrienc += consumed;
    srclen -= consumed;
  }
  if (afterp)
    *afterp = srcUrienc;
  return len;
}
/* Decode percent-encoded text; calls _uri_decode() with the www-form flag
 * cleared, so '+' is passed through unchanged.
 */
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
/* Decode www-form encoded text; calls _uri_decode() with the www-form flag
 * set, so '+' is decoded as a space.
 */
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
#define _B64 _SERVAL_CTYPE_0_BASE64
#define _B64U _SERVAL_CTYPE_0_BASE64URL
@ -686,222 +579,3 @@ void strn_digest_passphrase(unsigned char *dstBinary, size_t dstsiz, const char
crypto_hash_sha512_final(&context, hash);
bcopy(hash, dstBinary, dstsiz);
}
/* Return 1 if the nul-terminated string has the overall shape of a URI,
 * otherwise 0:
 *
 *    scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
 *
 * The scheme is checked by str_uri_scheme().  The hierarchical part must be
 * non-empty.  Every part may contain only characters accepted by
 * is_uri_char_unreserved() or is_uri_char_reserved(), with '?' and '#' acting
 * purely as delimiters.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
int str_is_uri(const char *uri)
{
  const char *scheme;
  size_t schemelen;
  if (!str_uri_scheme(uri, &scheme, &schemelen))
    return 0;
  // Step over the scheme and the ':' that follows it.
  const char *const hier = scheme + schemelen + 1;
  const char *s = hier;
  while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
    ++s;
  if (s == hier)
    return 0;
  if (*s == '?') {
    // Optional query.
    ++s;
    while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
      ++s;
  }
  if (*s == '#') {
    // Optional fragment.
    ++s;
    while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
      ++s;
  }
  // Valid only if every character was consumed.
  return *s == '\0';
}
/* Locate the scheme at the start of 'uri'.
 *
 * The scheme must begin with a letter, continue with characters accepted by
 * is_uri_char_scheme(), and be followed immediately by ':'.  On success sets
 * *partp (if not NULL) to 'uri', sets *lenp (if not NULL) to the scheme length
 * excluding the colon, and returns 1.  Otherwise returns 0.
 */
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
{
  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // <ctype.h> needs an unsigned char value; a negative plain char is undefined.
  if (!isalpha((unsigned char) uri[0]))
    return 0;
  const char *p = uri + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  // Scheme is followed by colon ":".
  if (*p != ':')
    return 0;
  if (partp)
    *partp = uri;
  if (lenp)
    *lenp = (size_t) (p - uri);
  return 1;
}
/* Locate the hierarchical part of 'uri': the text after the first ':' up to
 * (not including) the first '?', '#', end of string, or character that is
 * neither reserved nor unreserved.
 *
 * Returns 0 if there is no ':' or the part is empty.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
{
  // Find the ':' that ends the scheme.
  const char *colon = uri;
  while (*colon != '\0' && *colon != ':')
    ++colon;
  if (*colon == '\0')
    return 0;
  const char *const start = colon + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      break;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the query part of 'uri': the text after the first '?' up to (not
 * including) '#' or end of string.
 *
 * Returns 0 if there is no '?', if the query is empty, or if it contains a
 * character that is neither reserved nor unreserved.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_query(const char *uri, const char **partp, size_t *lenp)
{
  const char *mark = uri;
  while (*mark != '\0' && *mark != '?')
    ++mark;
  if (*mark == '\0')
    return 0;
  const char *const start = mark + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '#')
      break;
    // Any other stopping point means the query is malformed.
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the fragment part of 'uri': the text after the first '#' up to the
 * end of the string.
 *
 * Returns 0 if there is no '#', if the fragment is empty, or if it contains a
 * character that is neither reserved nor unreserved.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
{
  const char *hash = uri;
  while (*hash != '\0' && *hash != '#')
    ++hash;
  if (*hash == '\0')
    return 0;
  const char *const start = hash + 1;
  const char *end = start;
  for (; *end != '\0'; ++end) {
    // The fragment must run cleanly to the end of the string.
    if (!is_uri_char_unreserved(*end) && !is_uri_char_reserved(*end))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the authority within a hierarchical part that starts with "//".
 *
 * The authority runs from just after the "//" up to (not including) the next
 * '/', '?', '#' or end of string.  Returns 0 if 'hier' does not start with
 * "//", if the authority is empty, or if it contains a character that is
 * neither reserved nor unreserved.  Otherwise sets *partp and *lenp (each only
 * if not NULL) and returns 1.
 */
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
{
  if (!(hier[0] == '/' && hier[1] == '/'))
    return 0;
  const char *const start = hier + 2;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '/' || c == '?' || c == '#')
      break;
    // Any other stopping point means the authority is malformed.
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the path within a hierarchical part of the form
 *
 *    "//" authority [ "/" path ]
 *
 * The authority is skipped up to the next '/', '?', '#' or end of string; a
 * path is only present if that delimiter is '/'.  (Previously '?' and '#' were
 * accepted here too, so the query or fragment of "//host?q" was reported as
 * the path.)
 *
 * The reported part starts after that '/' and ends at the next '/', '?', '#'
 * or end of string, i.e. it covers the first path segment only.  Returns 0 if
 * 'hier' does not start with "//", if there is no '/' after the authority, if
 * the segment is empty, or if it contains a character that is neither reserved
 * nor unreserved.  Otherwise sets *partp and *lenp (each only if not NULL) and
 * returns 1.
 */
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
{
  if (hier[0] != '/' || hier[1] != '/')
    return 0;
  // Skip the authority.
  const char *p = hier + 2;
  while (*p && *p != '/' && *p != '?' && *p != '#')
    ++p;
  // Only a '/' introduces a path; '?', '#' and end of string mean there is none.
  if (*p != '/')
    return 0;
  const char *const start = p + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '/' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = (size_t) (end - start);
  return 1;
}
/* Locate the username in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * A username is present only if an '@' occurs before any '/', '?', '#' or end
 * of string, and a ':' occurs before that '@'.  On success sets *partp (if not
 * NULL) to 'auth', sets *lenp (if not NULL) to the number of bytes before the
 * ':', and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
{
  // The authority must contain '@' before it ends.
  const size_t at = strcspn(auth, "@/?#");
  if (auth[at] != '@')
    return 0;
  // The username ends at the first ':' provided it precedes the '@'.
  const size_t colon = strcspn(auth, ":@");
  if (auth[colon] != ':')
    return 0;
  if (partp)
    *partp = auth;
  if (lenp)
    *lenp = colon;
  return 1;
}
/* Locate the password in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * A password is present only if an '@' occurs before any '/', '?', '#' or end
 * of string, and a ':' occurs before that '@'.  On success sets *partp (if not
 * NULL) to the byte after the ':', sets *lenp (if not NULL) to the number of
 * bytes between the ':' and the '@', and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
{
  // The authority must contain '@' before it ends.
  const size_t at = strcspn(auth, "@/?#");
  if (auth[at] != '@')
    return 0;
  // The password starts after the first ':' provided it precedes the '@'.
  const size_t colon = strcspn(auth, ":@");
  if (auth[colon] != ':')
    return 0;
  const char *const start = auth + colon + 1;
  const size_t passlen = strcspn(start, "@");
  // The checks above guarantee that an '@' follows.
  assert(start[passlen] == '@');
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = passlen;
  return 1;
}
/* Locate the hostname in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first '/', '?', '#' or end of string.  The
 * hostname starts after the last '@' in it (or at 'auth' if there is none) and
 * excludes a trailing ":" followed by one or more decimal digits.
 *
 * Sets *partp and *lenp (each only if not NULL) and always returns 1; the
 * reported length may be zero.
 */
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk backwards over any trailing digits.  <ctype.h> needs an unsigned
  // char value; a negative plain char is undefined.
  const char *r = end;
  while (r > host && isdigit((unsigned char) *--r))
    ;
  // Strip ":digits" only if at least one digit follows the colon.  Comparing
  // the distance avoids forming a pointer before the start of the string.
  if (end - r > 1 && *r == ':')
    end = r;
  if (partp)
    *partp = host;
  if (lenp)
    *lenp = (size_t) (end - host);
  return 1;
}
/* Extract the port number from an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first '/', '?', '#' or end of string.  A port is
 * present only if the authority ends with ':' followed by one or more decimal
 * digits.  Leading zeros are ignored; the value must not exceed UINT16_MAX.
 *
 * On success stores the port in *portp (if not NULL) and returns 1.  Returns 0
 * if there is no port or it is out of range.
 */
int str_uri_authority_port(const char *auth, uint16_t *portp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk backwards over any trailing digits.  <ctype.h> needs an unsigned
  // char value; a negative plain char is undefined.
  const char *r = end;
  while (r > host && isdigit((unsigned char) *--r))
    ;
  // Need ':' followed by at least one digit.  Comparing the distance avoids
  // forming a pointer before the start of the string.
  if (end - r <= 1 || *r != ':')
    return 0;
  const char *digits = r + 1;
  while (*digits == '0')
    ++digits;
  // More than five significant digits cannot fit in 16 bits.
  if (end - digits > 5)
    return 0;
  // Everything in [digits, end) is a decimal digit, so accumulate directly.
  unsigned long n = 0;
  for (; digits < end; ++digits)
    n = n * 10 + (unsigned long) (*digits - '0');
  if (n > UINT16_MAX)
    return 0;
  if (portp)
    *portp = (uint16_t) n;
  return 1;
}

160
str.h
View File

@ -20,10 +20,10 @@
#ifndef __SERVAL_DNA__STR_H__
#define __SERVAL_DNA__STR_H__
#include <string.h>
#include <stdint.h>
#include <sys/types.h>
#include <ctype.h>
#include <string.h> // for strcpy(), strlen() etc.
#include <stdint.h> // for uint8_t
#include <sys/types.h> // for size_t
#include <ctype.h> // for isascii(), isxdigit() etc.
#include <alloca.h>
#ifndef __SERVAL_DNA__STR_INLINE
@ -202,6 +202,18 @@ __SERVAL_DNA__STR_INLINE int is_http_token(int c) {
return is_http_char(c) && !is_http_ctl(c) && !is_http_separator(c);
}
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_SCHEME bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_scheme(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_SCHEME) ? 1 : 0;
}
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_UNRESERVED bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_unreserved(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_UNRESERVED) ? 1 : 0;
}
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_RESERVED bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_reserved(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_RESERVED) ? 1 : 0;
}
/* Convert the given ASCII hex digit character into its radix value, eg, '0' ->
* 0, 'b' -> 11. If the argument is not an ASCII hex digit, returns -1.
*
@ -338,146 +350,6 @@ int strn_str_casecmp(const char *str1, size_t len1, const char *str2);
*/
char *str_str(char *haystack, const char *needle, size_t haystack_len);
/* -------------------- URI encoding and decoding -------------------- */
/* Encode up to 'srclen' bytes of byte data (or up to first nul if 'srclen' == -1) at 'src' into at
* most 'dstsiz' bytes of URI-encoded (or www-form-urlencoded) representation at 'dstUrienc'. If
* 'dstsiz' is -1 or 'dstUrienc' is NULL, does not write any encoded bytes, but still counts them.
* If 'afterp' is not NULL, then sets *afterp to point to the source byte immediately following the
* last character encoded. A "%xx" sequence will never be partially encoded; if all the "%xx" does
* not fit within the destination buffer, then none of it is produced.
*
*
* Returns the total number of encoded bytes written at 'dstUrienc'.
*
* Can be used to count encoded bytes without actually encoding, eg:
*
* uri_encode(NULL, -1, buf, buflen, NULL);
*
* The uri_encodev() and www_form_uri_encodev() functions are multi-buffer gather variants,
* analogous to readv(2) and writev(2). They modify the supplied *iovp, *iovcntp parameters and the
* iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
struct iovec;
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at
* most 'dstsiz' bytes at 'dst'. If 'dstsiz' is -1 or 'dst' is NULL, then does not write any
* decoded bytes, but still counts them. If 'afterp' is not NULL, then sets *afterp to point to the
* source byte immediately following the last byte decoded.
*
* Returns the total number of decoded bytes written at 'dst'.
*
* Can be used to decode in-place, eg:
*
* uri_decode((char *)buf, n, (const char *)buf, n, NULL);
*
* Can be used to count decoded bytes without actually decoding, eg:
*
* uri_decode(NULL, -1, buf, buflen, NULL);
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
/* -------------------- URI parsing -------------------- */
/* Return true if the string resembles a nul-terminated URI.
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_is_uri(const char *uri);
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_SCHEME bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_scheme(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_SCHEME) ? 1 : 0;
}
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_UNRESERVED bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_unreserved(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_UNRESERVED) ? 1 : 0;
}
/* Return 1 if the _serval_ctype_1 entry for (uint8_t) c has the
 * _SERVAL_CTYPE_1_URI_RESERVED bit set, otherwise 0.
 */
__SERVAL_DNA__STR_INLINE int is_uri_char_reserved(int c) {
  const uint8_t index = (uint8_t) c;
  return (_serval_ctype_1[index] & _SERVAL_CTYPE_1_URI_RESERVED) ? 1 : 0;
}
/* Return true if the nul-terminated string resembles a URI scheme without the
 * terminating colon: a letter followed by zero or more characters accepted by
 * is_uri_char_scheme(), and nothing else.
 * Based on RFC-3986 generic syntax.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
__SERVAL_DNA__STR_INLINE int str_is_uri_scheme(const char *scheme)
{
  // <ctype.h> needs an unsigned char value; a negative plain char is undefined.
  if (!isalpha((unsigned char) scheme[0]))
    return 0;
  const char *p = scheme + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  // Valid only if the scheme characters run to the end of the string.
  return *p == '\0';
}
/* Pick apart a URI into its basic parts.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical
* part. If the respective part is found, sets (*partp) to point to the start
* of the part within the supplied 'uri' string, sets (*lenp) to the length of
* the part substring and returns 1. Otherwise returns 0. These functions
* do not reliably validate that the string in 'uri' is a valid URI; that must
* be done by calling str_is_uri().
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp);
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp);
int str_uri_query(const char *uri, const char **partp, size_t *lenp);
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp);
/* Pick apart a URI hierarchical part into its basic parts.
*
* hierarchical := "//" authority [ "/" path ]
*
* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'uri' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0.
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical(), even though it is not nul-terminated, because they
* treat "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp);
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp);
/* Pick apart a URI authority into its basic parts.
*
* authority := [ username ":" password "@" ] hostname [ ":" port ]
*
* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'uri' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0.
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical_authority(), even though it is not nul-terminated,
* because they treat "/", "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_port(const char *auth, uint16_t *portp);
/* -------------------- Command-line strings -------------------- */
int parse_argv(char *cmdline, char delim, char **argv, int max_argv);

360
uri.c Normal file
View File

@ -0,0 +1,360 @@
/*
Serval URI primitives
Copyright (C) 2012-2016 Serval Project Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#include "uri.h"
#include "str.h"
#include <ctype.h>
#include <stdlib.h> // for atoi()
#include <sys/uio.h> // for iovec
#include <limits.h> // for USHRT_MAX
#include <assert.h>
/* Percent-encode bytes gathered from an iovec array into 'dstUrienc'.
 *
 * Bytes for which is_uri_char_unreserved() is true are copied verbatim; if
 * 'www_form' is non-zero a space becomes '+'; every other byte becomes "%XX"
 * with the high nibble first, which is the order _uri_decode() expects.
 *
 * 'dstsiz' is the destination capacity in bytes, or -1 for "no limit".  If
 * 'dstUrienc' is NULL nothing is written but the bytes are still counted.  A
 * "%XX" triplet is never produced partially: if it does not fit, encoding
 * stops before it.
 *
 * On return *iovp, *iovcntp and the iovec entries themselves have been
 * advanced past every source byte that was consumed.
 *
 * Returns the number of encoded bytes produced (or counted).
 */
static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track an offset rather than a pointer so that a NULL destination never
  // takes part in pointer arithmetic.
  size_t len = 0;
  while (*iovcntp && (unbounded || len < limit)) {
    struct iovec *const iov = *iovp;
    if (iov->iov_len == 0) {
      // Current buffer exhausted: move on to the next one.
      --*iovcntp;
      ++*iovp;
      continue;
    }
    const unsigned char c = *(const unsigned char *) iov->iov_base;
    if (www_form && c == ' ') {
      if (dstUrienc)
        dstUrienc[len] = '+';
      ++len;
    } else if (is_uri_char_unreserved(c)) {
      if (dstUrienc)
        dstUrienc[len] = (char) c;
      ++len;
    } else if (unbounded || limit - len >= 3) {
      if (dstUrienc) {
        dstUrienc[len] = '%';
        dstUrienc[len + 1] = hexdigit_upper[c >> 4];
        dstUrienc[len + 2] = hexdigit_upper[c & 0xf];
      }
      len += 3;
    } else {
      // Not enough room for a whole "%XX": leave this byte unconsumed.
      break;
    }
    // Consume the source byte (no arithmetic on void *).
    iov->iov_base = (unsigned char *) iov->iov_base + 1;
    --iov->iov_len;
  }
  return len;
}
/* Single-buffer front end to _uri_encodev(): wraps 'src'/'srclen' in a
 * one-element iovec, encodes it, and (if 'afterp' is not NULL) sets *afterp to
 * the first source byte that was not consumed.
 *
 * If 'srclen' is (size_t)-1 the source is treated as a nul-terminated string,
 * as documented for uri_encode().
 *
 * Returns the number of encoded bytes produced (or counted).
 */
static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  if (srclen == (size_t) -1)
    srclen = strlen(src);
  struct iovec source;
  source.iov_base = (void *) src;
  source.iov_len = srclen;
  struct iovec *iov = &source;
  int iovcnt = 1;
  const size_t encoded = _uri_encodev(www_form, dstUrienc, dstsiz, &iov, &iovcnt);
  // _uri_encodev() advances iov_base in place, so 'source' records the stopping point.
  if (afterp)
    *afterp = source.iov_base;
  return encoded;
}
/* Percent-encode a single buffer; calls _uri_encode() with the www-form flag
 * cleared, so spaces are not turned into '+'.
 */
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* Encode a single buffer in www-form style; calls _uri_encode() with the
 * www-form flag set, so a space is encoded as '+'.
 */
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_encode(www_form, dstUrienc, dstsiz, src, srclen, afterp);
}
/* Percent-encode an iovec array; calls _uri_encodev() with the www-form flag
 * cleared.  *iovp, *iovcntp and the iovec entries are updated by the callee.
 */
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 0;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* Encode an iovec array in www-form style; calls _uri_encodev() with the
 * www-form flag set.  *iovp, *iovcntp and the iovec entries are updated by the
 * callee.
 */
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int www_form = 1;
  return _uri_encodev(www_form, dstUrienc, dstsiz, iovp, iovcntp);
}
/* Decode up to 'srclen' bytes of percent-encoded text at 'srcUrienc'.
 *
 * A "%XX" sequence with two hex digits becomes the byte (X << 4) + X; if
 * 'www_form' is non-zero a '+' becomes a space; anything else (including a
 * malformed '%') is copied through unchanged.
 *
 * 'dstsiz' is the destination capacity in bytes, or -1 for "no limit".  If
 * 'dstOrig' is NULL nothing is written but the decoded bytes are still
 * counted.  Every source byte is read before the corresponding output byte is
 * stored, so 'dstOrig' may be the same buffer as 'srcUrienc'.
 *
 * If 'afterp' is not NULL, *afterp is set to the first source byte not
 * consumed.  Returns the number of decoded bytes produced (or counted).
 */
static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t limit = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Track an offset rather than a pointer so that a NULL destination never
  // takes part in pointer arithmetic.
  size_t len = 0;
  while (srclen && (unbounded || len < limit)) {
    char decoded;
    size_t consumed = 1;
    if (www_form && *srcUrienc == '+') {
      decoded = ' ';
    } else if (srclen >= 3 && srcUrienc[0] == '%'
               // <ctype.h> needs an unsigned char value; a negative char is undefined.
               && isxdigit((unsigned char) srcUrienc[1])
               && isxdigit((unsigned char) srcUrienc[2])) {
      decoded = (char) ((hexvalue(srcUrienc[1]) << 4) + hexvalue(srcUrienc[2]));
      consumed = 3;
    } else {
      decoded = *srcUrienc;
    }
    if (dstOrig)
      dstOrig[len] = decoded;
    ++len;
    srcUrienc += consumed;
    srclen -= consumed;
  }
  if (afterp)
    *afterp = srcUrienc;
  return len;
}
/* Decode percent-encoded text; calls _uri_decode() with the www-form flag
 * cleared, so '+' is passed through unchanged.
 */
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 0;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
/* Decode www-form encoded text; calls _uri_decode() with the www-form flag
 * set, so '+' is decoded as a space.
 */
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int www_form = 1;
  return _uri_decode(www_form, dst, dstsiz, srcUrienc, srclen, afterp);
}
/* Return 1 if the nul-terminated string has the overall shape of a URI,
 * otherwise 0:
 *
 *    scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
 *
 * The scheme is checked by str_uri_scheme().  The hierarchical part must be
 * non-empty.  Every part may contain only characters accepted by
 * is_uri_char_unreserved() or is_uri_char_reserved(), with '?' and '#' acting
 * purely as delimiters.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
int str_is_uri(const char *uri)
{
  const char *scheme;
  size_t schemelen;
  if (!str_uri_scheme(uri, &scheme, &schemelen))
    return 0;
  // Step over the scheme and the ':' that follows it.
  const char *const hier = scheme + schemelen + 1;
  const char *s = hier;
  while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
    ++s;
  if (s == hier)
    return 0;
  if (*s == '?') {
    // Optional query.
    ++s;
    while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
      ++s;
  }
  if (*s == '#') {
    // Optional fragment.
    ++s;
    while (*s && *s != '?' && *s != '#' && (is_uri_char_unreserved(*s) || is_uri_char_reserved(*s)))
      ++s;
  }
  // Valid only if every character was consumed.
  return *s == '\0';
}
/* Return 1 if the nul-terminated string is a URI scheme without the
 * terminating colon: a letter followed by zero or more characters accepted by
 * is_uri_char_scheme(), and nothing else.  Otherwise return 0.
 */
int str_is_uri_scheme(const char *scheme)
{
  // <ctype.h> needs an unsigned char value; a negative plain char is undefined.
  if (!isalpha((unsigned char) scheme[0]))
    return 0;
  const char *p = scheme + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  // Valid only if the scheme characters run to the end of the string.
  return *p == '\0';
}
/* Locate the scheme at the start of 'uri'.
 *
 * The scheme must begin with a letter, continue with characters accepted by
 * is_uri_char_scheme(), and be followed immediately by ':'.  On success sets
 * *partp (if not NULL) to 'uri', sets *lenp (if not NULL) to the scheme length
 * excluding the colon, and returns 1.  Otherwise returns 0.
 */
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
{
  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // <ctype.h> needs an unsigned char value; a negative plain char is undefined.
  if (!isalpha((unsigned char) uri[0]))
    return 0;
  const char *p = uri + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  // Scheme is followed by colon ":".
  if (*p != ':')
    return 0;
  if (partp)
    *partp = uri;
  if (lenp)
    *lenp = (size_t) (p - uri);
  return 1;
}
/* Locate the hierarchical part of 'uri': the text after the first ':' up to
 * (not including) the first '?', '#', end of string, or character that is
 * neither reserved nor unreserved.
 *
 * Returns 0 if there is no ':' or the part is empty.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
{
  // Find the ':' that ends the scheme.
  const char *colon = uri;
  while (*colon != '\0' && *colon != ':')
    ++colon;
  if (*colon == '\0')
    return 0;
  const char *const start = colon + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      break;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the query part of 'uri': the text after the first '?' up to (not
 * including) '#' or end of string.
 *
 * Returns 0 if there is no '?', if the query is empty, or if it contains a
 * character that is neither reserved nor unreserved.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_query(const char *uri, const char **partp, size_t *lenp)
{
  const char *mark = uri;
  while (*mark != '\0' && *mark != '?')
    ++mark;
  if (*mark == '\0')
    return 0;
  const char *const start = mark + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '#')
      break;
    // Any other stopping point means the query is malformed.
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the fragment part of 'uri': the text after the first '#' up to the
 * end of the string.
 *
 * Returns 0 if there is no '#', if the fragment is empty, or if it contains a
 * character that is neither reserved nor unreserved.  Otherwise sets *partp
 * and *lenp (each only if not NULL) and returns 1.
 */
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
{
  const char *hash = uri;
  while (*hash != '\0' && *hash != '#')
    ++hash;
  if (*hash == '\0')
    return 0;
  const char *const start = hash + 1;
  const char *end = start;
  for (; *end != '\0'; ++end) {
    // The fragment must run cleanly to the end of the string.
    if (!is_uri_char_unreserved(*end) && !is_uri_char_reserved(*end))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the authority within a hierarchical part that starts with "//".
 *
 * The authority runs from just after the "//" up to (not including) the next
 * '/', '?', '#' or end of string.  Returns 0 if 'hier' does not start with
 * "//", if the authority is empty, or if it contains a character that is
 * neither reserved nor unreserved.  Otherwise sets *partp and *lenp (each only
 * if not NULL) and returns 1.
 */
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
{
  if (!(hier[0] == '/' && hier[1] == '/'))
    return 0;
  const char *const start = hier + 2;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '/' || c == '?' || c == '#')
      break;
    // Any other stopping point means the authority is malformed.
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the path within a hierarchical part of the form
 *
 *    "//" authority [ "/" path ]
 *
 * The authority is skipped up to the next '/', '?', '#' or end of string; a
 * path is only present if that delimiter is '/'.  (Previously '?' and '#' were
 * accepted here too, so the query or fragment of "//host?q" was reported as
 * the path.)
 *
 * The reported part starts after that '/' and ends at the next '/', '?', '#'
 * or end of string, i.e. it covers the first path segment only.  Returns 0 if
 * 'hier' does not start with "//", if there is no '/' after the authority, if
 * the segment is empty, or if it contains a character that is neither reserved
 * nor unreserved.  Otherwise sets *partp and *lenp (each only if not NULL) and
 * returns 1.
 */
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
{
  if (hier[0] != '/' || hier[1] != '/')
    return 0;
  // Skip the authority.
  const char *p = hier + 2;
  while (*p && *p != '/' && *p != '?' && *p != '#')
    ++p;
  // Only a '/' introduces a path; '?', '#' and end of string mean there is none.
  if (*p != '/')
    return 0;
  const char *const start = p + 1;
  const char *end = start;
  for (;; ++end) {
    const char c = *end;
    if (c == '\0' || c == '/' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      return 0;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = (size_t) (end - start);
  return 1;
}
/* Locate the username in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * A username is present only if an '@' occurs before any '/', '?', '#' or end
 * of string, and a ':' occurs before that '@'.  On success sets *partp (if not
 * NULL) to 'auth', sets *lenp (if not NULL) to the number of bytes before the
 * ':', and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
{
  // The authority must contain '@' before it ends.
  const size_t at = strcspn(auth, "@/?#");
  if (auth[at] != '@')
    return 0;
  // The username ends at the first ':' provided it precedes the '@'.
  const size_t colon = strcspn(auth, ":@");
  if (auth[colon] != ':')
    return 0;
  if (partp)
    *partp = auth;
  if (lenp)
    *lenp = colon;
  return 1;
}
/* Locate the password in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * A password is present only if an '@' occurs before any '/', '?', '#' or end
 * of string, and a ':' occurs before that '@'.  On success sets *partp (if not
 * NULL) to the byte after the ':', sets *lenp (if not NULL) to the number of
 * bytes between the ':' and the '@', and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
{
  // The authority must contain '@' before it ends.
  const size_t at = strcspn(auth, "@/?#");
  if (auth[at] != '@')
    return 0;
  // The password starts after the first ':' provided it precedes the '@'.
  const size_t colon = strcspn(auth, ":@");
  if (auth[colon] != ':')
    return 0;
  const char *const start = auth + colon + 1;
  const size_t passlen = strcspn(start, "@");
  // The checks above guarantee that an '@' follows.
  assert(start[passlen] == '@');
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = passlen;
  return 1;
}
/* Locate the hostname in an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first '/', '?', '#' or end of string.  The
 * hostname starts after the last '@' in it (or at 'auth' if there is none) and
 * excludes a trailing ":" followed by one or more decimal digits.
 *
 * Sets *partp and *lenp (each only if not NULL) and always returns 1; the
 * reported length may be zero.
 */
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk backwards over any trailing digits.  <ctype.h> needs an unsigned
  // char value; a negative plain char is undefined.
  const char *r = end;
  while (r > host && isdigit((unsigned char) *--r))
    ;
  // Strip ":digits" only if at least one digit follows the colon.  Comparing
  // the distance avoids forming a pointer before the start of the string.
  if (end - r > 1 && *r == ':')
    end = r;
  if (partp)
    *partp = host;
  if (lenp)
    *lenp = (size_t) (end - host);
  return 1;
}
/* Extract the port number from an authority of the form
 *
 *    [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first '/', '?', '#' or end of string.  A port is
 * present only if the authority ends with ':' followed by one or more decimal
 * digits.  Leading zeros are ignored; the value must not exceed UINT16_MAX.
 *
 * On success stores the port in *portp (if not NULL) and returns 1.  Returns 0
 * if there is no port or it is out of range.
 */
int str_uri_authority_port(const char *auth, uint16_t *portp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk backwards over any trailing digits.  <ctype.h> needs an unsigned
  // char value; a negative plain char is undefined.
  const char *r = end;
  while (r > host && isdigit((unsigned char) *--r))
    ;
  // Need ':' followed by at least one digit.  Comparing the distance avoids
  // forming a pointer before the start of the string.
  if (end - r <= 1 || *r != ':')
    return 0;
  const char *digits = r + 1;
  while (*digits == '0')
    ++digits;
  // More than five significant digits cannot fit in 16 bits.
  if (end - digits > 5)
    return 0;
  // Everything in [digits, end) is a decimal digit, so accumulate directly.
  unsigned long n = 0;
  for (; digits < end; ++digits)
    n = n * 10 + (unsigned long) (*digits - '0');
  if (n > UINT16_MAX)
    return 0;
  if (portp)
    *portp = (uint16_t) n;
  return 1;
}

146
uri.h Normal file
View File

@ -0,0 +1,146 @@
/*
Serval URI primitives
Copyright (C) 2012-2016 Serval Project Inc.
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
#ifndef __SERVAL_DNA__URI_H__
#define __SERVAL_DNA__URI_H__
#include <stdint.h> // for uint16_t
#include <sys/types.h> // for size_t
/* -------------------- URI encoding and decoding -------------------- */
/* Encode up to 'srclen' bytes of byte data (or up to first nul if 'srclen' == -1) at 'src' into at
* most 'dstsiz' bytes of URI-encoded (or www-form-urlencoded) representation at 'dstUrienc'. If
* 'dstsiz' is -1 or 'dstUrienc' is NULL, does not write any encoded bytes, but still counts them.
* If 'afterp' is not NULL, then sets *afterp to point to the source byte immediately following the
* last character encoded. A "%xx" sequence will never be partially encoded; if all the "%xx" does
* not fit within the destination buffer, then none of it is produced.
*
* Returns the total number of encoded bytes written at 'dstUrienc'.
*
* Can be used to count encoded bytes without actually encoding, eg:
*
* uri_encode(NULL, -1, buf, buflen, NULL);
*
* The uri_encodev() and www_form_uri_encodev() functions are multi-buffer gather variants,
* analogous to readv(2) and writev(2). They modify the supplied *iovp, *iovcntp parameters and the
* iovec structures at (*iovp)[...] to represent the remaining source bytes not encoded.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp);
struct iovec;
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec **iovp, int *iovcntp); // modifies *iovp, (*iovp)[...] and *iovcntp
/* Decode up to 'srclen' bytes of URI-encoded (or www-form-urlencoded) data at 'srcUrienc' into at
* most 'dstsiz' bytes at 'dst'. If 'dstsiz' is -1 or 'dst' is NULL, then does not write any
* decoded bytes, but still counts them. If 'afterp' is not NULL, then sets *afterp to point to the
* source byte immediately following the last byte decoded.
*
* Returns the total number of decoded bytes written at 'dst'.
*
* Can be used to decode in-place, eg:
*
* uri_decode((char *)buf, n, (const char *)buf, n, NULL);
*
* Can be used to count decoded bytes without actually decoding, eg:
*
* uri_decode(NULL, -1, buf, buflen, NULL);
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp);
/* -------------------- URI parsing -------------------- */
/* Return true if the string resembles a nul-terminated URI.
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_is_uri(const char *uri);
/* Return true if the string resembles a URI scheme without the terminating colon.
* Based on RFC-3986 generic syntax.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_is_uri_scheme(const char *scheme);
/* Pick apart a URI into its basic parts.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical
* part. If the respective part is found, sets (*partp) to point to the start
* of the part within the supplied 'uri' string, sets (*lenp) to the length of
* the part substring and returns 1. Otherwise returns 0. These functions
* do not reliably validate that the string in 'uri' is a valid URI; that must
* be done by calling str_is_uri().
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp);
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp);
int str_uri_query(const char *uri, const char **partp, size_t *lenp);
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp);
/* Pick apart a URI hierarchical part into its basic parts.
*
* hierarchical := "//" authority [ "/" path ]
*
* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'uri' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0.
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical(), even though it is not nul-terminated, because they
* treat "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp);
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp);
/* Pick apart a URI authority into its basic parts.
*
* authority := [ username ":" password "@" ] hostname [ ":" port ]
*
* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'uri' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0.
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical_authority(), even though it is not nul-terminated,
* because they treat "/", "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_port(const char *auth, uint16_t *portp);
#endif // __SERVAL_DNA__URI_H__