Move str_is_uri() from dataformats.c to str.c

Add lots of URI unpacking functions as well.
This commit is contained in:
Andrew Bettison 2012-11-21 18:08:06 +10:30
parent f78098afd8
commit 960a6293e1
4 changed files with 305 additions and 44 deletions

View File

@ -268,46 +268,3 @@ int safeZeroField(unsigned char *packet,int start,int count)
return 0;
}
/* Return true if 'c' may appear in a URI scheme after its first character:
 * ALPHA / DIGIT / "+" / "-" / "." (RFC-3986 section 3.1).
 *
 * The character is converted to unsigned char before it is handed to the
 * <ctype.h> classifiers, because passing them a negative plain char is
 * undefined behaviour.
 */
int is_uri_char_scheme(char c)
{
  const unsigned char uc = (unsigned char) c;
  return isalpha(uc) || isdigit(uc) || c == '+' || c == '-' || c == '.';
}
/* Return true if 'c' is a URI "unreserved" character:
 * ALPHA / DIGIT / "-" / "." / "_" / "~" (RFC-3986 section 2.3).
 *
 * The character is converted to unsigned char before it is handed to the
 * <ctype.h> classifiers, because passing them a negative plain char is
 * undefined behaviour.
 */
int is_uri_char_unreserved(char c)
{
  const unsigned char uc = (unsigned char) c;
  return isalpha(uc) || isdigit(uc) || c == '-' || c == '.' || c == '_' || c == '~';
}
/* Return 1 if 'c' is one of the URI "reserved" delimiter characters
 * (the RFC-3986 gen-delims and sub-delims), otherwise 0.
 */
int is_uri_char_reserved(char c)
{
  static const char reserved[] = {
    ':', '/', '?', '#', '[', ']', '@',                      // gen-delims
    '!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '='  // sub-delims
  };
  unsigned i;
  for (i = 0; i < sizeof reserved / sizeof reserved[0]; ++i)
    if (reserved[i] == c)
      return 1;
  return 0;
}
/* Return true if the string resembles a URI.
 * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
 *
 * The string must consist of a scheme, a colon, and then at least one
 * character, every one of which is a URI unreserved or reserved character, up
 * to the terminating nul.  Returns 1 if so, otherwise 0.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
int str_is_uri(const char *uri)
{
  const char *p = uri;
  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // Convert to unsigned char first: handing a negative plain char to isalpha()
  // is undefined behaviour.
  if (!isalpha((unsigned char) *p++))
    return 0;
  while (is_uri_char_scheme(*p))
    ++p;
  // Scheme is followed by colon ":".
  if (*p++ != ':')
    return 0;
  // Hierarchical part must contain only valid characters.
  const char *q = p;
  while (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p))
    ++p;
  // Must be non-empty and must have been scanned right up to the nul.
  return p != q && *p == '\0';
}

View File

@ -439,7 +439,6 @@ int str_is_subscriber_id(const char *sid);
int strn_is_subscriber_id(const char *sid, size_t *lenp);
int str_is_did(const char *did);
int strn_is_did(const char *did, size_t *lenp);
int str_is_uri(const char *uri);
int stowSid(unsigned char *packet, int ofs, const char *sid);
void srandomdev();

242
str.c
View File

@ -25,6 +25,8 @@
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <limits.h>
/* The sixteen upper-case hexadecimal digit characters, in ascending value order. */
char hexdigit[16] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'};
@ -262,3 +264,243 @@ size_t str_fromprint(unsigned char *dst, const char *src)
}
return dst - odst;
}
/* Return true if 'c' is valid inside a URI scheme (after the leading letter),
 * ie, is one of ALPHA / DIGIT / "+" / "-" / ".".
 *
 * <ctype.h> functions are only defined for values representable as unsigned
 * char (or EOF), so the plain char is converted before classification instead
 * of being passed through, where a negative value would be undefined behaviour.
 */
int is_uri_char_scheme(char c)
{
  const unsigned char uc = (unsigned char) c;
  if (isalpha(uc) || isdigit(uc))
    return 1;
  return c == '+' || c == '-' || c == '.';
}
/* Return true if 'c' is a URI "unreserved" character, ie, is one of
 * ALPHA / DIGIT / "-" / "." / "_" / "~".
 *
 * <ctype.h> functions are only defined for values representable as unsigned
 * char (or EOF), so the plain char is converted before classification instead
 * of being passed through, where a negative value would be undefined behaviour.
 */
int is_uri_char_unreserved(char c)
{
  const unsigned char uc = (unsigned char) c;
  if (isalpha(uc) || isdigit(uc))
    return 1;
  return c == '-' || c == '.' || c == '_' || c == '~';
}
/* Return 1 if 'c' is one of the URI "reserved" delimiter characters
 *   : / ? # [ ] @ ! $ & ' ( ) * + , ; =
 * otherwise 0.
 */
int is_uri_char_reserved(char c)
{
  // strchr() also matches the terminating nul of the set, so nul must be
  // excluded explicitly.
  return c != '\0' && strchr(":/?#[]@!$&'()*+,;=", c) != NULL;
}
/* Return true if the string resembles a URI.
 * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
 *
 *    uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
 *
 * The hierarchical part must be non-empty and ends at the first "?" or "#".
 * The query ends at the first "#" and may itself contain "?", which is the
 * same rule that str_uri_query() applies when extracting it.  The fragment
 * must run to the terminating nul and may not contain "?" or "#".  Every
 * character after the scheme's colon must be a URI unreserved or reserved
 * character.  Returns 1 if the whole string matches, otherwise 0.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
int str_is_uri(const char *uri)
{
  const char *p;
  size_t len;
  if (!str_uri_scheme(uri, &p, &len))
    return 0;
  // Step over the scheme and the ":" that follows it.
  const char *const q = (p += len + 1);
  // Hierarchical part.
  while (*p && *p != '?' && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
    ++p;
  if (p == q)
    return 0;
  // Optional query: only "#" terminates it, so an embedded "?" is accepted.
  if (*p == '?')
    for (++p; *p && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)); ++p)
      ;
  // Optional fragment.
  if (*p == '#')
    for (++p; *p && *p != '?' && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)); ++p)
      ;
  // Valid only if every character up to the nul was consumed.
  return !*p;
}
/* Locate the scheme at the start of a URI.
 *
 * The scheme must start with a letter, continue with scheme characters (see
 * is_uri_char_scheme()) and be followed immediately by ":".  If so, sets
 * (*partp) to 'uri' and (*lenp) to the number of characters before the colon
 * (either pointer may be NULL to skip that output) and returns 1.  Otherwise
 * returns 0 and leaves both outputs untouched.
 */
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
{
  const char *p = uri;
  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // Convert to unsigned char first: handing a negative plain char to isalpha()
  // is undefined behaviour.
  if (!isalpha((unsigned char) *p++))
    return 0;
  while (is_uri_char_scheme(*p))
    ++p;
  // Scheme is followed by colon ":".
  if (*p != ':')
    return 0;
  if (partp)
    *partp = uri;
  if (lenp)
    *lenp = p - uri;
  return 1;
}
/* Locate the hierarchical part of a URI: the text after the first ":" up to
 * (not including) the first "?", "#", nul, or character that is neither URI
 * unreserved nor reserved.
 *
 * If that text is non-empty, sets (*partp) to its start and (*lenp) to its
 * length (either pointer may be NULL) and returns 1.  Returns 0 if the string
 * contains no ":" or the part is empty.  The scheme is not validated here.
 */
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
{
  const char *const colon = strchr(uri, ':');
  if (colon == NULL)
    return 0;
  const char *const start = colon + 1;
  const char *end;
  for (end = start; *end; ++end) {
    if (*end == '?' || *end == '#')
      break;
    if (!is_uri_char_unreserved(*end) && !is_uri_char_reserved(*end))
      break;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the query part of a URI: the text after the first "?" up to (not
 * including) the "#" that starts the fragment, or the end of the string.
 *
 * If a non-empty query made only of URI unreserved/reserved characters is
 * found, sets (*partp) to its start and (*lenp) to its length (either pointer
 * may be NULL) and returns 1.  Returns 0 if there is no "?" before any "#",
 * if the query is empty, or if it contains an invalid character.
 */
int str_uri_query(const char *uri, const char **partp, size_t *lenp)
{
  const char *p = uri;
  // Find the "?" that introduces the query.  The search also stops at "#",
  // because a "?" that appears after it belongs to the fragment, not to a query.
  while (*p && *p != '?' && *p != '#')
    ++p;
  if (*p != '?')
    return 0;
  const char *const q = ++p;
  // The query may itself contain "?"; only "#" (or nul) ends it.
  while (*p && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
    ++p;
  // Reject an empty query, or one cut short by an invalid character.
  if (p == q || (*p && *p != '#'))
    return 0;
  if (partp)
    *partp = q;
  if (lenp)
    *lenp = p - q;
  return 1;
}
/* Locate the fragment part of a URI: the text after the first "#", which must
 * run to the end of the string.
 *
 * If a non-empty fragment made only of URI unreserved/reserved characters is
 * found, sets (*partp) to its start and (*lenp) to its length (either pointer
 * may be NULL) and returns 1.  Returns 0 if there is no "#", if the fragment
 * is empty, or if an invalid character occurs before the terminating nul.
 */
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
{
  const char *const hash = strchr(uri, '#');
  if (hash == NULL)
    return 0;
  const char *const start = hash + 1;
  const char *end = start;
  while (*end && (is_uri_char_unreserved(*end) || is_uri_char_reserved(*end)))
    ++end;
  if (end == start || *end != '\0')
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the authority within a URI hierarchical part.
 *
 * 'hier' must begin with "//".  The authority is the text that follows, up to
 * (not including) the first "/", "?", "#" or nul.  If it is non-empty and
 * consists only of URI unreserved/reserved characters, sets (*partp) to its
 * start and (*lenp) to its length (either pointer may be NULL) and returns 1.
 * Otherwise returns 0.
 */
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
{
  if (strncmp(hier, "//", 2) != 0)
    return 0;
  const char *const start = hier + 2;
  const char *end;
  for (end = start; *end; ++end) {
    if (*end == '/' || *end == '?' || *end == '#')
      break; // legitimate end of the authority
    if (!is_uri_char_unreserved(*end) && !is_uri_char_reserved(*end))
      return 0; // invalid character inside the authority
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the path within a URI hierarchical part.
 *
 *    hierarchical := "//" authority [ "/" path ]
 *
 * 'hier' must begin with "//".  The authority is skipped up to the first "/",
 * "?", "#" or nul; a path is present only if that character is "/".  The text
 * after the "/" is then scanned up to the next "/", "?", "#" or nul.  If it
 * is non-empty and consists only of URI unreserved/reserved characters, sets
 * (*partp) to its start and (*lenp) to its length (either pointer may be NULL)
 * and returns 1.  Otherwise returns 0.
 *
 * NOTE(review): because the scan also stops at "/", only the first path
 * segment is returned for a multi-segment path such as "//host/a/b".  That
 * behaviour is preserved here; confirm whether callers expect the whole path.
 */
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
{
  if (hier[0] != '/' || hier[1] != '/')
    return 0;
  // Skip over the authority.
  const char *p = hier + 2;
  while (*p && *p != '/' && *p != '?' && *p != '#')
    ++p;
  // Only "/" introduces a path.  If the authority ended at "?" or "#" then the
  // query or fragment follows directly and must not be mistaken for a path.
  if (*p != '/')
    return 0;
  const char *const q = ++p;
  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
    ++p;
  // Reject an empty segment, or one cut short by an invalid character.
  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
    return 0;
  if (partp)
    *partp = q;
  if (lenp)
    *lenp = p - q;
  return 1;
}
/* Locate the username within a URI authority.
 *
 *    authority := [ username ":" password "@" ] hostname [ ":" port ]
 *
 * Credentials are recognised only if an "@" occurs before the first "/", "?",
 * "#" or nul, and a ":" occurs before that "@".  If so, sets (*partp) to
 * 'auth' and (*lenp) to the number of characters before the ":" (either
 * pointer may be NULL) and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
{
  // The first of "@", "/", "?", "#" or nul must be the "@".
  if (auth[strcspn(auth, "@/?#")] != '@')
    return 0;
  // The username ends at the first ":", which must come before the "@".
  const size_t namelen = strcspn(auth, ":@");
  if (auth[namelen] != ':')
    return 0;
  if (partp)
    *partp = auth;
  if (lenp)
    *lenp = namelen;
  return 1;
}
/* Locate the password within a URI authority.
 *
 *    authority := [ username ":" password "@" ] hostname [ ":" port ]
 *
 * Credentials are recognised only if an "@" occurs before the first "/", "?",
 * "#" or nul, and a ":" occurs before that "@".  If so, sets (*partp) to the
 * character after the ":" and (*lenp) to the number of characters between it
 * and the "@" (either pointer may be NULL) and returns 1.  Otherwise returns 0.
 */
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
{
  // The first of "@", "/", "?", "#" or nul must be the "@".
  if (auth[strcspn(auth, "@/?#")] != '@')
    return 0;
  // The username ends at the first ":", which must come before the "@".
  const char *const colon = auth + strcspn(auth, ":@");
  if (*colon != ':')
    return 0;
  const char *const start = colon + 1;
  const char *const end = start + strcspn(start, "@");
  // An "@" is known to follow the ":", as established by the two checks above.
  assert(*end == '@');
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Locate the hostname within a URI authority.
 *
 *    authority := [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first "/", "?", "#" or nul.  The hostname starts
 * after the last "@" in the authority (or at 'auth' if there is none) and
 * ends before a trailing ":" followed by one or more digits (the port), if
 * present, otherwise at the end of the authority.  Sets (*partp) to the start
 * of the hostname and (*lenp) to its length (either pointer may be NULL).
 * Always returns 1; the reported length may be zero.
 */
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
{
  const char *start = auth;
  const char *end;
  for (end = auth; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      start = end + 1;
  // Walk back over any trailing digits.  The cast keeps isdigit() defined for
  // negative plain chars.
  const char *digits = end;
  while (digits > start && isdigit((unsigned char) digits[-1]))
    --digits;
  // If those digits are preceded by ":", they are the port: the hostname stops
  // at the colon, not at the end of the authority.
  if (digits < end && digits > start && digits[-1] == ':')
    end = digits - 1;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}
/* Extract the numeric port from a URI authority.
 *
 *    authority := [ username ":" password "@" ] hostname [ ":" port ]
 *
 * The authority ends at the first "/", "?", "#" or nul.  A port is present if
 * the authority (after the last "@", if any) ends with ":" followed by one or
 * more decimal digits.  Leading zeros are ignored; what remains must be at
 * most five digits and no greater than USHRT_MAX.  If so, stores the value in
 * (*portp) (which may be NULL to only test for a port) and returns 1.
 * Otherwise returns 0 and leaves (*portp) untouched.
 */
int str_uri_authority_port(const char *auth, unsigned short *portp)
{
  const char *start = auth;
  const char *end;
  for (end = auth; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      start = end + 1;
  // Walk back over any trailing digits.  The cast keeps isdigit() defined for
  // negative plain chars.
  const char *digits = end;
  while (digits > start && isdigit((unsigned char) digits[-1]))
    --digits;
  // There must be at least one digit, preceded by ":".
  if (digits == end || digits == start || digits[-1] != ':')
    return 0;
  // Leading zeros do not count toward the five-digit limit.
  while (digits < end && *digits == '0')
    ++digits;
  if (end - digits > 5)
    return 0;
  // At most five digits remain, so the value cannot exceed 99999.
  unsigned long n = 0;
  for (; digits < end; ++digits)
    n = n * 10 + (unsigned long) (*digits - '0');
  if (n > USHRT_MAX)
    return 0;
  if (portp)
    *portp = (unsigned short) n;
  return 1;
}

63
str.h
View File

@ -138,6 +138,69 @@ char *str_str(char *haystack, const char *needle, int haystack_len);
*/
int str_to_ll_scaled(const char *str, int base, long long *result, const char **afterp);
/* Return true if the string resembles a nul-terminated URI.
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_is_uri(const char *uri);
/* Pick apart a URI into its basic parts.
*
* uri := scheme ":" hierarchical [ "?" query ] [ "#" fragment ]
*
* Based on RFC-3986 generic syntax, assuming nothing about the hierarchical
* part. If the respective part is found, sets (*partp) to point to the start
* of the part within the supplied 'uri' string, sets (*lenp) to the length of
* the part substring and returns 1. Otherwise returns 0. These functions
* do not reliably validate that the string in 'uri' is a valid URI; that must
* be done by calling str_is_uri().
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp);
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp);
int str_uri_query(const char *uri, const char **partp, size_t *lenp);
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp);
/* Pick apart a URI hierarchical part into its basic parts.
*
* hierarchical := "//" authority [ "/" path ]
*
* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'hier' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0.
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical(), even though it is not nul-terminated, because they
* treat "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp);
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp);
/* Pick apart a URI authority into its basic parts.
*
* authority := [ username ":" password "@" ] hostname [ ":" port ]
*
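* If the respective part is found, sets (*partp) to point to the start of the
* part within the supplied 'auth' string, sets (*lenp) to the length of the
* part substring and returns 1. Otherwise returns 0. The exception is
* str_uri_authority_port(), which stores the numeric port value in (*portp)
* instead.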
*
* These functions may be called directly on the part returned by
* str_uri_hierarchical_authority(), even though it is not nul-terminated,
* because they treat "/", "?" and "#" as equally valid terminators.
*
* @author Andrew Bettison <andrew@servalproject.com>
*/
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp);
int str_uri_authority_port(const char *auth, unsigned short *portp);
int parse_argv(char *cmdline, char delim, char **argv, int max_argv);