/*
 Serval URI primitives
 Copyright (C) 2015 Serval Project Inc.
 
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
 
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 */

#include "uri.h"
#include "str.h"
#include <ctype.h>
#include <stdlib.h>  // for atoi()
#include <sys/uio.h> // for iovec
#include <limits.h>  // for USHRT_MAX
#include <assert.h>

/* Percent-encode the bytes described by an array of iovecs.
 *
 * Unreserved URI characters are copied verbatim.  If www_form is non-zero, a
 * space is emitted as '+'.  Every other byte is emitted as "%XX", with the
 * high nibble first in upper case hex, which is the order that _uri_decode()
 * reads.
 *
 * @param www_form   non-zero to encode ' ' as '+'
 * @param dstUrienc  output buffer, or NULL to only count the bytes that would
 *                   be produced
 * @param dstsiz     capacity of dstUrienc in bytes, or -1 for no limit
 * @param iovp       in/out: pointer to the current iovec; advanced past every
 *                   iovec that was found exhausted
 * @param iovcntp    in/out: number of iovecs remaining at *iovp
 * @return the number of bytes produced (no terminating NUL is written)
 *
 * The source is consumed in place: for each byte encoded, the current iovec's
 * iov_base is advanced by one and its iov_len reduced by one, so on return the
 * iovecs describe exactly the input that has not been encoded yet.  Encoding
 * stops early, without consuming the byte, if a "%XX" escape does not fit in
 * the space remaining.
 */
static size_t _uri_encodev(int www_form, char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t capacity = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Count with an index, not a pointer, so that NULL dstUrienc needs no pointer arithmetic.
  size_t produced = 0;
  while (*iovcntp && (unbounded || produced < capacity)) {
    struct iovec *const iov = *iovp;
    if (iov->iov_len == 0) {
      // Current iovec is exhausted; move on to the next one.
      --*iovcntp;
      ++*iovp;
      continue;
    }
    const uint8_t c = *(const uint8_t *) iov->iov_base;
    if (www_form && c == ' ') {
      if (dstUrienc)
        dstUrienc[produced] = '+';
      ++produced;
    } else if (is_uri_char_unreserved(c)) {
      if (dstUrienc)
        dstUrienc[produced] = c;
      ++produced;
    } else if (unbounded || capacity - produced >= 3) {
      if (dstUrienc) {
        dstUrienc[produced] = '%';
        dstUrienc[produced + 1] = hexdigit_upper[c >> 4];
        dstUrienc[produced + 2] = hexdigit_upper[c & 0xf];
      }
      produced += 3;
    } else {
      // Not enough room for a whole escape: leave the byte unconsumed.
      break;
    }
    // Consume the byte just encoded.
    iov->iov_base = (char *) iov->iov_base + 1;
    --iov->iov_len;
  }
  return produced;
}

/* Encode a single contiguous buffer by wrapping it in a one-element iovec
 * array and handing it to _uri_encodev().
 *
 * @param www_form   passed through to _uri_encodev()
 * @param dstUrienc  passed through to _uri_encodev()
 * @param dstsiz     passed through to _uri_encodev()
 * @param src        start of the bytes to encode
 * @param srclen     number of bytes at src
 * @param afterp     if not NULL, receives the iovec's iov_base as it stands
 *                   after the call to _uri_encodev()
 * @return whatever _uri_encodev() returns
 */
static size_t _uri_encode(int www_form, char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  // The cast drops const only because iov_base is declared as a plain void *.
  struct iovec chunk = { .iov_base = (void *) src, .iov_len = srclen };
  struct iovec *cursor = &chunk;
  int remaining = 1;
  const size_t written = _uri_encodev(www_form, dstUrienc, dstsiz, &cursor, &remaining);
  if (afterp)
    *afterp = chunk.iov_base;
  return written;
}

/* URI-encode the srclen bytes at src.
 *
 * Forwards to _uri_encode() with www_form = 0, so a space gets no special '+'
 * treatment.  dstUrienc, dstsiz, src, srclen and afterp are passed through
 * unchanged, and the return value is that of _uri_encode().
 */
size_t uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  return _uri_encode(0, dstUrienc, dstsiz, src, srclen, afterp);
}

/* URI-encode the srclen bytes at src in "www form" mode.
 *
 * Forwards to _uri_encode() with www_form = 1, which selects the mode in
 * which a space is emitted as '+'.  All other arguments are passed through
 * unchanged, and the return value is that of _uri_encode().
 */
size_t www_form_uri_encode(char *const dstUrienc, ssize_t dstsiz, const char *src, size_t srclen, const char **afterp)
{
  return _uri_encode(1, dstUrienc, dstsiz, src, srclen, afterp);
}

/* URI-encode the bytes described by an array of iovecs.
 *
 * Forwards to _uri_encodev() with www_form = 0, so a space gets no special
 * '+' treatment.  dstUrienc, dstsiz, iovp and iovcntp are passed through
 * unchanged, and the return value is that of _uri_encodev().
 */
size_t uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  return _uri_encodev(0, dstUrienc, dstsiz, iovp, iovcntp);
}

/* URI-encode the bytes described by an array of iovecs in "www form" mode.
 *
 * Forwards to _uri_encodev() with www_form = 1, which selects the mode in
 * which a space is emitted as '+'.  All other arguments are passed through
 * unchanged, and the return value is that of _uri_encodev().
 */
size_t www_form_uri_encodev(char *const dstUrienc, ssize_t dstsiz, struct iovec ** iovp, int *iovcntp)
{
  return _uri_encodev(1, dstUrienc, dstsiz, iovp, iovcntp);
}

/* Decode percent-encoded text.
 *
 * Each "%XX" sequence with two hexadecimal digits is replaced by the byte it
 * denotes.  If www_form is non-zero, '+' is decoded as a space.  Every other
 * byte, including a '%' that is not followed by two hex digits within the
 * source, is copied unchanged.
 *
 * @param www_form   non-zero to decode '+' as ' '
 * @param dstOrig    output buffer, or NULL to only count the bytes that would
 *                   be produced
 * @param dstsiz     capacity of dstOrig in bytes, or -1 for no limit
 * @param srcUrienc  start of the encoded text
 * @param srclen     number of bytes at srcUrienc
 * @param afterp     if not NULL, receives a pointer to the first source byte
 *                   that was not consumed
 * @return the number of bytes produced (no terminating NUL is written)
 */
static size_t _uri_decode(int www_form, char *const dstOrig, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  const int unbounded = (dstsiz == -1);
  // Any other negative size leaves no room at all.
  const size_t capacity = dstsiz > 0 ? (size_t) dstsiz : 0;
  // Count with an index, not a pointer, so that NULL dstOrig needs no pointer arithmetic.
  size_t decoded = 0;
  while (srclen && (unbounded || decoded < capacity)) {
    size_t consumed = 1;
    if (www_form && *srcUrienc == '+') {
      if (dstOrig)
        dstOrig[decoded] = ' ';
    } else if (srclen >= 3
            && srcUrienc[0] == '%'
            // <ctype.h> functions are only defined for unsigned char values and EOF.
            && isxdigit((unsigned char) srcUrienc[1])
            && isxdigit((unsigned char) srcUrienc[2])
    ) {
      if (dstOrig)
        dstOrig[decoded] = (char) ((hexvalue(srcUrienc[1]) << 4) | hexvalue(srcUrienc[2]));
      consumed = 3;
    } else {
      if (dstOrig)
        dstOrig[decoded] = *srcUrienc;
    }
    srcUrienc += consumed;
    srclen -= consumed;
    ++decoded;
  }
  if (afterp)
    *afterp = srcUrienc;
  return decoded;
}

/* Decode URI-encoded text.
 *
 * Forwards to _uri_decode() with www_form = 0, so '+' is copied unchanged
 * rather than decoded as a space.  dst, dstsiz, srcUrienc, srclen and afterp
 * are passed through unchanged, and the return value is that of
 * _uri_decode().
 */
size_t uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  return _uri_decode(0, dst, dstsiz, srcUrienc, srclen, afterp);
}

/* Decode URI-encoded text in "www form" mode.
 *
 * Forwards to _uri_decode() with www_form = 1, so '+' is decoded as a space.
 * All other arguments are passed through unchanged, and the return value is
 * that of _uri_decode().
 */
size_t www_form_uri_decode(char *const dst, ssize_t dstsiz, const char *srcUrienc, size_t srclen, const char **afterp)
{
  return _uri_decode(1, dst, dstsiz, srcUrienc, srclen, afterp);
}

/* Return true if the string resembles a URI.
 * Based on RFC-3986 generic syntax, assuming nothing about the hierarchical part.
 *
 * The string must start with a scheme accepted by str_uri_scheme(), followed
 * by ':' and a non-empty hierarchical part, then optionally '?' and a query,
 * then optionally '#' and a fragment.  Each of the three parts may contain
 * only unreserved and reserved URI characters other than '?' and '#', and
 * nothing may follow the last part.
 *
 * @author Andrew Bettison <andrew@servalproject.com>
 */
int str_is_uri(const char *uri)
{
  const char *scheme;
  size_t scheme_len;
  if (!str_uri_scheme(uri, &scheme, &scheme_len))
    return 0;
  // Step over the scheme and the colon that terminates it.
  const char *const hier = scheme + scheme_len + 1;
  const char *p = hier;
  while (*p && *p != '?' && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
    ++p;
  // The hierarchical part is mandatory.
  if (p == hier)
    return 0;
  if (*p == '?') {
    ++p;
    while (*p && *p != '?' && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
      ++p;
  }
  if (*p == '#') {
    ++p;
    while (*p && *p != '?' && *p != '#' && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)))
      ++p;
  }
  // Anything left over is a character that is not allowed where it appears.
  return *p == '\0';
}

/* Return true if the whole NUL-terminated string is a URI scheme name: an
 * alphabetic character followed by zero or more characters accepted by
 * is_uri_char_scheme().
 *
 * @param scheme  the candidate scheme name, without any trailing ':'
 * @return 1 if the string is a scheme name, 0 otherwise
 */
int str_is_uri_scheme(const char *scheme)
{
  // <ctype.h> functions are only defined for unsigned char values and EOF.
  if (!isalpha((unsigned char) *scheme))
    return 0;
  const char *p = scheme + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  return *p == '\0';
}

/* Locate the scheme at the start of a URI.
 *
 * @param uri    NUL-terminated string to examine
 * @param partp  if not NULL, receives a pointer to the start of the scheme
 *               (always uri itself)
 * @param lenp   if not NULL, receives the length of the scheme, not counting
 *               the ':' that follows it
 * @return 1 if uri starts with a scheme followed by ':', otherwise 0, in
 *         which case *partp and *lenp are left untouched
 */
int str_uri_scheme(const char *uri, const char **partp, size_t *lenp)
{
  // Scheme is ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
  // <ctype.h> functions are only defined for unsigned char values and EOF.
  if (!isalpha((unsigned char) *uri))
    return 0;
  const char *p = uri + 1;
  while (is_uri_char_scheme(*p))
    ++p;
  // Scheme is followed by colon ":".
  if (*p != ':')
    return 0;
  if (partp)
    *partp = uri;
  if (lenp)
    *lenp = (size_t) (p - uri);
  return 1;
}

/* Locate the hierarchical part of a URI: the text that follows the first ':'
 * in the string.
 *
 * The part ends at the first NUL, '?', '#' or character that is neither an
 * unreserved nor a reserved URI character.  The text before the ':' is not
 * checked.
 *
 * @param uri    NUL-terminated string to examine
 * @param partp  if not NULL, receives a pointer to the start of the part
 * @param lenp   if not NULL, receives the length of the part
 * @return 1 if a non-empty part was found, otherwise 0, in which case *partp
 *         and *lenp are left untouched
 */
int str_uri_hierarchical(const char *uri, const char **partp, size_t *lenp)
{
  const char *colon = uri;
  for (; *colon != ':'; ++colon)
    if (*colon == '\0')
      return 0;
  const char *const start = colon + 1;
  const char *end = start;
  for (;;) {
    const char c = *end;
    if (c == '\0' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      break;
    ++end;
  }
  if (end == start)
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}

/* Locate the query part of a URI: the text that follows the first '?' in the
 * string, up to the end of the string or the next '#'.
 *
 * @param uri    NUL-terminated string to examine
 * @param partp  if not NULL, receives a pointer to the start of the query
 * @param lenp   if not NULL, receives the length of the query
 * @return 1 if a non-empty query made only of unreserved and reserved URI
 *         characters was found, otherwise 0, in which case *partp and *lenp
 *         are left untouched
 */
int str_uri_query(const char *uri, const char **partp, size_t *lenp)
{
  const char *mark = uri;
  for (; *mark != '?'; ++mark)
    if (*mark == '\0')
      return 0;
  const char *const start = mark + 1;
  const char *end = start;
  for (;;) {
    const char c = *end;
    if (c == '\0' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      break;
    ++end;
  }
  // Reject an empty query.
  if (end == start)
    return 0;
  // Reject a query cut short by a character that is not allowed in it.
  if (*end != '\0' && *end != '#')
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}

/* Locate the fragment part of a URI: the text that follows the first '#' in
 * the string, up to the end of the string.
 *
 * @param uri    NUL-terminated string to examine
 * @param partp  if not NULL, receives a pointer to the start of the fragment
 * @param lenp   if not NULL, receives the length of the fragment
 * @return 1 if a non-empty fragment made only of unreserved and reserved URI
 *         characters was found, otherwise 0, in which case *partp and *lenp
 *         are left untouched
 */
int str_uri_fragment(const char *uri, const char **partp, size_t *lenp)
{
  const char *hash = uri;
  for (; *hash != '#'; ++hash)
    if (*hash == '\0')
      return 0;
  const char *const start = hash + 1;
  const char *end = start;
  for (; *end != '\0'; ++end)
    if (!is_uri_char_unreserved(*end) && !is_uri_char_reserved(*end))
      break;
  // Reject an empty fragment.
  if (end == start)
    return 0;
  // The fragment must run all the way to the end of the string.
  if (*end != '\0')
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}

/* Locate the authority within the hierarchical part of a URI: the text that
 * follows a leading "//", up to the end of the string or the next '/', '?'
 * or '#'.
 *
 * @param hier   NUL-terminated hierarchical part, which must start with "//"
 * @param partp  if not NULL, receives a pointer to the start of the authority
 * @param lenp   if not NULL, receives the length of the authority
 * @return 1 if a non-empty authority made only of unreserved and reserved URI
 *         characters was found, otherwise 0, in which case *partp and *lenp
 *         are left untouched
 */
int str_uri_hierarchical_authority(const char *hier, const char **partp, size_t *lenp)
{
  if (hier[0] != '/' || hier[1] != '/')
    return 0;
  const char *const start = hier + 2;
  const char *end = start;
  for (;;) {
    const char c = *end;
    if (c == '\0' || c == '/' || c == '?' || c == '#')
      break;
    if (!is_uri_char_unreserved(c) && !is_uri_char_reserved(c))
      break;
    ++end;
  }
  // Reject an empty authority.
  if (end == start)
    return 0;
  // Reject an authority cut short by a character that is not allowed in it.
  if (*end != '\0' && *end != '/' && *end != '?' && *end != '#')
    return 0;
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}

/* Locate the path within the hierarchical part of a URI: the text that
 * follows the '/' which terminates the authority.
 *
 * The authority is skipped without being validated.  If it is terminated by
 * '?', '#' or the end of the string then there is no path, and the query or
 * fragment text is never mistaken for one.
 *
 * Only the first path segment is returned, because the scan of the path stops
 * at the next '/', '?' or '#'.
 *
 * @param hier   NUL-terminated hierarchical part, which must start with "//"
 * @param partp  if not NULL, receives a pointer to the first character after
 *               the '/' that ends the authority
 * @param lenp   if not NULL, receives the length of the segment
 * @return 1 if a non-empty segment made only of unreserved and reserved URI
 *         characters was found, otherwise 0, in which case *partp and *lenp
 *         are left untouched
 */
int str_uri_hierarchical_path(const char *hier, const char **partp, size_t *lenp)
{
  if (hier[0] != '/' || hier[1] != '/')
    return 0;
  // Skip the authority.
  const char *p = hier + 2;
  while (*p && *p != '/' && *p != '?' && *p != '#')
    ++p;
  // A path can only start at a slash.
  if (*p != '/')
    return 0;
  const char *const q = ++p;
  while (*p && (is_uri_char_unreserved(*p) || is_uri_char_reserved(*p)) && *p != '/' && *p != '?' && *p != '#')
    ++p;
  if (p == q || (*p && *p != '/' && *p != '?' && *p != '#'))
    return 0;
  if (partp)
    *partp = q;
  if (lenp)
    *lenp = (size_t) (p - q);
  return 1;
}

/* Locate the user name in the authority part of a URI.
 *
 * A user name is only reported when the authority has the form
 * "username:password@...": an '@' must occur before any '/', '?', '#' or the
 * end of the string, and a ':' must occur before that '@'.
 *
 * @param auth   NUL-terminated text starting at the authority
 * @param partp  if not NULL, receives a pointer to the user name (always auth
 *               itself)
 * @param lenp   if not NULL, receives the length of the user name, which may
 *               be zero
 * @return 1 if a user name was found, otherwise 0, in which case *partp and
 *         *lenp are left untouched
 */
int str_uri_authority_username(const char *auth, const char **partp, size_t *lenp)
{
  // There is no user info unless the authority contains an '@'.
  const char *at = auth;
  while (*at != '\0' && *at != '@' && *at != '/' && *at != '?' && *at != '#')
    ++at;
  if (*at != '@')
    return 0;
  // The user name runs up to the colon that introduces the password.
  const char *end = auth;
  while (*end != '\0' && *end != ':' && *end != '@')
    ++end;
  if (*end != ':')
    return 0;
  if (partp)
    *partp = auth;
  if (lenp)
    *lenp = end - auth;
  return 1;
}

/* Locate the password in the authority part of a URI.
 *
 * A password is only reported when the authority has the form
 * "username:password@...": an '@' must occur before any '/', '?', '#' or the
 * end of the string, and a ':' must occur before that '@'.  The password is
 * the text between that ':' and the '@'.
 *
 * @param auth   NUL-terminated text starting at the authority
 * @param partp  if not NULL, receives a pointer to the start of the password
 * @param lenp   if not NULL, receives the length of the password, which may
 *               be zero
 * @return 1 if a password was found, otherwise 0, in which case *partp and
 *         *lenp are left untouched
 */
int str_uri_authority_password(const char *auth, const char **partp, size_t *lenp)
{
  // There is no user info unless the authority contains an '@'.
  const char *at = auth;
  while (*at != '\0' && *at != '@' && *at != '/' && *at != '?' && *at != '#')
    ++at;
  if (*at != '@')
    return 0;
  // The password starts after the first colon, which must precede the '@'.
  const char *colon = auth;
  while (*colon != '\0' && *colon != ':' && *colon != '@')
    ++colon;
  if (*colon != ':')
    return 0;
  const char *const start = colon + 1;
  const char *end = start;
  while (*end != '\0' && *end != '@')
    ++end;
  // An '@' is known to follow the colon, so the scan cannot reach the NUL.
  assert(*end == '@');
  if (partp)
    *partp = start;
  if (lenp)
    *lenp = end - start;
  return 1;
}

/* Locate the host name in the authority part of a URI.
 *
 * The authority ends at the first '/', '?', '#' or the end of the string.
 * The host name starts after the last '@' in the authority, or at its start
 * if there is none, and ends before a trailing ":<digits>" port suffix if
 * there is one.  A ':' with no digits after it is left as part of the host
 * name.
 *
 * @param auth   NUL-terminated text starting at the authority
 * @param partp  if not NULL, receives a pointer to the start of the host name
 * @param lenp   if not NULL, receives the length of the host name, which may
 *               be zero
 * @return always 1
 */
int str_uri_authority_hostname(const char *auth, const char **partp, size_t *lenp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk back over any trailing digits, never looking before the host name.
  // <ctype.h> functions are only defined for unsigned char values and EOF.
  const char *digits = end;
  while (digits > host && isdigit((unsigned char) digits[-1]))
    --digits;
  // Strip the digits only if there is at least one and a colon precedes them.
  if (digits < end && digits > host && digits[-1] == ':')
    end = digits - 1;
  if (partp)
    *partp = host;
  if (lenp)
    *lenp = (size_t) (end - host);
  return 1;
}

/* Extract the port number from the authority part of a URI.
 *
 * The authority ends at the first '/', '?', '#' or the end of the string.
 * The port is the run of decimal digits at the very end of the authority,
 * which must be non-empty and preceded by a ':' located after the last '@'.
 * Leading zeros are ignored, and the value must not exceed USHRT_MAX.
 *
 * @param auth   NUL-terminated text starting at the authority
 * @param portp  if not NULL, receives the port number on success
 * @return 1 if a valid port was found, otherwise 0, in which case *portp is
 *         left untouched
 */
int str_uri_authority_port(const char *auth, uint16_t *portp)
{
  const char *host = auth;
  const char *end = auth;
  for (; *end && *end != '/' && *end != '?' && *end != '#'; ++end)
    if (*end == '@')
      host = end + 1;
  // Walk back over any trailing digits, never looking before the host name.
  // <ctype.h> functions are only defined for unsigned char values and EOF.
  const char *digits = end;
  while (digits > host && isdigit((unsigned char) digits[-1]))
    --digits;
  // There must be at least one digit, introduced by a colon.
  if (digits == end || digits == host || digits[-1] != ':')
    return 0;
  // Skip leading zeros; this stops at 'end' at the latest, as *end is never '0'.
  while (*digits == '0')
    ++digits;
  // More than five significant digits cannot fit in sixteen bits.
  if (end - digits > 5)
    return 0;
  // atoi() is safe here: it sees at most five digits, ended by a non-digit.
  const unsigned int port = (unsigned int) atoi(digits);
  if (port > USHRT_MAX)
    return 0;
  if (portp)
    *portp = (uint16_t) port;
  return 1;
}