mirror of
https://github.com/servalproject/serval-dna.git
synced 2024-12-19 05:07:56 +00:00
d796a482b7
code from general rhizome direct BAR buffer gathering and processing.
479 lines
17 KiB
C
479 lines
17 KiB
C
/*
|
|
Serval Mesh Software
|
|
Copyright (C) 2010-2012 Paul Gardner-Stephen
|
|
|
|
This program is free software; you can redistribute it and/or
|
|
modify it under the terms of the GNU General Public License
|
|
as published by the Free Software Foundation; either version 2
|
|
of the License, or (at your option) any later version.
|
|
|
|
This program is distributed in the hope that it will be useful,
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
GNU General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with this program; if not, write to the Free Software
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "serval.h"
|
|
#include "rhizome.h"
|
|
#include "str.h"
|
|
#include <assert.h>
|
|
|
|
|
|
int rhizome_direct_clear_temporary_files(rhizome_http_request *r)
|
|
{
|
|
char filename[1024];
|
|
char *fields[]={"manifest","data","unknown",NULL};
|
|
int i;
|
|
|
|
for(i=0;fields[i];i++) {
|
|
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,fields[i]);
|
|
filename[1023]=0;
|
|
DEBUGF("Unlinking '%s'",filename);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int rhizome_direct_form_received(rhizome_http_request *r)
|
|
{
|
|
/* XXX This needs to be implemented.
|
|
For now we just put out a "no content" response that makes testing convenient
|
|
*/
|
|
|
|
/* XXX process completed form based on the set of fields seen */
|
|
switch(r->fields_seen) {
|
|
case RD_MIME_STATE_MANIFESTHEADERS
|
|
|RD_MIME_STATE_DATAHEADERS:
|
|
/* A bundle to import */
|
|
DEBUGF("Call bundle import for rhizomedata.%d.{data,file}",
|
|
r->alarm.poll.fd);
|
|
char cmd[1024];
|
|
snprintf(cmd,1024,
|
|
"servald rhizome import bundle rhizomedirect.%d.data rhizomedirect.%d.manifest",
|
|
r->alarm.poll.fd,r->alarm.poll.fd);
|
|
cmd[1023]=0;
|
|
int rv=system(cmd);
|
|
int status=-1;
|
|
|
|
if (rv!=-1) status=WEXITSTATUS(rv);
|
|
|
|
DEBUGF("Import returned %d",status);
|
|
|
|
/* clean up after ourselves */
|
|
rhizome_direct_clear_temporary_files(r);
|
|
/* and report back to caller.
|
|
201 = content created, which is probably appropriate for when we successfully
|
|
import a bundle (or if we already have it).
|
|
403 = forbidden, which might be appropriate if we refuse to accept it, e.g.,
|
|
the import fails due to malformed data etc.
|
|
|
|
For now we are just returning "no content" as a place-holder while debugging.
|
|
*/
|
|
rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
|
break;
|
|
default:
|
|
/* Clean up after ourselves */
|
|
rhizome_direct_clear_temporary_files(r);
|
|
|
|
|
|
}
|
|
|
|
return rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
|
|
|
}
|
|
|
|
|
|
int rhizome_direct_process_mime_line(rhizome_http_request *r,char *buffer,int count)
|
|
{
|
|
/* Check for boundary line at start of buffer.
|
|
Boundary line = CRLF + "--" + boundary_string + optional whitespace + CRLF
|
|
EXCEPT end of form boundary, which is:
|
|
CRLF + "--" + boundary_string + "--" + CRLF
|
|
|
|
NOTE: We attach the "--" to boundary_string when setting things up so that
|
|
we don't have to keep manually checking for it here.
|
|
|
|
NOTE: The parser eats the CRLF from the front, and attaches it to the end
|
|
of the previous line. This means we need to rewind 2 bytes from whatever
|
|
file we were writing to whenever we encounter a boundary line, at least
|
|
if those last two bytes were CRLF. That can be safely assumed if we
|
|
assume that the boundary string has been chosen to be a string never appearing
|
|
anywhere in the contents of the form. In practice, that is only "almost
|
|
certain" (according to the mathematical meaning of that phrase) if boundary
|
|
strings are randomly selected and are of sufficient length.
|
|
|
|
NOTE: We are not supporting nested/mixed parts, as that would considerably
|
|
complicate the parser. If the need arises in future, we will deal with it
|
|
then. In the meantime, we will have something that meets our immediate
|
|
needs for Rhizome Direct and a variety of use cases.
|
|
*/
|
|
|
|
/* Regardless of the state of the parser, the presence of boundary lines
|
|
is significant, so lets just check once, and remember the result.
|
|
Similarly check a few other conditions. */
|
|
int boundaryLine=0;
|
|
if (!bcmp(buffer,r->boundary_string,r->boundary_string_length))
|
|
boundaryLine=1;
|
|
|
|
int endOfForm=0;
|
|
if (boundaryLine&&
|
|
buffer[r->boundary_string_length]=='-'&&
|
|
buffer[r->boundary_string_length+1]=='-')
|
|
endOfForm=1;
|
|
int blankLine=0;
|
|
if (!strcmp(buffer,"\r\n")) blankLine=1;
|
|
|
|
DEBUGF("mime state: 0x%x, blankLine=%d, boundary=%d, EOF=%d, bytes=%d",
|
|
r->source_flags,blankLine,boundaryLine,endOfForm,count);
|
|
switch(r->source_flags) {
|
|
case RD_MIME_STATE_INITIAL:
|
|
if (boundaryLine) r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
|
break;
|
|
case RD_MIME_STATE_PARTHEADERS:
|
|
case RD_MIME_STATE_MANIFESTHEADERS:
|
|
case RD_MIME_STATE_DATAHEADERS:
|
|
DEBUGF("mime line: %s",r->request);
|
|
if (blankLine) {
|
|
/* End of headers */
|
|
if (r->source_flags==RD_MIME_STATE_PARTHEADERS)
|
|
{
|
|
/* Multiple content-disposition lines. This is very naughty. */
|
|
rhizome_server_simple_http_response
|
|
(r, 400, "<html><h1>Malformed multi-part form POST: Missing content-disposition lines in MIME encoded part.</h1></html>\r\n");
|
|
return -1;
|
|
}
|
|
|
|
/* Prepare to write to file for field.
|
|
We may have multiple rhizome direct transactions running at the same
|
|
time on different TCP connections. So serialise using file descriptor.
|
|
We could use the boundary string or some other random thing, but using
|
|
the file descriptor places a reasonable upper limit on the clutter that
|
|
is possible, while still preventing collisions -- provided that we don't
|
|
close the file descriptor until we have completed processing the
|
|
request. */
|
|
r->field_file=NULL;
|
|
char filename[1024];
|
|
char *field="unknown";
|
|
switch(r->source_flags) {
|
|
case RD_MIME_STATE_DATAHEADERS: field="data"; break;
|
|
case RD_MIME_STATE_MANIFESTHEADERS: field="manifest"; break;
|
|
}
|
|
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,field);
|
|
filename[1023]=0;
|
|
DEBUGF("Writing to '%s'",filename);
|
|
r->field_file=fopen(filename,"w");
|
|
if (!r->field_file) {
|
|
rhizome_direct_clear_temporary_files(r);
|
|
rhizome_server_simple_http_response
|
|
(r, 500, "<html><h1>Sorry, couldn't complete your request, reasonable as it was. Perhaps try again later.</h1></html>\r\n");
|
|
return -1;
|
|
}
|
|
r->source_flags=RD_MIME_STATE_BODY;
|
|
} else {
|
|
char name[1024];
|
|
char field[1024];
|
|
if (sscanf(buffer,
|
|
"Content-Disposition: form-data; name=\"%[^\"]\";"
|
|
" filename=\"%[^\"]\"",field,name)==2)
|
|
{
|
|
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
|
{
|
|
/* Multiple content-disposition lines. This is very naughty. */
|
|
rhizome_server_simple_http_response
|
|
(r, 400, "<html><h1>Malformed multi-part form POST: Multiple content-disposition lines in single MIME encoded part.</h1></html>\r\n");
|
|
return -1;
|
|
}
|
|
DEBUGF("Found form part '%s' name '%s'",field,name);
|
|
if (!strcasecmp(field,"manifest"))
|
|
r->source_flags=RD_MIME_STATE_MANIFESTHEADERS;
|
|
if (!strcasecmp(field,"data"))
|
|
r->source_flags=RD_MIME_STATE_DATAHEADERS;
|
|
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
|
r->fields_seen|=r->source_flags;
|
|
}
|
|
}
|
|
break;
|
|
case RD_MIME_STATE_BODY:
|
|
if (boundaryLine) {
|
|
r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
|
|
|
/* We will have written an extra CRLF to the end of the file,
|
|
so prune that off. */
|
|
fflush(r->field_file);
|
|
int fd=fileno(r->field_file);
|
|
off_t correct_size=ftell(r->field_file)-2;
|
|
ftruncate(fd,correct_size);
|
|
fclose(r->field_file);
|
|
r->field_file=NULL;
|
|
}
|
|
else {
|
|
int written=fwrite(r->request,count,1,r->field_file);
|
|
DEBUGF("wrote %d lump of %d bytes",written,count);
|
|
}
|
|
break;
|
|
}
|
|
|
|
if (endOfForm) {
|
|
/* End of form marker found.
|
|
Pass it to function that deals with what has been received,
|
|
and will also send response or close the http request if required. */
|
|
|
|
/* XXX Rewind last two bytes from file if open, and close file */
|
|
|
|
DEBUGF("Found end of form");
|
|
return rhizome_direct_form_received(r);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int rhizome_direct_process_post_multipart_bytes
|
|
(rhizome_http_request *r,const char *bytes,int count)
|
|
{
|
|
{
|
|
DEBUGF("Saw %d multi-part form bytes",count);
|
|
FILE *f=fopen("post.log","a");
|
|
if (f) fwrite(bytes,count,1,f);
|
|
if (f) fclose(f);
|
|
}
|
|
|
|
/* This function looks for multi-part form separators and descriptor lines,
|
|
and streams any "manifest" or "data" blocks to respectively named files.
|
|
|
|
The challenge is that we might only get a partial boundary string passed
|
|
to us. So we need to remember the last KB or so of data and glue it to
|
|
the front of the current set of bytes.
|
|
|
|
In multi-part form parsing we don't need r->request for anything, so if
|
|
we are not in a form part already, then we can stow the bytes there
|
|
for reexamination when more bytes arrive.
|
|
|
|
Side effect will be that the entire boundary string and associated bits will
|
|
need to be <=1KB, the size of r->request. This seems quite reasonable.
|
|
|
|
Example of such a block is:
|
|
|
|
------WebKitFormBoundaryEoJwSoSVW4qsrBZW
|
|
Content-Disposition: form-data; name="manifest"; filename="spleen"
|
|
Content-Type: application/octet-stream
|
|
*/
|
|
|
|
int o;
|
|
|
|
/* Split into lines and process each line separately using a
|
|
simple state machine.
|
|
Lines containing binary are truncated into arbitrarily length pieces, but
|
|
a newline will ALWAYS break the line.
|
|
*/
|
|
|
|
for(o=0;o<count;o++)
|
|
{
|
|
int newline=0;
|
|
if (bytes[o]=='\n')
|
|
if (r->request_length>0&&r->request[r->request_length-1]=='\r')
|
|
{ newline=1; r->request_length--; }
|
|
if (r->request_length>1020) newline=2;
|
|
if (newline) {
|
|
/* Found end of line, so process it */
|
|
if (newline==1) {
|
|
/* Put the real new line onto the end if it was present, so that
|
|
we don't go doing anything silly, like joining lines in files
|
|
that really were separated by CRLF, or similarly inserting CRLF
|
|
in the middle of slabs of bytes that were not CRLF terminated.
|
|
*/
|
|
r->request[r->request_length++]='\r';
|
|
r->request[r->request_length++]='\n';
|
|
}
|
|
r->request[r->request_length]=0;
|
|
if (rhizome_direct_process_mime_line(r,r->request,r->request_length))
|
|
return -1;
|
|
r->request_length=0;
|
|
/* If a real new line was detected, then
|
|
don't include the \n as part of the next line.
|
|
But if it wasn't a real new line, then make sure we
|
|
don't loose the byte. */
|
|
if (newline==1) continue;
|
|
}
|
|
|
|
r->request[r->request_length++]=bytes[o];
|
|
}
|
|
|
|
r->source_count-=count;
|
|
if (r->source_count<=0) {
|
|
DEBUGF("Got to end of multi-part form data");
|
|
|
|
/* If the form is still being processed, then flush things through */
|
|
if (r->request_type<0) {
|
|
/* Flush out any remaining data */
|
|
if (r->request_length) {
|
|
DEBUGF("Flushing last %d bytes",r->request_length);
|
|
r->request[r->request_length]=0;
|
|
rhizome_direct_process_mime_line(r,r->request,r->request_length);
|
|
}
|
|
return rhizome_direct_form_received(r);
|
|
} else {
|
|
/* Form has already been processed, so do nothing */
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
int rhizome_direct_parse_http_request(rhizome_http_request *r)
|
|
{
|
|
/* Switching to writing, so update the call-back */
|
|
r->alarm.poll.events=POLLOUT;
|
|
watch(&r->alarm);
|
|
// Start building up a response.
|
|
r->request_type = 0;
|
|
// Parse the HTTP "GET" line.
|
|
char *path = NULL;
|
|
size_t pathlen = 0;
|
|
if (str_startswith(r->request, "GET ", &path)) {
|
|
char *p;
|
|
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
|
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
|
for (p = path; !isspace(*p); ++p)
|
|
;
|
|
pathlen = p - path;
|
|
if ( str_startswith(p, " HTTP/1.", &p)
|
|
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
|
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
|
)
|
|
path[pathlen] = '\0';
|
|
else
|
|
path = NULL;
|
|
|
|
if (path) {
|
|
char *id = NULL;
|
|
INFOF("RHIZOME HTTP SERVER, GET %s", alloca_toprint(1024, path, pathlen));
|
|
if (strcmp(path, "/favicon.ico") == 0) {
|
|
r->request_type = RHIZOME_HTTP_REQUEST_FAVICON;
|
|
rhizome_server_http_response_header(r, 200, "image/vnd.microsoft.icon", favicon_len);
|
|
} else {
|
|
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
|
}
|
|
}
|
|
} else if (str_startswith(r->request, "POST ", &path)) {
|
|
char *p;
|
|
|
|
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
|
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
|
for (p = path; !isspace(*p); ++p)
|
|
;
|
|
pathlen = p - path;
|
|
if ( str_startswith(p, " HTTP/1.", &p)
|
|
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
|
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
|
)
|
|
path[pathlen] = '\0';
|
|
else
|
|
path = NULL;
|
|
|
|
if (path) {
|
|
char *id = NULL;
|
|
INFOF("RHIZOME HTTP SERVER, POST %s", alloca_toprint(1024, path, pathlen));
|
|
if (strcmp(path, "/bundle") == 0) {
|
|
/*
|
|
We know we have the complete header, so get the content length and content type
|
|
fields. From those we work out what to do with the body. */
|
|
char *headers=&path[pathlen+1];
|
|
int headerlen=r->request_length-(headers-r->request);
|
|
const char *cl_str=str_str(headers,"Content-Length: ",headerlen);
|
|
const char *ct_str=str_str(headers,"Content-Type: multipart/form-data; boundary=",headerlen);
|
|
if (!cl_str)
|
|
return
|
|
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-length</h1></html>\r\n");
|
|
if (!ct_str)
|
|
return
|
|
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-type (or unsupported content-type)</h1></html>\r\n");
|
|
/* ok, we have content-type and content-length, now make sure they are
|
|
well formed. */
|
|
long long cl;
|
|
if (sscanf(cl_str,"Content-Length: %lld",&cl)!=1)
|
|
return
|
|
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Length: header</h1></html>\r\n");
|
|
char boundary_string[1024];
|
|
int i;
|
|
ct_str+=strlen("Content-Type: multipart/form-data; boundary=");
|
|
for(i=0;i<1023&&*ct_str&&*ct_str!='\n'&&*ct_str!='\r';i++,ct_str++)
|
|
boundary_string[i]=*ct_str;
|
|
boundary_string[i]=0;
|
|
if (i<4||i>128)
|
|
return
|
|
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Type: header</h1></html>\r\n");
|
|
|
|
DEBUGF("HTTP POST content-length=%lld, boundary string='%s'",
|
|
cl,boundary_string);
|
|
|
|
/* Now start receiving and parsing multi-part data.
|
|
We may have already received some of the post-header data, so
|
|
rewind that if necessary. Need to start by finding actual end of
|
|
headers, and passing any body bytes to the parser.
|
|
Also need to tell the HTTP request that it has moved to multipart
|
|
form data parsing, and what the actual requested action is.
|
|
*/
|
|
|
|
/* Remember boundary string and source path.
|
|
Put the preceeding -- on the front to make our life easier when
|
|
parsing the rest later. */
|
|
snprintf(&r->boundary_string[0],1023,"--%s",boundary_string);
|
|
r->boundary_string[1023]=0;
|
|
r->boundary_string_length=strlen(r->boundary_string);
|
|
r->source_index=0;
|
|
r->source_count=cl;
|
|
snprintf(&r->path[0],1023,"%s",path);
|
|
r->path[1023]=0;
|
|
r->request_type=RHIZOME_HTTP_REQUEST_RECEIVING_MULTIPART;
|
|
|
|
/* Find the end of the headers and start of any body bytes that we
|
|
have read so far. */
|
|
{
|
|
const char *eoh="\r\n\r\n";
|
|
int i=0;
|
|
for(i=0;i<r->request_length;i++) {
|
|
if (!strncmp(eoh,&r->request[i],strlen(eoh)))
|
|
break;
|
|
}
|
|
if (i>=r->request_length) {
|
|
/* Couldn't find the end of the headers, but this routine should
|
|
not be called if the end of headers has not been found.
|
|
Complain and go home. */
|
|
return
|
|
rhizome_server_simple_http_response(r, 404, "<html><h1>End of headers seems to have gone missing</h1></html>\r\n");
|
|
}
|
|
|
|
/* Process any outstanding bytes.
|
|
We need to copy the bytes to a separate buffer, because
|
|
r->request and r->request_length get used internally in the
|
|
parser, which is also why we need to zero r->request_length.
|
|
We also zero r->source_flags, which is used as the state
|
|
counter for parsing the multi-part form data.
|
|
*/
|
|
int count=r->request_length-i;
|
|
char buffer[count];
|
|
bcopy(&r->request[i],&buffer[0],count);
|
|
r->request_length=0;
|
|
r->source_flags=0;
|
|
rhizome_direct_process_post_multipart_bytes(r,buffer,count);
|
|
}
|
|
|
|
/* Handle the rest of the transfer asynchronously. */
|
|
return 0;
|
|
} else {
|
|
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
|
}
|
|
}
|
|
} else {
|
|
if (debug & DEBUG_RHIZOME_TX)
|
|
DEBUGF("Received malformed HTTP request: %s", alloca_toprint(120, (const char *)r->request, r->request_length));
|
|
rhizome_server_simple_http_response(r, 400, "<html><h1>Malformed request</h1></html>\r\n");
|
|
}
|
|
|
|
/* Try sending data immediately. */
|
|
rhizome_server_http_send_bytes(r);
|
|
|
|
return 0;
|
|
}
|