mirror of
https://github.com/servalproject/serval-dna.git
synced 2024-12-18 20:57:56 +00:00
refactored rhizome direct code to separate http transport specific
code from general rhizome direct BAR buffer gathering and processing.
This commit is contained in:
parent
1d4c865a35
commit
d796a482b7
@ -44,6 +44,7 @@ SRCS= \
|
||||
rhizome_crypto.c \
|
||||
rhizome_database.c \
|
||||
rhizome_direct.c \
|
||||
rhizome_direct_http.c \
|
||||
rhizome_fetch.c \
|
||||
rhizome_http.c \
|
||||
rhizome_packetformats.c \
|
||||
|
461
rhizome_direct.c
461
rhizome_direct.c
@ -100,9 +100,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
server at this stage, but can make use of our own spartan HTTP server already
|
||||
integrated into servald.
|
||||
|
||||
In light of the above, the Rhizome Direct process will need to have it's own TCP
|
||||
port number. It is also necessary to have a Rhizome Direct process running to
|
||||
accept Rhizome Direct requests from clients.
|
||||
In light of the above, all rhizome services and HTTP services are being
|
||||
transitioned from running in the main servald process, into a separate process
|
||||
started by servald calling fork() (but not exec, since the same starting image
|
||||
will be fine).
|
||||
|
||||
*/
|
||||
|
||||
@ -111,460 +112,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#include "str.h"
|
||||
#include <assert.h>
|
||||
|
||||
int rhizome_direct_clear_temporary_files(rhizome_http_request *r)
|
||||
{
|
||||
char filename[1024];
|
||||
char *fields[]={"manifest","data","unknown",NULL};
|
||||
int i;
|
||||
|
||||
for(i=0;fields[i];i++) {
|
||||
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,fields[i]);
|
||||
filename[1023]=0;
|
||||
DEBUGF("Unlinking '%s'",filename);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_form_received(rhizome_http_request *r)
|
||||
{
|
||||
/* XXX This needs to be implemented.
|
||||
For now we just put out a "no content" response that makes testing convenient
|
||||
*/
|
||||
|
||||
/* XXX process completed form based on the set of fields seen */
|
||||
switch(r->fields_seen) {
|
||||
case RD_MIME_STATE_MANIFESTHEADERS
|
||||
|RD_MIME_STATE_DATAHEADERS:
|
||||
/* A bundle to import */
|
||||
DEBUGF("Call bundle import for rhizomedata.%d.{data,file}",
|
||||
r->alarm.poll.fd);
|
||||
char cmd[1024];
|
||||
snprintf(cmd,1024,
|
||||
"servald rhizome import bundle rhizomedirect.%d.data rhizomedirect.%d.manifest",
|
||||
r->alarm.poll.fd,r->alarm.poll.fd);
|
||||
cmd[1023]=0;
|
||||
int rv=system(cmd);
|
||||
int status=-1;
|
||||
|
||||
if (rv!=-1) status=WEXITSTATUS(rv);
|
||||
|
||||
DEBUGF("Import returned %d",status);
|
||||
|
||||
/* clean up after ourselves */
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
/* and report back to caller.
|
||||
201 = content created, which is probably appropriate for when we successfully
|
||||
import a bundle (or if we already have it).
|
||||
403 = forbidden, which might be appropriate if we refuse to accept it, e.g.,
|
||||
the import fails due to malformed data etc.
|
||||
|
||||
For now we are just returning "no content" as a place-holder while debugging.
|
||||
*/
|
||||
rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
||||
break;
|
||||
default:
|
||||
/* Clean up after ourselves */
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
||||
|
||||
}
|
||||
|
||||
|
||||
int rhizome_direct_process_mime_line(rhizome_http_request *r,char *buffer,int count)
|
||||
{
|
||||
/* Check for boundary line at start of buffer.
|
||||
Boundary line = CRLF + "--" + boundary_string + optional whitespace + CRLF
|
||||
EXCEPT end of form boundary, which is:
|
||||
CRLF + "--" + boundary_string + "--" + CRLF
|
||||
|
||||
NOTE: We attach the "--" to boundary_string when setting things up so that
|
||||
we don't have to keep manually checking for it here.
|
||||
|
||||
NOTE: The parser eats the CRLF from the front, and attaches it to the end
|
||||
of the previous line. This means we need to rewind 2 bytes from whatever
|
||||
file we were writing to whenever we encounter a boundary line, at least
|
||||
if those last two bytes were CRLF. That can be safely assumed if we
|
||||
assume that the boundary string has been chosen to be a string never appearing
|
||||
anywhere in the contents of the form. In practice, that is only "almost
|
||||
certain" (according to the mathematical meaning of that phrase) if boundary
|
||||
strings are randomly selected and are of sufficient length.
|
||||
|
||||
NOTE: We are not supporting nested/mixed parts, as that would considerably
|
||||
complicate the parser. If the need arises in future, we will deal with it
|
||||
then. In the meantime, we will have something that meets our immediate
|
||||
needs for Rhizome Direct and a variety of use cases.
|
||||
*/
|
||||
|
||||
/* Regardless of the state of the parser, the presence of boundary lines
|
||||
is significant, so lets just check once, and remember the result.
|
||||
Similarly check a few other conditions. */
|
||||
int boundaryLine=0;
|
||||
if (!bcmp(buffer,r->boundary_string,r->boundary_string_length))
|
||||
boundaryLine=1;
|
||||
|
||||
int endOfForm=0;
|
||||
if (boundaryLine&&
|
||||
buffer[r->boundary_string_length]=='-'&&
|
||||
buffer[r->boundary_string_length+1]=='-')
|
||||
endOfForm=1;
|
||||
int blankLine=0;
|
||||
if (!strcmp(buffer,"\r\n")) blankLine=1;
|
||||
|
||||
DEBUGF("mime state: 0x%x, blankLine=%d, boundary=%d, EOF=%d, bytes=%d",
|
||||
r->source_flags,blankLine,boundaryLine,endOfForm,count);
|
||||
switch(r->source_flags) {
|
||||
case RD_MIME_STATE_INITIAL:
|
||||
if (boundaryLine) r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
||||
break;
|
||||
case RD_MIME_STATE_PARTHEADERS:
|
||||
case RD_MIME_STATE_MANIFESTHEADERS:
|
||||
case RD_MIME_STATE_DATAHEADERS:
|
||||
DEBUGF("mime line: %s",r->request);
|
||||
if (blankLine) {
|
||||
/* End of headers */
|
||||
if (r->source_flags==RD_MIME_STATE_PARTHEADERS)
|
||||
{
|
||||
/* Multiple content-disposition lines. This is very naughty. */
|
||||
rhizome_server_simple_http_response
|
||||
(r, 400, "<html><h1>Malformed multi-part form POST: Missing content-disposition lines in MIME encoded part.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Prepare to write to file for field.
|
||||
We may have multiple rhizome direct transactions running at the same
|
||||
time on different TCP connections. So serialise using file descriptor.
|
||||
We could use the boundary string or some other random thing, but using
|
||||
the file descriptor places a reasonable upper limit on the clutter that
|
||||
is possible, while still preventing collisions -- provided that we don't
|
||||
close the file descriptor until we have completed processing the
|
||||
request. */
|
||||
r->field_file=NULL;
|
||||
char filename[1024];
|
||||
char *field="unknown";
|
||||
switch(r->source_flags) {
|
||||
case RD_MIME_STATE_DATAHEADERS: field="data"; break;
|
||||
case RD_MIME_STATE_MANIFESTHEADERS: field="manifest"; break;
|
||||
}
|
||||
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,field);
|
||||
filename[1023]=0;
|
||||
DEBUGF("Writing to '%s'",filename);
|
||||
r->field_file=fopen(filename,"w");
|
||||
if (!r->field_file) {
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
rhizome_server_simple_http_response
|
||||
(r, 500, "<html><h1>Sorry, couldn't complete your request, reasonable as it was. Perhaps try again later.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
r->source_flags=RD_MIME_STATE_BODY;
|
||||
} else {
|
||||
char name[1024];
|
||||
char field[1024];
|
||||
if (sscanf(buffer,
|
||||
"Content-Disposition: form-data; name=\"%[^\"]\";"
|
||||
" filename=\"%[^\"]\"",field,name)==2)
|
||||
{
|
||||
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
||||
{
|
||||
/* Multiple content-disposition lines. This is very naughty. */
|
||||
rhizome_server_simple_http_response
|
||||
(r, 400, "<html><h1>Malformed multi-part form POST: Multiple content-disposition lines in single MIME encoded part.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
DEBUGF("Found form part '%s' name '%s'",field,name);
|
||||
if (!strcasecmp(field,"manifest"))
|
||||
r->source_flags=RD_MIME_STATE_MANIFESTHEADERS;
|
||||
if (!strcasecmp(field,"data"))
|
||||
r->source_flags=RD_MIME_STATE_DATAHEADERS;
|
||||
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
||||
r->fields_seen|=r->source_flags;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case RD_MIME_STATE_BODY:
|
||||
if (boundaryLine) {
|
||||
r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
||||
|
||||
/* We will have written an extra CRLF to the end of the file,
|
||||
so prune that off. */
|
||||
fflush(r->field_file);
|
||||
int fd=fileno(r->field_file);
|
||||
off_t correct_size=ftell(r->field_file)-2;
|
||||
ftruncate(fd,correct_size);
|
||||
fclose(r->field_file);
|
||||
r->field_file=NULL;
|
||||
}
|
||||
else {
|
||||
int written=fwrite(r->request,count,1,r->field_file);
|
||||
DEBUGF("wrote %d lump of %d bytes",written,count);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (endOfForm) {
|
||||
/* End of form marker found.
|
||||
Pass it to function that deals with what has been received,
|
||||
and will also send response or close the http request if required. */
|
||||
|
||||
/* XXX Rewind last two bytes from file if open, and close file */
|
||||
|
||||
DEBUGF("Found end of form");
|
||||
return rhizome_direct_form_received(r);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_process_post_multipart_bytes
|
||||
(rhizome_http_request *r,const char *bytes,int count)
|
||||
{
|
||||
{
|
||||
DEBUGF("Saw %d multi-part form bytes",count);
|
||||
FILE *f=fopen("post.log","a");
|
||||
if (f) fwrite(bytes,count,1,f);
|
||||
if (f) fclose(f);
|
||||
}
|
||||
|
||||
/* This function looks for multi-part form separators and descriptor lines,
|
||||
and streams any "manifest" or "data" blocks to respectively named files.
|
||||
|
||||
The challenge is that we might only get a partial boundary string passed
|
||||
to us. So we need to remember the last KB or so of data and glue it to
|
||||
the front of the current set of bytes.
|
||||
|
||||
In multi-part form parsing we don't need r->request for anything, so if
|
||||
we are not in a form part already, then we can stow the bytes there
|
||||
for reexamination when more bytes arrive.
|
||||
|
||||
Side effect will be that the entire boundary string and associated bits will
|
||||
need to be <=1KB, the size of r->request. This seems quite reasonable.
|
||||
|
||||
Example of such a block is:
|
||||
|
||||
------WebKitFormBoundaryEoJwSoSVW4qsrBZW
|
||||
Content-Disposition: form-data; name="manifest"; filename="spleen"
|
||||
Content-Type: application/octet-stream
|
||||
*/
|
||||
|
||||
int o;
|
||||
|
||||
/* Split into lines and process each line separately using a
|
||||
simple state machine.
|
||||
Lines containing binary are truncated into arbitrarily length pieces, but
|
||||
a newline will ALWAYS break the line.
|
||||
*/
|
||||
|
||||
for(o=0;o<count;o++)
|
||||
{
|
||||
int newline=0;
|
||||
if (bytes[o]=='\n')
|
||||
if (r->request_length>0&&r->request[r->request_length-1]=='\r')
|
||||
{ newline=1; r->request_length--; }
|
||||
if (r->request_length>1020) newline=2;
|
||||
if (newline) {
|
||||
/* Found end of line, so process it */
|
||||
if (newline==1) {
|
||||
/* Put the real new line onto the end if it was present, so that
|
||||
we don't go doing anything silly, like joining lines in files
|
||||
that really were separated by CRLF, or similarly inserting CRLF
|
||||
in the middle of slabs of bytes that were not CRLF terminated.
|
||||
*/
|
||||
r->request[r->request_length++]='\r';
|
||||
r->request[r->request_length++]='\n';
|
||||
}
|
||||
r->request[r->request_length]=0;
|
||||
if (rhizome_direct_process_mime_line(r,r->request,r->request_length))
|
||||
return -1;
|
||||
r->request_length=0;
|
||||
/* If a real new line was detected, then
|
||||
don't include the \n as part of the next line.
|
||||
But if it wasn't a real new line, then make sure we
|
||||
don't loose the byte. */
|
||||
if (newline==1) continue;
|
||||
}
|
||||
|
||||
r->request[r->request_length++]=bytes[o];
|
||||
}
|
||||
|
||||
r->source_count-=count;
|
||||
if (r->source_count<=0) {
|
||||
DEBUGF("Got to end of multi-part form data");
|
||||
|
||||
/* If the form is still being processed, then flush things through */
|
||||
if (r->request_type<0) {
|
||||
/* Flush out any remaining data */
|
||||
if (r->request_length) {
|
||||
DEBUGF("Flushing last %d bytes",r->request_length);
|
||||
r->request[r->request_length]=0;
|
||||
rhizome_direct_process_mime_line(r,r->request,r->request_length);
|
||||
}
|
||||
return rhizome_direct_form_received(r);
|
||||
} else {
|
||||
/* Form has already been processed, so do nothing */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_parse_http_request(rhizome_http_request *r)
|
||||
{
|
||||
/* Switching to writing, so update the call-back */
|
||||
r->alarm.poll.events=POLLOUT;
|
||||
watch(&r->alarm);
|
||||
// Start building up a response.
|
||||
r->request_type = 0;
|
||||
// Parse the HTTP "GET" line.
|
||||
char *path = NULL;
|
||||
size_t pathlen = 0;
|
||||
if (str_startswith(r->request, "GET ", &path)) {
|
||||
char *p;
|
||||
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
||||
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
||||
for (p = path; !isspace(*p); ++p)
|
||||
;
|
||||
pathlen = p - path;
|
||||
if ( str_startswith(p, " HTTP/1.", &p)
|
||||
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
||||
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
||||
)
|
||||
path[pathlen] = '\0';
|
||||
else
|
||||
path = NULL;
|
||||
|
||||
if (path) {
|
||||
char *id = NULL;
|
||||
INFOF("RHIZOME HTTP SERVER, GET %s", alloca_toprint(1024, path, pathlen));
|
||||
if (strcmp(path, "/favicon.ico") == 0) {
|
||||
r->request_type = RHIZOME_HTTP_REQUEST_FAVICON;
|
||||
rhizome_server_http_response_header(r, 200, "image/vnd.microsoft.icon", favicon_len);
|
||||
} else {
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
||||
}
|
||||
}
|
||||
} else if (str_startswith(r->request, "POST ", &path)) {
|
||||
char *p;
|
||||
|
||||
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
||||
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
||||
for (p = path; !isspace(*p); ++p)
|
||||
;
|
||||
pathlen = p - path;
|
||||
if ( str_startswith(p, " HTTP/1.", &p)
|
||||
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
||||
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
||||
)
|
||||
path[pathlen] = '\0';
|
||||
else
|
||||
path = NULL;
|
||||
|
||||
if (path) {
|
||||
char *id = NULL;
|
||||
INFOF("RHIZOME HTTP SERVER, POST %s", alloca_toprint(1024, path, pathlen));
|
||||
if (strcmp(path, "/bundle") == 0) {
|
||||
/*
|
||||
We know we have the complete header, so get the content length and content type
|
||||
fields. From those we work out what to do with the body. */
|
||||
char *headers=&path[pathlen+1];
|
||||
int headerlen=r->request_length-(headers-r->request);
|
||||
const char *cl_str=str_str(headers,"Content-Length: ",headerlen);
|
||||
const char *ct_str=str_str(headers,"Content-Type: multipart/form-data; boundary=",headerlen);
|
||||
if (!cl_str)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-length</h1></html>\r\n");
|
||||
if (!ct_str)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-type (or unsupported content-type)</h1></html>\r\n");
|
||||
/* ok, we have content-type and content-length, now make sure they are
|
||||
well formed. */
|
||||
long long cl;
|
||||
if (sscanf(cl_str,"Content-Length: %lld",&cl)!=1)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Length: header</h1></html>\r\n");
|
||||
char boundary_string[1024];
|
||||
int i;
|
||||
ct_str+=strlen("Content-Type: multipart/form-data; boundary=");
|
||||
for(i=0;i<1023&&*ct_str&&*ct_str!='\n'&&*ct_str!='\r';i++,ct_str++)
|
||||
boundary_string[i]=*ct_str;
|
||||
boundary_string[i]=0;
|
||||
if (i<4||i>128)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Type: header</h1></html>\r\n");
|
||||
|
||||
DEBUGF("HTTP POST content-length=%lld, boundary string='%s'",
|
||||
cl,boundary_string);
|
||||
|
||||
/* Now start receiving and parsing multi-part data.
|
||||
We may have already received some of the post-header data, so
|
||||
rewind that if necessary. Need to start by finding actual end of
|
||||
headers, and passing any body bytes to the parser.
|
||||
Also need to tell the HTTP request that it has moved to multipart
|
||||
form data parsing, and what the actual requested action is.
|
||||
*/
|
||||
|
||||
/* Remember boundary string and source path.
|
||||
Put the preceeding -- on the front to make our life easier when
|
||||
parsing the rest later. */
|
||||
snprintf(&r->boundary_string[0],1023,"--%s",boundary_string);
|
||||
r->boundary_string[1023]=0;
|
||||
r->boundary_string_length=strlen(r->boundary_string);
|
||||
r->source_index=0;
|
||||
r->source_count=cl;
|
||||
snprintf(&r->path[0],1023,"%s",path);
|
||||
r->path[1023]=0;
|
||||
r->request_type=RHIZOME_HTTP_REQUEST_RECEIVING_MULTIPART;
|
||||
|
||||
/* Find the end of the headers and start of any body bytes that we
|
||||
have read so far. */
|
||||
{
|
||||
const char *eoh="\r\n\r\n";
|
||||
int i=0;
|
||||
for(i=0;i<r->request_length;i++) {
|
||||
if (!strncmp(eoh,&r->request[i],strlen(eoh)))
|
||||
break;
|
||||
}
|
||||
if (i>=r->request_length) {
|
||||
/* Couldn't find the end of the headers, but this routine should
|
||||
not be called if the end of headers has not been found.
|
||||
Complain and go home. */
|
||||
return
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>End of headers seems to have gone missing</h1></html>\r\n");
|
||||
}
|
||||
|
||||
/* Process any outstanding bytes.
|
||||
We need to copy the bytes to a separate buffer, because
|
||||
r->request and r->request_length get used internally in the
|
||||
parser, which is also why we need to zero r->request_length.
|
||||
We also zero r->source_flags, which is used as the state
|
||||
counter for parsing the multi-part form data.
|
||||
*/
|
||||
int count=r->request_length-i;
|
||||
char buffer[count];
|
||||
bcopy(&r->request[i],&buffer[0],count);
|
||||
r->request_length=0;
|
||||
r->source_flags=0;
|
||||
rhizome_direct_process_post_multipart_bytes(r,buffer,count);
|
||||
}
|
||||
|
||||
/* Handle the rest of the transfer asynchronously. */
|
||||
return 0;
|
||||
} else {
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (debug & DEBUG_RHIZOME_TX)
|
||||
DEBUGF("Received malformed HTTP request: %s", alloca_toprint(120, (const char *)r->request, r->request_length));
|
||||
rhizome_server_simple_http_response(r, 400, "<html><h1>Malformed request</h1></html>\r\n");
|
||||
}
|
||||
|
||||
/* Try sending data immediately. */
|
||||
rhizome_server_http_send_bytes(r);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int app_rhizome_direct_sync(int argc, const char *const *argv,
|
||||
struct command_line_option *o)
|
||||
{
|
||||
|
478
rhizome_direct_http.c
Normal file
478
rhizome_direct_http.c
Normal file
@ -0,0 +1,478 @@
|
||||
/*
|
||||
Serval Mesh Software
|
||||
Copyright (C) 2010-2012 Paul Gardner-Stephen
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; either version 2
|
||||
of the License, or (at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "serval.h"
|
||||
#include "rhizome.h"
|
||||
#include "str.h"
|
||||
#include <assert.h>
|
||||
|
||||
|
||||
int rhizome_direct_clear_temporary_files(rhizome_http_request *r)
|
||||
{
|
||||
char filename[1024];
|
||||
char *fields[]={"manifest","data","unknown",NULL};
|
||||
int i;
|
||||
|
||||
for(i=0;fields[i];i++) {
|
||||
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,fields[i]);
|
||||
filename[1023]=0;
|
||||
DEBUGF("Unlinking '%s'",filename);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_form_received(rhizome_http_request *r)
|
||||
{
|
||||
/* XXX This needs to be implemented.
|
||||
For now we just put out a "no content" response that makes testing convenient
|
||||
*/
|
||||
|
||||
/* XXX process completed form based on the set of fields seen */
|
||||
switch(r->fields_seen) {
|
||||
case RD_MIME_STATE_MANIFESTHEADERS
|
||||
|RD_MIME_STATE_DATAHEADERS:
|
||||
/* A bundle to import */
|
||||
DEBUGF("Call bundle import for rhizomedata.%d.{data,file}",
|
||||
r->alarm.poll.fd);
|
||||
char cmd[1024];
|
||||
snprintf(cmd,1024,
|
||||
"servald rhizome import bundle rhizomedirect.%d.data rhizomedirect.%d.manifest",
|
||||
r->alarm.poll.fd,r->alarm.poll.fd);
|
||||
cmd[1023]=0;
|
||||
int rv=system(cmd);
|
||||
int status=-1;
|
||||
|
||||
if (rv!=-1) status=WEXITSTATUS(rv);
|
||||
|
||||
DEBUGF("Import returned %d",status);
|
||||
|
||||
/* clean up after ourselves */
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
/* and report back to caller.
|
||||
201 = content created, which is probably appropriate for when we successfully
|
||||
import a bundle (or if we already have it).
|
||||
403 = forbidden, which might be appropriate if we refuse to accept it, e.g.,
|
||||
the import fails due to malformed data etc.
|
||||
|
||||
For now we are just returning "no content" as a place-holder while debugging.
|
||||
*/
|
||||
rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
||||
break;
|
||||
default:
|
||||
/* Clean up after ourselves */
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
|
||||
|
||||
}
|
||||
|
||||
return rhizome_server_simple_http_response(r, 204, "Move along. Nothing to see.");
|
||||
|
||||
}
|
||||
|
||||
|
||||
int rhizome_direct_process_mime_line(rhizome_http_request *r,char *buffer,int count)
|
||||
{
|
||||
/* Check for boundary line at start of buffer.
|
||||
Boundary line = CRLF + "--" + boundary_string + optional whitespace + CRLF
|
||||
EXCEPT end of form boundary, which is:
|
||||
CRLF + "--" + boundary_string + "--" + CRLF
|
||||
|
||||
NOTE: We attach the "--" to boundary_string when setting things up so that
|
||||
we don't have to keep manually checking for it here.
|
||||
|
||||
NOTE: The parser eats the CRLF from the front, and attaches it to the end
|
||||
of the previous line. This means we need to rewind 2 bytes from whatever
|
||||
file we were writing to whenever we encounter a boundary line, at least
|
||||
if those last two bytes were CRLF. That can be safely assumed if we
|
||||
assume that the boundary string has been chosen to be a string never appearing
|
||||
anywhere in the contents of the form. In practice, that is only "almost
|
||||
certain" (according to the mathematical meaning of that phrase) if boundary
|
||||
strings are randomly selected and are of sufficient length.
|
||||
|
||||
NOTE: We are not supporting nested/mixed parts, as that would considerably
|
||||
complicate the parser. If the need arises in future, we will deal with it
|
||||
then. In the meantime, we will have something that meets our immediate
|
||||
needs for Rhizome Direct and a variety of use cases.
|
||||
*/
|
||||
|
||||
/* Regardless of the state of the parser, the presence of boundary lines
|
||||
is significant, so lets just check once, and remember the result.
|
||||
Similarly check a few other conditions. */
|
||||
int boundaryLine=0;
|
||||
if (!bcmp(buffer,r->boundary_string,r->boundary_string_length))
|
||||
boundaryLine=1;
|
||||
|
||||
int endOfForm=0;
|
||||
if (boundaryLine&&
|
||||
buffer[r->boundary_string_length]=='-'&&
|
||||
buffer[r->boundary_string_length+1]=='-')
|
||||
endOfForm=1;
|
||||
int blankLine=0;
|
||||
if (!strcmp(buffer,"\r\n")) blankLine=1;
|
||||
|
||||
DEBUGF("mime state: 0x%x, blankLine=%d, boundary=%d, EOF=%d, bytes=%d",
|
||||
r->source_flags,blankLine,boundaryLine,endOfForm,count);
|
||||
switch(r->source_flags) {
|
||||
case RD_MIME_STATE_INITIAL:
|
||||
if (boundaryLine) r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
||||
break;
|
||||
case RD_MIME_STATE_PARTHEADERS:
|
||||
case RD_MIME_STATE_MANIFESTHEADERS:
|
||||
case RD_MIME_STATE_DATAHEADERS:
|
||||
DEBUGF("mime line: %s",r->request);
|
||||
if (blankLine) {
|
||||
/* End of headers */
|
||||
if (r->source_flags==RD_MIME_STATE_PARTHEADERS)
|
||||
{
|
||||
/* Multiple content-disposition lines. This is very naughty. */
|
||||
rhizome_server_simple_http_response
|
||||
(r, 400, "<html><h1>Malformed multi-part form POST: Missing content-disposition lines in MIME encoded part.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Prepare to write to file for field.
|
||||
We may have multiple rhizome direct transactions running at the same
|
||||
time on different TCP connections. So serialise using file descriptor.
|
||||
We could use the boundary string or some other random thing, but using
|
||||
the file descriptor places a reasonable upper limit on the clutter that
|
||||
is possible, while still preventing collisions -- provided that we don't
|
||||
close the file descriptor until we have completed processing the
|
||||
request. */
|
||||
r->field_file=NULL;
|
||||
char filename[1024];
|
||||
char *field="unknown";
|
||||
switch(r->source_flags) {
|
||||
case RD_MIME_STATE_DATAHEADERS: field="data"; break;
|
||||
case RD_MIME_STATE_MANIFESTHEADERS: field="manifest"; break;
|
||||
}
|
||||
snprintf(filename,1024,"rhizomedirect.%d.%s",r->alarm.poll.fd,field);
|
||||
filename[1023]=0;
|
||||
DEBUGF("Writing to '%s'",filename);
|
||||
r->field_file=fopen(filename,"w");
|
||||
if (!r->field_file) {
|
||||
rhizome_direct_clear_temporary_files(r);
|
||||
rhizome_server_simple_http_response
|
||||
(r, 500, "<html><h1>Sorry, couldn't complete your request, reasonable as it was. Perhaps try again later.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
r->source_flags=RD_MIME_STATE_BODY;
|
||||
} else {
|
||||
char name[1024];
|
||||
char field[1024];
|
||||
if (sscanf(buffer,
|
||||
"Content-Disposition: form-data; name=\"%[^\"]\";"
|
||||
" filename=\"%[^\"]\"",field,name)==2)
|
||||
{
|
||||
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
||||
{
|
||||
/* Multiple content-disposition lines. This is very naughty. */
|
||||
rhizome_server_simple_http_response
|
||||
(r, 400, "<html><h1>Malformed multi-part form POST: Multiple content-disposition lines in single MIME encoded part.</h1></html>\r\n");
|
||||
return -1;
|
||||
}
|
||||
DEBUGF("Found form part '%s' name '%s'",field,name);
|
||||
if (!strcasecmp(field,"manifest"))
|
||||
r->source_flags=RD_MIME_STATE_MANIFESTHEADERS;
|
||||
if (!strcasecmp(field,"data"))
|
||||
r->source_flags=RD_MIME_STATE_DATAHEADERS;
|
||||
if (r->source_flags!=RD_MIME_STATE_PARTHEADERS)
|
||||
r->fields_seen|=r->source_flags;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case RD_MIME_STATE_BODY:
|
||||
if (boundaryLine) {
|
||||
r->source_flags=RD_MIME_STATE_PARTHEADERS;
|
||||
|
||||
/* We will have written an extra CRLF to the end of the file,
|
||||
so prune that off. */
|
||||
fflush(r->field_file);
|
||||
int fd=fileno(r->field_file);
|
||||
off_t correct_size=ftell(r->field_file)-2;
|
||||
ftruncate(fd,correct_size);
|
||||
fclose(r->field_file);
|
||||
r->field_file=NULL;
|
||||
}
|
||||
else {
|
||||
int written=fwrite(r->request,count,1,r->field_file);
|
||||
DEBUGF("wrote %d lump of %d bytes",written,count);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (endOfForm) {
|
||||
/* End of form marker found.
|
||||
Pass it to function that deals with what has been received,
|
||||
and will also send response or close the http request if required. */
|
||||
|
||||
/* XXX Rewind last two bytes from file if open, and close file */
|
||||
|
||||
DEBUGF("Found end of form");
|
||||
return rhizome_direct_form_received(r);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_process_post_multipart_bytes
|
||||
(rhizome_http_request *r,const char *bytes,int count)
|
||||
{
|
||||
{
|
||||
DEBUGF("Saw %d multi-part form bytes",count);
|
||||
FILE *f=fopen("post.log","a");
|
||||
if (f) fwrite(bytes,count,1,f);
|
||||
if (f) fclose(f);
|
||||
}
|
||||
|
||||
/* This function looks for multi-part form separators and descriptor lines,
|
||||
and streams any "manifest" or "data" blocks to respectively named files.
|
||||
|
||||
The challenge is that we might only get a partial boundary string passed
|
||||
to us. So we need to remember the last KB or so of data and glue it to
|
||||
the front of the current set of bytes.
|
||||
|
||||
In multi-part form parsing we don't need r->request for anything, so if
|
||||
we are not in a form part already, then we can stow the bytes there
|
||||
for reexamination when more bytes arrive.
|
||||
|
||||
Side effect will be that the entire boundary string and associated bits will
|
||||
need to be <=1KB, the size of r->request. This seems quite reasonable.
|
||||
|
||||
Example of such a block is:
|
||||
|
||||
------WebKitFormBoundaryEoJwSoSVW4qsrBZW
|
||||
Content-Disposition: form-data; name="manifest"; filename="spleen"
|
||||
Content-Type: application/octet-stream
|
||||
*/
|
||||
|
||||
int o;
|
||||
|
||||
/* Split into lines and process each line separately using a
|
||||
simple state machine.
|
||||
Lines containing binary are truncated into arbitrarily length pieces, but
|
||||
a newline will ALWAYS break the line.
|
||||
*/
|
||||
|
||||
for(o=0;o<count;o++)
|
||||
{
|
||||
int newline=0;
|
||||
if (bytes[o]=='\n')
|
||||
if (r->request_length>0&&r->request[r->request_length-1]=='\r')
|
||||
{ newline=1; r->request_length--; }
|
||||
if (r->request_length>1020) newline=2;
|
||||
if (newline) {
|
||||
/* Found end of line, so process it */
|
||||
if (newline==1) {
|
||||
/* Put the real new line onto the end if it was present, so that
|
||||
we don't go doing anything silly, like joining lines in files
|
||||
that really were separated by CRLF, or similarly inserting CRLF
|
||||
in the middle of slabs of bytes that were not CRLF terminated.
|
||||
*/
|
||||
r->request[r->request_length++]='\r';
|
||||
r->request[r->request_length++]='\n';
|
||||
}
|
||||
r->request[r->request_length]=0;
|
||||
if (rhizome_direct_process_mime_line(r,r->request,r->request_length))
|
||||
return -1;
|
||||
r->request_length=0;
|
||||
/* If a real new line was detected, then
|
||||
don't include the \n as part of the next line.
|
||||
But if it wasn't a real new line, then make sure we
|
||||
don't loose the byte. */
|
||||
if (newline==1) continue;
|
||||
}
|
||||
|
||||
r->request[r->request_length++]=bytes[o];
|
||||
}
|
||||
|
||||
r->source_count-=count;
|
||||
if (r->source_count<=0) {
|
||||
DEBUGF("Got to end of multi-part form data");
|
||||
|
||||
/* If the form is still being processed, then flush things through */
|
||||
if (r->request_type<0) {
|
||||
/* Flush out any remaining data */
|
||||
if (r->request_length) {
|
||||
DEBUGF("Flushing last %d bytes",r->request_length);
|
||||
r->request[r->request_length]=0;
|
||||
rhizome_direct_process_mime_line(r,r->request,r->request_length);
|
||||
}
|
||||
return rhizome_direct_form_received(r);
|
||||
} else {
|
||||
/* Form has already been processed, so do nothing */
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int rhizome_direct_parse_http_request(rhizome_http_request *r)
|
||||
{
|
||||
/* Switching to writing, so update the call-back */
|
||||
r->alarm.poll.events=POLLOUT;
|
||||
watch(&r->alarm);
|
||||
// Start building up a response.
|
||||
r->request_type = 0;
|
||||
// Parse the HTTP "GET" line.
|
||||
char *path = NULL;
|
||||
size_t pathlen = 0;
|
||||
if (str_startswith(r->request, "GET ", &path)) {
|
||||
char *p;
|
||||
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
||||
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
||||
for (p = path; !isspace(*p); ++p)
|
||||
;
|
||||
pathlen = p - path;
|
||||
if ( str_startswith(p, " HTTP/1.", &p)
|
||||
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
||||
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
||||
)
|
||||
path[pathlen] = '\0';
|
||||
else
|
||||
path = NULL;
|
||||
|
||||
if (path) {
|
||||
char *id = NULL;
|
||||
INFOF("RHIZOME HTTP SERVER, GET %s", alloca_toprint(1024, path, pathlen));
|
||||
if (strcmp(path, "/favicon.ico") == 0) {
|
||||
r->request_type = RHIZOME_HTTP_REQUEST_FAVICON;
|
||||
rhizome_server_http_response_header(r, 200, "image/vnd.microsoft.icon", favicon_len);
|
||||
} else {
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
||||
}
|
||||
}
|
||||
} else if (str_startswith(r->request, "POST ", &path)) {
|
||||
char *p;
|
||||
|
||||
// This loop is guaranteed to terminate before the end of the buffer, because we know that the
|
||||
// buffer contains at least "\n\n" and maybe "\r\n\r\n" at the end of the header block.
|
||||
for (p = path; !isspace(*p); ++p)
|
||||
;
|
||||
pathlen = p - path;
|
||||
if ( str_startswith(p, " HTTP/1.", &p)
|
||||
&& (str_startswith(p, "0", &p) || str_startswith(p, "1", &p))
|
||||
&& (str_startswith(p, "\r\n", &p) || str_startswith(p, "\n", &p))
|
||||
)
|
||||
path[pathlen] = '\0';
|
||||
else
|
||||
path = NULL;
|
||||
|
||||
if (path) {
|
||||
char *id = NULL;
|
||||
INFOF("RHIZOME HTTP SERVER, POST %s", alloca_toprint(1024, path, pathlen));
|
||||
if (strcmp(path, "/bundle") == 0) {
|
||||
/*
|
||||
We know we have the complete header, so get the content length and content type
|
||||
fields. From those we work out what to do with the body. */
|
||||
char *headers=&path[pathlen+1];
|
||||
int headerlen=r->request_length-(headers-r->request);
|
||||
const char *cl_str=str_str(headers,"Content-Length: ",headerlen);
|
||||
const char *ct_str=str_str(headers,"Content-Type: multipart/form-data; boundary=",headerlen);
|
||||
if (!cl_str)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-length</h1></html>\r\n");
|
||||
if (!ct_str)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>POST without content-type (or unsupported content-type)</h1></html>\r\n");
|
||||
/* ok, we have content-type and content-length, now make sure they are
|
||||
well formed. */
|
||||
long long cl;
|
||||
if (sscanf(cl_str,"Content-Length: %lld",&cl)!=1)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Length: header</h1></html>\r\n");
|
||||
char boundary_string[1024];
|
||||
int i;
|
||||
ct_str+=strlen("Content-Type: multipart/form-data; boundary=");
|
||||
for(i=0;i<1023&&*ct_str&&*ct_str!='\n'&&*ct_str!='\r';i++,ct_str++)
|
||||
boundary_string[i]=*ct_str;
|
||||
boundary_string[i]=0;
|
||||
if (i<4||i>128)
|
||||
return
|
||||
rhizome_server_simple_http_response(r,400,"<html><h1>malformed Content-Type: header</h1></html>\r\n");
|
||||
|
||||
DEBUGF("HTTP POST content-length=%lld, boundary string='%s'",
|
||||
cl,boundary_string);
|
||||
|
||||
/* Now start receiving and parsing multi-part data.
|
||||
We may have already received some of the post-header data, so
|
||||
rewind that if necessary. Need to start by finding actual end of
|
||||
headers, and passing any body bytes to the parser.
|
||||
Also need to tell the HTTP request that it has moved to multipart
|
||||
form data parsing, and what the actual requested action is.
|
||||
*/
|
||||
|
||||
/* Remember boundary string and source path.
|
||||
Put the preceeding -- on the front to make our life easier when
|
||||
parsing the rest later. */
|
||||
snprintf(&r->boundary_string[0],1023,"--%s",boundary_string);
|
||||
r->boundary_string[1023]=0;
|
||||
r->boundary_string_length=strlen(r->boundary_string);
|
||||
r->source_index=0;
|
||||
r->source_count=cl;
|
||||
snprintf(&r->path[0],1023,"%s",path);
|
||||
r->path[1023]=0;
|
||||
r->request_type=RHIZOME_HTTP_REQUEST_RECEIVING_MULTIPART;
|
||||
|
||||
/* Find the end of the headers and start of any body bytes that we
|
||||
have read so far. */
|
||||
{
|
||||
const char *eoh="\r\n\r\n";
|
||||
int i=0;
|
||||
for(i=0;i<r->request_length;i++) {
|
||||
if (!strncmp(eoh,&r->request[i],strlen(eoh)))
|
||||
break;
|
||||
}
|
||||
if (i>=r->request_length) {
|
||||
/* Couldn't find the end of the headers, but this routine should
|
||||
not be called if the end of headers has not been found.
|
||||
Complain and go home. */
|
||||
return
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>End of headers seems to have gone missing</h1></html>\r\n");
|
||||
}
|
||||
|
||||
/* Process any outstanding bytes.
|
||||
We need to copy the bytes to a separate buffer, because
|
||||
r->request and r->request_length get used internally in the
|
||||
parser, which is also why we need to zero r->request_length.
|
||||
We also zero r->source_flags, which is used as the state
|
||||
counter for parsing the multi-part form data.
|
||||
*/
|
||||
int count=r->request_length-i;
|
||||
char buffer[count];
|
||||
bcopy(&r->request[i],&buffer[0],count);
|
||||
r->request_length=0;
|
||||
r->source_flags=0;
|
||||
rhizome_direct_process_post_multipart_bytes(r,buffer,count);
|
||||
}
|
||||
|
||||
/* Handle the rest of the transfer asynchronously. */
|
||||
return 0;
|
||||
} else {
|
||||
rhizome_server_simple_http_response(r, 404, "<html><h1>Not found</h1></html>\r\n");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (debug & DEBUG_RHIZOME_TX)
|
||||
DEBUGF("Received malformed HTTP request: %s", alloca_toprint(120, (const char *)r->request, r->request_length));
|
||||
rhizome_server_simple_http_response(r, 400, "<html><h1>Malformed request</h1></html>\r\n");
|
||||
}
|
||||
|
||||
/* Try sending data immediately. */
|
||||
rhizome_server_http_send_bytes(r);
|
||||
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user