Improve HTTP/1.1 byterange handling

Fix parsing of a Range: header to support:
 - multiple ranges (parsed, but not returned)
 - suffix byte ranges ("-2139")
 - correct handling of incorrectly formatted range headers
   (correct behaviour is to ignore the header and return the full
    file)
 - return appropriate error for ranges outside the file

Multiple ranges are parsed, but only the first range is returned.
Returning multiple ranges requires using the multipart/byterange
content type.
This commit is contained in:
Jeremy Fitzhardinge 2010-03-09 20:59:13 -07:00
parent d13784b649
commit 63b28d707b
2 changed files with 127 additions and 27 deletions

View File

@ -583,6 +583,30 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, unittest.TestCase):
d.addCallback(_got) d.addCallback(_got)
return d return d
def test_GET_FILEURL_partial_end_range(self):
headers = {"range": "bytes=-5"}
length = len(self.BAR_CONTENTS)
d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
return_response=True)
def _got((res, status, headers)):
self.failUnlessEqual(int(status), 206)
self.failUnless(headers.has_key("content-range"))
self.failUnlessEqual(headers["content-range"][0],
"bytes %d-%d/%d" % (length-5, length-1, length))
self.failUnlessEqual(res, self.BAR_CONTENTS[-5:])
d.addCallback(_got)
return d
def test_GET_FILEURL_partial_range_overrun(self):
headers = {"range": "bytes=100-200"}
length = len(self.BAR_CONTENTS)
d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_overrun",
"416 Requested Range not satisfiable",
"First beyond end of file",
self.GET, self.public_url + "/foo/bar.txt",
headers=headers)
return d
def test_HEAD_FILEURL_range(self): def test_HEAD_FILEURL_range(self):
headers = {"range": "bytes=1-10"} headers = {"range": "bytes=1-10"}
d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers, d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
@ -609,13 +633,38 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, unittest.TestCase):
d.addCallback(_got) d.addCallback(_got)
return d return d
def test_HEAD_FILEURL_partial_end_range(self):
headers = {"range": "bytes=-5"}
length = len(self.BAR_CONTENTS)
d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
return_response=True)
def _got((res, status, headers)):
self.failUnlessEqual(int(status), 206)
self.failUnless(headers.has_key("content-range"))
self.failUnlessEqual(headers["content-range"][0],
"bytes %d-%d/%d" % (length-5, length-1, length))
d.addCallback(_got)
return d
def test_HEAD_FILEURL_partial_range_overrun(self):
headers = {"range": "bytes=100-200"}
length = len(self.BAR_CONTENTS)
d = self.shouldFail2(error.Error, "test_HEAD_FILEURL_range_overrun",
"416 Requested Range not satisfiable",
"",
self.HEAD, self.public_url + "/foo/bar.txt",
headers=headers)
return d
def test_GET_FILEURL_range_bad(self): def test_GET_FILEURL_range_bad(self):
headers = {"range": "BOGUS=fizbop-quarnak"} headers = {"range": "BOGUS=fizbop-quarnak"}
d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_bad", d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
"400 Bad Request", return_response=True)
"Syntactically invalid http range header", def _got((res, status, headers)):
self.GET, self.public_url + "/foo/bar.txt", self.failUnlessEqual(int(status), 200)
headers=headers) self.failUnless(not headers.has_key("content-range"))
self.failUnlessEqual(res, self.BAR_CONTENTS)
d.addCallback(_got)
return d return d
def test_HEAD_FILEURL(self): def test_HEAD_FILEURL(self):

View File

@ -335,6 +335,54 @@ class FileDownloader(rend.Page):
self.filenode = filenode self.filenode = filenode
self.filename = filename self.filename = filename
def parse_range_header(self, range):
# Parse a byte ranges according to RFC 2616 "14.35.1 Byte
# Ranges". Returns None if the range doesn't make sense so it
# can be ignored (per the spec). When successful, returns a
# list of (first,last) inclusive range tuples.
filesize = self.filenode.get_size()
assert isinstance(filesize, (int,long)), filesize
try:
# byte-ranges-specifier
units, rangeset = range.split('=', 1)
if units != 'bytes':
return None # nothing else supported
def parse_range(r):
first, last = r.split('-', 1)
if first is '':
# suffix-byte-range-spec
first = filesize - long(last)
last = filesize - 1
else:
# byte-range-spec
# first-byte-pos
first = long(first)
# last-byte-pos
if last is '':
last = filesize - 1
else:
last = long(last)
if last < first:
raise ValueError
return (first, last)
# byte-range-set
#
# Note: the spec uses "1#" for the list of ranges, which
# implicitly allows whitespace around the ',' separators,
# so strip it.
return [ parse_range(r.strip()) for r in rangeset.split(',') ]
except ValueError:
return None
def renderHTTP(self, ctx): def renderHTTP(self, ctx):
req = IRequest(ctx) req = IRequest(ctx)
gte = static.getTypeAndEncoding gte = static.getTypeAndEncoding
@ -356,7 +404,7 @@ class FileDownloader(rend.Page):
filesize = self.filenode.get_size() filesize = self.filenode.get_size()
assert isinstance(filesize, (int,long)), filesize assert isinstance(filesize, (int,long)), filesize
offset, size = 0, None first, size = 0, None
contentsize = filesize contentsize = filesize
req.setHeader("accept-ranges", "bytes") req.setHeader("accept-ranges", "bytes")
if not self.filenode.is_mutable(): if not self.filenode.is_mutable():
@ -370,30 +418,33 @@ class FileDownloader(rend.Page):
# or maybe just use the URI for CHK and LIT. # or maybe just use the URI for CHK and LIT.
rangeheader = req.getHeader('range') rangeheader = req.getHeader('range')
if rangeheader: if rangeheader:
# adapted from nevow.static.File ranges = self.parse_range_header(rangeheader)
bytesrange = rangeheader.split('=')
if bytesrange[0] != 'bytes': # ranges = None means the header didn't parse, so ignore
raise WebError("Syntactically invalid http range header!") # the header as if it didn't exist. If is more than one
start, end = bytesrange[1].split('-') # range, then just return the first for now, until we can
if start: # generate multipart/byteranges.
offset = int(start) if ranges is not None:
if not end: first, last = ranges[0]
# RFC 2616 says:
# if first >= filesize:
# "If the last-byte-pos value is absent, or if the value is raise WebError('First beyond end of file',
# greater than or equal to the current length of the http.REQUESTED_RANGE_NOT_SATISFIABLE)
# entity-body, last-byte-pos is taken to be equal to one less else:
# than the current length of the entity- body in bytes." first = max(0, first)
end = filesize - 1 last = min(filesize-1, last)
size = int(end) - offset + 1
req.setResponseCode(http.PARTIAL_CONTENT) req.setResponseCode(http.PARTIAL_CONTENT)
req.setHeader('content-range',"bytes %s-%s/%s" % req.setHeader('content-range',"bytes %s-%s/%s" %
(str(offset), str(offset+size-1), str(filesize))) (str(first), str(last),
contentsize = size str(filesize)))
contentsize = last - first + 1
size = contentsize
req.setHeader("content-length", str(contentsize)) req.setHeader("content-length", str(contentsize))
if req.method == "HEAD": if req.method == "HEAD":
return "" return ""
d = self.filenode.read(req, offset, size) d = self.filenode.read(req, first, size)
def _error(f): def _error(f):
if req.startedWriting: if req.startedWriting:
# The content-type is already set, and the response code has # The content-type is already set, and the response code has