mirror of
https://github.com/tahoe-lafs/tahoe-lafs.git
synced 2024-12-20 21:43:09 +00:00
Improve HTTP/1.1 byterange handling
Fix parsing of a Range: header to support: - multiple ranges (parsed, but not returned) - suffix byte ranges ("-2139") - correct handling of incorrectly formatted range headers (correct behaviour is to ignore the header and return the full file) - return appropriate error for ranges outside the file Multiple ranges are parsed, but only the first range is returned. Returning multiple ranges requires using the multipart/byterange content type.
This commit is contained in:
parent
d13784b649
commit
63b28d707b
@ -583,6 +583,30 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, unittest.TestCase):
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_GET_FILEURL_partial_end_range(self):
|
||||
headers = {"range": "bytes=-5"}
|
||||
length = len(self.BAR_CONTENTS)
|
||||
d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
|
||||
return_response=True)
|
||||
def _got((res, status, headers)):
|
||||
self.failUnlessEqual(int(status), 206)
|
||||
self.failUnless(headers.has_key("content-range"))
|
||||
self.failUnlessEqual(headers["content-range"][0],
|
||||
"bytes %d-%d/%d" % (length-5, length-1, length))
|
||||
self.failUnlessEqual(res, self.BAR_CONTENTS[-5:])
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_GET_FILEURL_partial_range_overrun(self):
|
||||
headers = {"range": "bytes=100-200"}
|
||||
length = len(self.BAR_CONTENTS)
|
||||
d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_overrun",
|
||||
"416 Requested Range not satisfiable",
|
||||
"First beyond end of file",
|
||||
self.GET, self.public_url + "/foo/bar.txt",
|
||||
headers=headers)
|
||||
return d
|
||||
|
||||
def test_HEAD_FILEURL_range(self):
|
||||
headers = {"range": "bytes=1-10"}
|
||||
d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
|
||||
@ -609,13 +633,38 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, unittest.TestCase):
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_HEAD_FILEURL_partial_end_range(self):
|
||||
headers = {"range": "bytes=-5"}
|
||||
length = len(self.BAR_CONTENTS)
|
||||
d = self.HEAD(self.public_url + "/foo/bar.txt", headers=headers,
|
||||
return_response=True)
|
||||
def _got((res, status, headers)):
|
||||
self.failUnlessEqual(int(status), 206)
|
||||
self.failUnless(headers.has_key("content-range"))
|
||||
self.failUnlessEqual(headers["content-range"][0],
|
||||
"bytes %d-%d/%d" % (length-5, length-1, length))
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_HEAD_FILEURL_partial_range_overrun(self):
|
||||
headers = {"range": "bytes=100-200"}
|
||||
length = len(self.BAR_CONTENTS)
|
||||
d = self.shouldFail2(error.Error, "test_HEAD_FILEURL_range_overrun",
|
||||
"416 Requested Range not satisfiable",
|
||||
"",
|
||||
self.HEAD, self.public_url + "/foo/bar.txt",
|
||||
headers=headers)
|
||||
return d
|
||||
|
||||
def test_GET_FILEURL_range_bad(self):
|
||||
headers = {"range": "BOGUS=fizbop-quarnak"}
|
||||
d = self.shouldFail2(error.Error, "test_GET_FILEURL_range_bad",
|
||||
"400 Bad Request",
|
||||
"Syntactically invalid http range header",
|
||||
self.GET, self.public_url + "/foo/bar.txt",
|
||||
headers=headers)
|
||||
d = self.GET(self.public_url + "/foo/bar.txt", headers=headers,
|
||||
return_response=True)
|
||||
def _got((res, status, headers)):
|
||||
self.failUnlessEqual(int(status), 200)
|
||||
self.failUnless(not headers.has_key("content-range"))
|
||||
self.failUnlessEqual(res, self.BAR_CONTENTS)
|
||||
d.addCallback(_got)
|
||||
return d
|
||||
|
||||
def test_HEAD_FILEURL(self):
|
||||
|
@ -335,6 +335,54 @@ class FileDownloader(rend.Page):
|
||||
self.filenode = filenode
|
||||
self.filename = filename
|
||||
|
||||
def parse_range_header(self, range):
|
||||
# Parse a byte ranges according to RFC 2616 "14.35.1 Byte
|
||||
# Ranges". Returns None if the range doesn't make sense so it
|
||||
# can be ignored (per the spec). When successful, returns a
|
||||
# list of (first,last) inclusive range tuples.
|
||||
|
||||
filesize = self.filenode.get_size()
|
||||
assert isinstance(filesize, (int,long)), filesize
|
||||
|
||||
try:
|
||||
# byte-ranges-specifier
|
||||
units, rangeset = range.split('=', 1)
|
||||
if units != 'bytes':
|
||||
return None # nothing else supported
|
||||
|
||||
def parse_range(r):
|
||||
first, last = r.split('-', 1)
|
||||
|
||||
if first is '':
|
||||
# suffix-byte-range-spec
|
||||
first = filesize - long(last)
|
||||
last = filesize - 1
|
||||
else:
|
||||
# byte-range-spec
|
||||
|
||||
# first-byte-pos
|
||||
first = long(first)
|
||||
|
||||
# last-byte-pos
|
||||
if last is '':
|
||||
last = filesize - 1
|
||||
else:
|
||||
last = long(last)
|
||||
|
||||
if last < first:
|
||||
raise ValueError
|
||||
|
||||
return (first, last)
|
||||
|
||||
# byte-range-set
|
||||
#
|
||||
# Note: the spec uses "1#" for the list of ranges, which
|
||||
# implicitly allows whitespace around the ',' separators,
|
||||
# so strip it.
|
||||
return [ parse_range(r.strip()) for r in rangeset.split(',') ]
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
def renderHTTP(self, ctx):
|
||||
req = IRequest(ctx)
|
||||
gte = static.getTypeAndEncoding
|
||||
@ -356,7 +404,7 @@ class FileDownloader(rend.Page):
|
||||
|
||||
filesize = self.filenode.get_size()
|
||||
assert isinstance(filesize, (int,long)), filesize
|
||||
offset, size = 0, None
|
||||
first, size = 0, None
|
||||
contentsize = filesize
|
||||
req.setHeader("accept-ranges", "bytes")
|
||||
if not self.filenode.is_mutable():
|
||||
@ -370,30 +418,33 @@ class FileDownloader(rend.Page):
|
||||
# or maybe just use the URI for CHK and LIT.
|
||||
rangeheader = req.getHeader('range')
|
||||
if rangeheader:
|
||||
# adapted from nevow.static.File
|
||||
bytesrange = rangeheader.split('=')
|
||||
if bytesrange[0] != 'bytes':
|
||||
raise WebError("Syntactically invalid http range header!")
|
||||
start, end = bytesrange[1].split('-')
|
||||
if start:
|
||||
offset = int(start)
|
||||
if not end:
|
||||
# RFC 2616 says:
|
||||
#
|
||||
# "If the last-byte-pos value is absent, or if the value is
|
||||
# greater than or equal to the current length of the
|
||||
# entity-body, last-byte-pos is taken to be equal to one less
|
||||
# than the current length of the entity- body in bytes."
|
||||
end = filesize - 1
|
||||
size = int(end) - offset + 1
|
||||
req.setResponseCode(http.PARTIAL_CONTENT)
|
||||
req.setHeader('content-range',"bytes %s-%s/%s" %
|
||||
(str(offset), str(offset+size-1), str(filesize)))
|
||||
contentsize = size
|
||||
ranges = self.parse_range_header(rangeheader)
|
||||
|
||||
# ranges = None means the header didn't parse, so ignore
|
||||
# the header as if it didn't exist. If is more than one
|
||||
# range, then just return the first for now, until we can
|
||||
# generate multipart/byteranges.
|
||||
if ranges is not None:
|
||||
first, last = ranges[0]
|
||||
|
||||
if first >= filesize:
|
||||
raise WebError('First beyond end of file',
|
||||
http.REQUESTED_RANGE_NOT_SATISFIABLE)
|
||||
else:
|
||||
first = max(0, first)
|
||||
last = min(filesize-1, last)
|
||||
|
||||
req.setResponseCode(http.PARTIAL_CONTENT)
|
||||
req.setHeader('content-range',"bytes %s-%s/%s" %
|
||||
(str(first), str(last),
|
||||
str(filesize)))
|
||||
contentsize = last - first + 1
|
||||
size = contentsize
|
||||
|
||||
req.setHeader("content-length", str(contentsize))
|
||||
if req.method == "HEAD":
|
||||
return ""
|
||||
d = self.filenode.read(req, offset, size)
|
||||
d = self.filenode.read(req, first, size)
|
||||
def _error(f):
|
||||
if req.startedWriting:
|
||||
# The content-type is already set, and the response code has
|
||||
|
Loading…
Reference in New Issue
Block a user