Content-Range headers are now checked (somewhat) and the server now sends

correct headers when reading beyond the end.
This commit is contained in:
Itamar Turner-Trauring 2022-07-19 16:10:22 -04:00
parent b3aff5c43b
commit 1b8b71b306
3 changed files with 51 additions and 14 deletions

View File

@ -7,6 +7,7 @@ from __future__ import annotations
from typing import Union, Optional, Sequence, Mapping, BinaryIO
from base64 import b64encode
from io import BytesIO
from os import SEEK_END
from attrs import define, asdict, frozen, field
@ -29,6 +30,7 @@ from treq.client import HTTPClient
from treq.testing import StubTreq
from OpenSSL import SSL
from cryptography.hazmat.bindings.openssl.binding import Binding
from werkzeug.http import parse_content_range_header
from .http_common import (
swissnum_auth_header,
@ -461,13 +463,36 @@ def read_share_chunk(
"GET",
url,
headers=Headers(
# Ranges in HTTP are _inclusive_, Python's convention is exclusive,
# but Range constructor does that the conversion for us.
{"range": [Range("bytes", [(offset, offset + length)]).to_header()]}
),
)
if response.code == http.PARTIAL_CONTENT:
body = yield response.content()
returnValue(body)
content_range = parse_content_range_header(
response.headers.getRawHeaders("content-range")[0]
)
supposed_length = content_range.stop - content_range.start
if supposed_length > length:
raise ValueError("Server sent more than we asked for?!")
# It might also send less than we asked for. That's (probably) OK, e.g.
# if we went past the end of the file.
body = yield limited_content(response, supposed_length)
body.seek(0, SEEK_END)
actual_length = body.tell()
if actual_length != supposed_length:
# Most likely a mutable that got changed out from under us, but
# concievably could be a bug...
raise ValueError(
f"Length of response sent from server ({actual_length}) "
+ f"didn't match Content-Range header ({supposed_length})"
)
body.seek(0)
returnValue(body.read())
else:
# Technically HTTP allows sending an OK with full body under these
# circumstances, but the server is not designed to do that so we ignore
# than possibility for now...
raise ClientException(response.code)

View File

@ -352,12 +352,10 @@ class _ReadRangeProducer:
if self.first_read and self.remaining > 0:
# For empty bodies the content-range header makes no sense since
# the end of the range is inclusive.
#
# TODO this is wrong for requests that go beyond the end of the
# share. This will be fixed in
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 by making that
# edge case not happen.
# the end of the range is inclusive. Actual conversion from
# Python's exclusive ranges to inclusive ranges is handled by
# werkzeug. The case where we're reading beyond the end of the
# share is handled by caller (read_range().)
self.request.setHeader(
"content-range",
ContentRange(
@ -368,7 +366,7 @@ class _ReadRangeProducer:
if not data and self.remaining > 0:
# TODO Either data is missing locally (storage issue?) or a bug,
# abort response with error? Until
# abort response with error. Until
# https://tahoe-lafs.org/trac/tahoe-lafs/ticket/3907 is implemented
# we continue anyway.
pass
@ -394,7 +392,9 @@ class _ReadRangeProducer:
pass
def read_range(request: Request, read_data: ReadData) -> Union[Deferred, bytes]:
def read_range(
request: Request, read_data: ReadData, share_length: int
) -> Union[Deferred, bytes]:
"""
Read an optional ``Range`` header, reads data appropriately via the given
callable, writes the data to the request.
@ -430,9 +430,12 @@ def read_range(request: Request, read_data: ReadData) -> Union[Deferred, bytes]:
):
raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE)
# TODO if end is beyond the end of the share, either return error, or maybe
# just return what we can...
offset, end = range_header.ranges[0]
# If we're being ask to read beyond the length of the share, just read
# less:
end = min(end, share_length)
# TODO when if end is now <= offset?
request.setResponseCode(http.PARTIAL_CONTENT)
d = Deferred()
request.registerProducer(
@ -675,7 +678,7 @@ class HTTPServer(object):
request.setResponseCode(http.NOT_FOUND)
return b""
return read_range(request, bucket.read)
return read_range(request, bucket.read, bucket.get_length())
@_authorized_route(
_app,
@ -763,6 +766,13 @@ class HTTPServer(object):
def read_mutable_chunk(self, request, authorization, storage_index, share_number):
"""Read a chunk from a mutable."""
try:
share_length = self._storage_server.get_mutable_share_length(
storage_index, share_number
)
except KeyError:
raise _HTTPError(http.NOT_FOUND)
def read_data(offset, length):
try:
return self._storage_server.slot_readv(
@ -771,7 +781,7 @@ class HTTPServer(object):
except KeyError:
raise _HTTPError(http.NOT_FOUND)
return read_range(request, read_data)
return read_range(request, read_data, share_length)
@_authorized_route(
_app, set(), "/v1/mutable/<storage_index:storage_index>/shares", methods=["GET"]

View File

@ -805,6 +805,8 @@ class StorageServer(service.MultiService):
"""Returns the length (in bytes) of a mutable."""
si_dir = storage_index_to_dir(storage_index)
path = os.path.join(self.sharedir, si_dir, str(share_number))
if not os.path.exists(path):
raise KeyError("No such storage index or share number")
return MutableShareFile(path).get_length()