From b45ee20ba8bd9b582ad479b228be47390145d0de Mon Sep 17 00:00:00 2001 From: Itamar Turner-Trauring Date: Wed, 14 Jun 2023 11:07:48 -0400 Subject: [PATCH] MyPy fixes for allmydata.storage. --- src/allmydata/storage/http_client.py | 23 +++++++++++------------ src/allmydata/storage/http_common.py | 2 +- src/allmydata/storage/http_server.py | 13 +++++++++---- src/allmydata/storage/lease.py | 4 +++- src/allmydata/storage/lease_schema.py | 2 +- src/allmydata/storage/server.py | 4 +++- 6 files changed, 28 insertions(+), 20 deletions(-) diff --git a/src/allmydata/storage/http_client.py b/src/allmydata/storage/http_client.py index 7f53a4378..59213417c 100644 --- a/src/allmydata/storage/http_client.py +++ b/src/allmydata/storage/http_client.py @@ -34,7 +34,7 @@ from werkzeug.datastructures import Range, ContentRange from twisted.web.http_headers import Headers from twisted.web import http from twisted.web.iweb import IPolicyForHTTPS, IResponse, IAgent -from twisted.internet.defer import inlineCallbacks, Deferred, succeed +from twisted.internet.defer import Deferred, succeed from twisted.internet.interfaces import ( IOpenSSLClientConnectionCreator, IReactorTime, @@ -70,7 +70,6 @@ except ImportError: pass - def _encode_si(si): # type: (bytes) -> str """Encode the storage index into Unicode string.""" return str(si_b2a(si), "ascii") @@ -179,24 +178,24 @@ def limited_content( This will time out if no data is received for 60 seconds; so long as a trickle of data continues to arrive, it will continue to run. """ - d = succeed(None) + result_deferred = succeed(None) # Sadly, addTimeout() won't work because we need access to the IDelayedCall # in order to reset it on each data chunk received. - timeout = clock.callLater(60, d.cancel) + timeout = clock.callLater(60, result_deferred.cancel) collector = _LengthLimitedCollector(max_length, timeout) with start_action( action_type="allmydata:storage:http-client:limited-content", max_length=max_length, ).context(): - d = DeferredContext(d) + d = DeferredContext(result_deferred) # Make really sure everything gets called in Deferred context, treq might # call collector directly... d.addCallback(lambda _: treq.collect(response, collector)) - def done(_): + def done(_: object) -> BytesIO: timeout.cancel() collector.f.seek(0) return collector.f @@ -659,15 +658,15 @@ class UploadProgress(object): required: RangeMap -@inlineCallbacks -def read_share_chunk( +@async_to_deferred +async def read_share_chunk( client: StorageClient, share_type: str, storage_index: bytes, share_number: int, offset: int, length: int, -) -> Deferred[bytes]: +) -> bytes: """ Download a chunk of data from a share. @@ -688,7 +687,7 @@ def read_share_chunk( # The default 60 second timeout is for getting the response, so it doesn't # include the time it takes to download the body... so we will will deal # with that later, via limited_content(). - response = yield client.request( + response = await client.request( "GET", url, headers=Headers( @@ -725,7 +724,7 @@ def read_share_chunk( raise ValueError("Server sent more than we asked for?!") # It might also send less than we asked for. That's (probably) OK, e.g. # if we went past the end of the file. - body = yield limited_content(response, client._clock, supposed_length) + body = await limited_content(response, client._clock, supposed_length) body.seek(0, SEEK_END) actual_length = body.tell() if actual_length != supposed_length: @@ -751,7 +750,7 @@ async def advise_corrupt_share( storage_index: bytes, share_number: int, reason: str, -): +) -> None: assert isinstance(reason, str) url = client.relative_url( "/storage/v1/{}/{}/{}/corrupt".format( diff --git a/src/allmydata/storage/http_common.py b/src/allmydata/storage/http_common.py index e5f07898e..f16a16785 100644 --- a/src/allmydata/storage/http_common.py +++ b/src/allmydata/storage/http_common.py @@ -22,7 +22,7 @@ def get_content_type(headers: Headers) -> Optional[str]: Returns ``None`` if no content-type was set. """ - values = headers.getRawHeaders("content-type") or [None] + values = headers.getRawHeaders("content-type", [None]) or [None] content_type = parse_options_header(values[0])[0] or None return content_type diff --git a/src/allmydata/storage/http_server.py b/src/allmydata/storage/http_server.py index 028ebf1c7..c63a4ca08 100644 --- a/src/allmydata/storage/http_server.py +++ b/src/allmydata/storage/http_server.py @@ -386,13 +386,16 @@ class _ReadRangeProducer: a request. """ - request: Request + request: Optional[Request] read_data: ReadData - result: Deferred + result: Optional[Deferred[bytes]] start: int remaining: int def resumeProducing(self): + if self.result is None or self.request is None: + return + to_read = min(self.remaining, 65536) data = self.read_data(self.start, to_read) assert len(data) <= to_read @@ -441,7 +444,7 @@ class _ReadRangeProducer: def read_range( request: Request, read_data: ReadData, share_length: int -) -> Union[Deferred, bytes]: +) -> Union[Deferred[bytes], bytes]: """ Read an optional ``Range`` header, reads data appropriately via the given callable, writes the data to the request. @@ -478,6 +481,8 @@ def read_range( raise _HTTPError(http.REQUESTED_RANGE_NOT_SATISFIABLE) offset, end = range_header.ranges[0] + assert end is not None # should've exited in block above this if so + # If we're being ask to read beyond the length of the share, just read # less: end = min(end, share_length) @@ -496,7 +501,7 @@ def read_range( ContentRange("bytes", offset, end).to_header(), ) - d = Deferred() + d: Deferred[bytes] = Deferred() request.registerProducer( _ReadRangeProducer( request, read_data_with_error_handling, d, offset, end - offset diff --git a/src/allmydata/storage/lease.py b/src/allmydata/storage/lease.py index c056a7d28..c0d11abfd 100644 --- a/src/allmydata/storage/lease.py +++ b/src/allmydata/storage/lease.py @@ -173,7 +173,9 @@ class LeaseInfo(object): """ return attr.assoc( self, - _expiration_time=new_expire_time, + # MyPy is unhappy with this; long-term solution is likely switch to + # new @frozen attrs API, with type annotations. + _expiration_time=new_expire_time, # type: ignore[call-arg] ) def is_renew_secret(self, candidate_secret): diff --git a/src/allmydata/storage/lease_schema.py b/src/allmydata/storage/lease_schema.py index 63d3d4ed8..ba7dc991a 100644 --- a/src/allmydata/storage/lease_schema.py +++ b/src/allmydata/storage/lease_schema.py @@ -56,7 +56,7 @@ class HashedLeaseSerializer(object): """ Hash a lease secret for storage. """ - return blake2b(secret, digest_size=32, encoder=RawEncoder()) + return blake2b(secret, digest_size=32, encoder=RawEncoder) @classmethod def _hash_lease_info(cls, lease_info): diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 6099636f8..d805df1c1 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -55,7 +55,9 @@ class StorageServer(service.MultiService): """ Implement the business logic for the storage server. """ - name = 'storage' + # The type in Twisted for services is wrong in 22.10... + # https://github.com/twisted/twisted/issues/10135 + name = 'storage' # type: ignore # only the tests change this to anything else LeaseCheckerClass = LeaseCheckingCrawler