Merge pull request #1251 from tahoe-lafs/3946-less-chatty-downloads

Make immutable downloads less chatty

Fixes ticket:3946
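The reduction in chattiness comes from raising the default segment size for newly uploaded immutable (CHK) files from 128 KiB to 1 MiB (the new DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE in interfaces.py below); mutable files keep a separate 128 KiB default. Each segment is fetched with its own round of block and hash requests to the storage servers, so fewer, larger segments mean fewer round trips. A rough illustration, using an arbitrary 10 MiB file as the example:

    (10 * 1024 * 1024) // (128 * 1024)    # 80 segments to fetch with the old default
    (10 * 1024 * 1024) // (1024 * 1024)   # 10 segments to fetch with the new default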
Itamar Turner-Trauring 2023-02-21 09:37:30 -05:00 committed by GitHub
commit 0b0af62b16
11 changed files with 107 additions and 21 deletions

View File

@@ -3,11 +3,14 @@ Integration tests for getting and putting files, including reading from stdin
and stdout.
"""
from subprocess import Popen, PIPE
from subprocess import Popen, PIPE, check_output
import sys
import pytest
from pytest_twisted import ensureDeferred
from twisted.internet import reactor
from .util import run_in_thread, cli
from .util import run_in_thread, cli, reconfigure
DATA = b"abc123 this is not utf-8 decodable \xff\x00\x33 \x11"
try:
@@ -62,3 +65,51 @@ def test_get_to_stdout(alice, get_put_alias, tmpdir):
    )
    assert p.stdout.read() == DATA
    assert p.wait() == 0


@pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="reconfigure() has issues on Windows"
)
@ensureDeferred
async def test_upload_download_immutable_different_default_max_segment_size(alice, get_put_alias, tmpdir, request):
    """
    Tahoe-LAFS used to have a default max segment size of 128KB; it is now
    1MB. Test that an upload created when 128KB was the default can be
    downloaded with 1MB as the default (i.e. old uploader, new downloader), and
    vice versa (new uploader, old downloader).
    """
    tempfile = tmpdir.join("file")
    large_data = DATA * 100_000
    assert len(large_data) > 2 * 1024 * 1024
    with tempfile.open("wb") as f:
        f.write(large_data)

    async def set_segment_size(segment_size):
        await reconfigure(
            reactor,
            request,
            alice,
            (1, 1, 1),
            None,
            max_segment_size=segment_size
        )

    # 1. Upload file 1 with default segment size set to 1MB
    await set_segment_size(1024 * 1024)
    cli(alice, "put", str(tempfile), "getput:seg1024kb")

    # 2. Download file 1 with default segment size set to 128KB
    await set_segment_size(128 * 1024)
    assert large_data == check_output(
        ["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg1024kb", "-"]
    )

    # 3. Upload file 2 with default segment size set to 128KB
    cli(alice, "put", str(tempfile), "getput:seg128kb")

    # 4. Download file 2 with default segment size set to 1MB
    await set_segment_size(1024 * 1024)
    assert large_data == check_output(
        ["tahoe", "--node-directory", alice.node_dir, "get", "getput:seg128kb", "-"]
    )
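The 2 MiB lower bound asserted above is what guarantees the test file spans multiple segments under both defaults being exercised; a quick illustrative check (not part of the test itself):

    2 * 1024 * 1024 // (128 * 1024)     # at least 16 segments under the 128 KiB default
    2 * 1024 * 1024 // (1024 * 1024)    # at least 2 segments under the 1 MiB default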

View File

@@ -36,7 +36,8 @@ async def test_capability(reactor, request, alice, case, expected):
computed value.
"""
# rewrite alice's config to match params and convergence
await reconfigure(reactor, request, alice, (1, case.params.required, case.params.total), case.convergence)
await reconfigure(
reactor, request, alice, (1, case.params.required, case.params.total), case.convergence, case.segment_size)
# upload data in the correct format
actual = upload(alice, case.fmt, case.data)
@@ -110,7 +111,8 @@ async def generate(
request,
alice,
(happy, case.params.required, case.params.total),
case.convergence
case.convergence,
case.segment_size
)
# Give the format a chance to make an RSA key if it needs it.

View File

@@ -46,6 +46,7 @@ from allmydata.util.configutil import (
write_config,
)
from allmydata import client
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
import pytest_twisted
@@ -729,11 +730,16 @@ def upload(alice: TahoeProcess, fmt: CHK | SSK, data: bytes) -> str:
    return cli(*argv).decode("utf-8").strip()

async def reconfigure(reactor, request, node: TahoeProcess, params: tuple[int, int, int], convergence: None | bytes) -> None:
async def reconfigure(reactor, request, node: TahoeProcess,
                      params: tuple[int, int, int],
                      convergence: None | bytes,
                      max_segment_size: None | int = None) -> None:
    """
    Reconfigure a Tahoe-LAFS node with different ZFEC parameters and
    convergence secret.

    TODO This appears to have issues on Windows.

    If the current configuration is different from the specified
    configuration, the node will be restarted so it takes effect.
@@ -769,7 +775,22 @@ async def reconfigure(reactor, request, node: TahoeProcess, params: tuple[int, i
            changed = True
            config.write_private_config("convergence", base32.b2a(convergence))

    if max_segment_size is not None:
        cur_segment_size = int(config.get_config("client", "shares._max_immutable_segment_size_for_testing", DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE))
        if cur_segment_size != max_segment_size:
            changed = True
            config.set_config(
                "client",
                "shares._max_immutable_segment_size_for_testing",
                str(max_segment_size)
            )

    if changed:
        # TODO reconfigure() seems to have issues on Windows. If you need to
        # use it there, delete this assert and try to figure out what's going
        # on...
        assert not sys.platform.startswith("win")

        # restart the node
        print(f"Restarting {node.node_dir} for ZFEC reconfiguration")
        await node.restart_async(reactor, request)

View File

@@ -5,7 +5,7 @@ from __future__ import print_function
import sys, math
from allmydata import uri, storage
from allmydata.immutable import upload
from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
from allmydata.util import mathutil
def roundup(size, blocksize=4096):
@@ -26,7 +26,7 @@ class BigFakeString(object):
def tell(self):
return self.fp
def calc(filesize, params=(3,7,10), segsize=DEFAULT_MAX_SEGMENT_SIZE):
def calc(filesize, params=(3,7,10), segsize=DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE):
num_shares = params[2]
if filesize <= upload.Uploader.URI_LIT_SIZE_THRESHOLD:
urisize = len(uri.LiteralFileURI("A"*filesize).to_string())

View File

@@ -0,0 +1 @@
Downloads of large immutables should now finish much faster.

View File

@@ -50,7 +50,7 @@ from allmydata.interfaces import (
IStatsProducer,
SDMF_VERSION,
MDMF_VERSION,
DEFAULT_MAX_SEGMENT_SIZE,
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE,
IFoolscapStoragePlugin,
IAnnounceableStorageServer,
)
@@ -88,6 +88,7 @@ _client_config = configutil.ValidConfiguration(
"shares.happy",
"shares.needed",
"shares.total",
"shares._max_immutable_segment_size_for_testing",
"storage.plugins",
),
"storage": (
@@ -606,7 +607,7 @@ class _Client(node.Node, pollmixin.PollMixin):
DEFAULT_ENCODING_PARAMETERS = {"k": 3,
"happy": 7,
"n": 10,
"max_segment_size": DEFAULT_MAX_SEGMENT_SIZE,
"max_segment_size": DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE,
}
def __init__(self, config, main_tub, i2p_provider, tor_provider, introducer_clients,
@@ -896,6 +897,13 @@
        DEP["k"] = int(self.config.get_config("client", "shares.needed", DEP["k"]))
        DEP["n"] = int(self.config.get_config("client", "shares.total", DEP["n"]))
        DEP["happy"] = int(self.config.get_config("client", "shares.happy", DEP["happy"]))
        # At the moment this is only used for testing, thus the janky config
        # attribute name.
        DEP["max_segment_size"] = int(self.config.get_config(
            "client",
            "shares._max_immutable_segment_size_for_testing",
            DEP["max_segment_size"])
        )

        # for the CLI to authenticate to local JSON endpoints
        self._create_auth_token()
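For reference, the testing-only option read above lives next to the existing shares.* settings in a node's tahoe.cfg. A hypothetical sketch (the value 131072 simply forces the pre-change 128 KiB segment size):

    [client]
    shares.needed = 3
    shares.happy = 7
    shares.total = 10
    shares._max_immutable_segment_size_for_testing = 131072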

View File

@@ -19,7 +19,7 @@ from foolscap.api import eventually
from allmydata import uri
from allmydata.codec import CRSDecoder
from allmydata.util import base32, log, hashutil, mathutil, observer
from allmydata.interfaces import DEFAULT_MAX_SEGMENT_SIZE
from allmydata.interfaces import DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
from allmydata.hashtree import IncompleteHashTree, BadHashError, \
NotEnoughHashesError
@@ -49,6 +49,8 @@ class DownloadNode(object):
"""Internal class which manages downloads and holds state. External
callers use CiphertextFileNode instead."""
default_max_segment_size = DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
# Share._node points to me
def __init__(self, verifycap, storage_broker, secret_holder,
terminator, history, download_status):
@@ -76,7 +78,7 @@
# .guessed_segment_size, .guessed_num_segments, and
# .ciphertext_hash_tree (with a dummy, to let us guess which hashes
# we'll need)
self._build_guessed_tables(DEFAULT_MAX_SEGMENT_SIZE)
self._build_guessed_tables(self.default_max_segment_size)
# filled in when we parse a valid UEB
self.have_UEB = False
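Because the default is now a class attribute rather than a hard-coded constant, the pre-UEB guess can be adjusted without touching interfaces.py. A minimal sketch of what that permits, assuming the usual module path for DownloadNode (hypothetical, not part of this patch):

    from allmydata.immutable.downloader.node import DownloadNode

    class OldGuessDownloadNode(DownloadNode):
        # build the guessed tables as if uploads still used 128 KiB segments
        default_max_segment_size = 128 * 1024

The guess only determines which hashes are speculatively requested up front; the authoritative segment size still comes from the UEB once it has been parsed.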

View File

@@ -48,7 +48,7 @@ from allmydata.util.rrefutil import add_version_to_remote_reference
from allmydata.interfaces import IUploadable, IUploader, IUploadResults, \
IEncryptedUploadable, RIEncryptedUploadable, IUploadStatus, \
NoServersError, InsufficientVersionError, UploadUnhappinessError, \
DEFAULT_MAX_SEGMENT_SIZE, IPeerSelector
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE, IPeerSelector
from allmydata.immutable import layout
from io import BytesIO
@@ -1692,7 +1692,7 @@ class AssistedUploader(object):
class BaseUploadable(object):
# this is overridden by max_segment_size
default_max_segment_size = DEFAULT_MAX_SEGMENT_SIZE
default_max_segment_size = DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE
default_params_set = False
max_segment_size = None
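As the comment above notes, an explicitly set max_segment_size takes precedence over the class-level default. A minimal sketch of that precedence rule (hypothetical helper, not from the patch):

    def effective_segment_size(uploadable):
        # a per-upload value wins over the 1 MiB class default
        if uploadable.max_segment_size is not None:
            return uploadable.max_segment_size
        return uploadable.default_max_segment_size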

View File

@@ -41,7 +41,8 @@ URI = StringConstraint(300) # kind of arbitrary
MAX_BUCKETS = 256 # per peer -- zfec offers at most 256 shares per file
DEFAULT_MAX_SEGMENT_SIZE = 128*1024
# The default size for segments of new CHK ("immutable") uploads.
DEFAULT_IMMUTABLE_MAX_SEGMENT_SIZE = 1024*1024
ShareData = StringConstraint(None)
URIExtensionData = StringConstraint(1000)

View File

@@ -36,7 +36,7 @@ from allmydata.mutable.layout import get_version_from_checkstring,\
SDMFSlotWriteProxy
KiB = 1024
DEFAULT_MAX_SEGMENT_SIZE = 128 * KiB
DEFAULT_MUTABLE_MAX_SEGMENT_SIZE = 128 * KiB
PUSHING_BLOCKS_STATE = 0
PUSHING_EVERYTHING_ELSE_STATE = 1
DONE_STATE = 2
@@ -367,7 +367,7 @@
self.data = newdata
self.datalength = newdata.get_size()
#if self.datalength >= DEFAULT_MAX_SEGMENT_SIZE:
#if self.datalength >= DEFAULT_MUTABLE_MAX_SEGMENT_SIZE:
# self._version = MDMF_VERSION
#else:
# self._version = SDMF_VERSION
@@ -551,7 +551,7 @@
def setup_encoding_parameters(self, offset=0):
if self._version == MDMF_VERSION:
segment_size = DEFAULT_MAX_SEGMENT_SIZE # 128 KiB by default
segment_size = DEFAULT_MUTABLE_MAX_SEGMENT_SIZE # 128 KiB by default
else:
segment_size = self.datalength # SDMF is only one segment
# this must be a multiple of self.required_shares
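Mutable files are untouched by the new default: under MDMF the publish path still carves data into 128 KiB segments, and SDMF remains single-segment. Illustrative arithmetic, not part of the patch:

    1024 * 1024 // DEFAULT_MUTABLE_MAX_SEGMENT_SIZE    # a 1 MiB MDMF publish -> 8 segments
    # SDMF: segment_size == self.datalength, i.e. always exactly one segment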

View File

@@ -20,7 +20,7 @@ from testtools.matchers import (
from twisted.internet import defer
from allmydata.interfaces import MDMF_VERSION
from allmydata.mutable.filenode import MutableFileNode
from allmydata.mutable.publish import MutableData, DEFAULT_MAX_SEGMENT_SIZE
from allmydata.mutable.publish import MutableData, DEFAULT_MUTABLE_MAX_SEGMENT_SIZE
from ..no_network import GridTestMixin
from .. import common_util as testutil
@@ -180,7 +180,7 @@ class Update(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin):
# long -- this is 7 segments in the default segment size. So we
# need to add 2 segments worth of data to push it over a
# power-of-two boundary.
segment = b"a" * DEFAULT_MAX_SEGMENT_SIZE
segment = b"a" * DEFAULT_MUTABLE_MAX_SEGMENT_SIZE
new_data = self.data + (segment * 2)
d0 = self.do_upload_mdmf()
def _run(ign):
@@ -232,9 +232,9 @@ class Update(GridTestMixin, AsyncTestCase, testutil.ShouldFailMixin):
return d0
def test_multiple_segment_replace(self):
replace_offset = 2 * DEFAULT_MAX_SEGMENT_SIZE
replace_offset = 2 * DEFAULT_MUTABLE_MAX_SEGMENT_SIZE
new_data = self.data[:replace_offset]
new_segment = b"a" * DEFAULT_MAX_SEGMENT_SIZE
new_segment = b"a" * DEFAULT_MUTABLE_MAX_SEGMENT_SIZE
new_data += 2 * new_segment
new_data += b"replaced"
rest_offset = len(new_data)